/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
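/* For illustration (a sketch of the intent, not literal output of this
   pass), strength reduction may rewrite an indexed loop

       for (i = 0; i < n; i++)
         a[i] = 0;

   so that the array access uses a single pointer induction variable

       for (p = &a[0]; p != &a[n]; p++)
         *p = 0;

   replacing the per-iteration address computation a + i * sizeof (*a)
   by a cheap pointer increment.  */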
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "ssa.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "gimple-pretty-print.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we would stop at any ssa var; since full expansion is
	 expensive and such cases are unusual, we just check the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we would stop at any ssa var; since full expansion is
     expensive and such cases are unusual, we just check the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
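/* E.g. (an illustrative sketch, with I standing for an arbitrary ssa name):
   for TOP = I * 8 - I and BOT = I this stores 7 to MUL, while for
   TOP = I * 8 and BOT = I * 4 it returns false, since BOT is only matched
   syntactically against subterms of TOP.  */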
static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has the same stripped iv base and
   step, this function records this one as a sub use to that; otherwise
   it records it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
2207 /* Compute the maximum offset of the [base + offset] addressing mode
2208 for the memory reference represented by USE. */
2210 static HOST_WIDE_INT
2211 compute_max_addr_offset (struct iv_use *use)
2213 int width;
2214 rtx reg, addr;
2215 HOST_WIDE_INT i, off;
2216 unsigned list_index, num;
2217 addr_space_t as;
2218 machine_mode mem_mode, addr_mode;
2219 static vec<HOST_WIDE_INT> max_offset_list;
2221 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2222 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2224 num = max_offset_list.length ();
2225 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2226 if (list_index >= num)
2228 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2229 for (; num < max_offset_list.length (); num++)
2230 max_offset_list[num] = -1;
2233 off = max_offset_list[list_index];
2234 if (off != -1)
2235 return off;
2237 addr_mode = targetm.addr_space.address_mode (as);
2238 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2239 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2241 width = GET_MODE_BITSIZE (addr_mode) - 1;
2242 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2243 width = HOST_BITS_PER_WIDE_INT - 1;
2245 for (i = width; i > 0; i--)
2247 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2248 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2249 if (memory_address_addr_space_p (mem_mode, addr, as))
2250 break;
2252 /* For some strict-alignment targets, the offset must be naturally
2253 aligned. Try an aligned offset if mem_mode is not QImode. */
2254 off = ((unsigned HOST_WIDE_INT) 1 << i);
2255 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2257 off -= GET_MODE_SIZE (mem_mode);
2258 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2259 if (memory_address_addr_space_p (mem_mode, addr, as))
2260 break;
2263 if (i == 0)
2264 off = 0;
2266 max_offset_list[list_index] = off;
2267 return off;
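/* Editor's illustration (hedged sketch, assumed target, not part of the
   pass): on a hypothetical machine whose [base + offset] mode accepts
   13-bit signed offsets, the probe above first succeeds for i = 12,
   giving

     off = ((unsigned HOST_WIDE_INT) 1 << 12) - 1 = 4095

   for QImode, or the aligned fallback (1 << 12) - GET_MODE_SIZE (SImode)
   = 4092 for SImode on a strict-alignment target.  The result is cached
   in max_offset_list, keyed by (address space, mode).  */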
2270 /* Check if all small groups should be split. Return true if and
2271 only if:
2273 1) At least one group contains two uses with different offsets.
2274 2) No group contains more than two distinct offsets.
2276 Return false otherwise. We want to split such groups because:
2278 1) Small groups don't have much benefit and may interfere with
2279 general candidate selection.
2280 2) The problem size with only small groups is usually small, and
2281 the general algorithm can handle it well.
2283 TODO -- The above claim may not hold when auto-increment is supported. */
2285 static bool
2286 split_all_small_groups (struct ivopts_data *data)
2288 bool split_p = false;
2289 unsigned int i, n, distinct;
2290 struct iv_use *pre, *use;
2292 n = n_iv_uses (data);
2293 for (i = 0; i < n; i++)
2295 use = iv_use (data, i);
2296 if (!use->next)
2297 continue;
2299 distinct = 1;
2300 gcc_assert (use->type == USE_ADDRESS);
2301 for (pre = use, use = use->next; use; pre = use, use = use->next)
2303 if (pre->addr_offset != use->addr_offset)
2304 distinct++;
2306 if (distinct > 2)
2307 return false;
2309 if (distinct == 2)
2310 split_p = true;
2313 return split_p;
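/* Editor's illustration (assumed offsets, not from this file): for the
   address groups

     g1: {base, base + 4}        -- 2 distinct offsets
     g2: {base, base, base + 8}  -- 2 distinct offsets

   the function returns true and all small groups are split, whereas a
   single group {base, base + 4, base + 8} (3 distinct offsets) makes
   it return false so that grouping is kept.  */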
2316 /* For each group of address type uses, this function further groups
2317 these uses according to the maximum offset supported by the target's
2318 [base + offset] addressing mode. */
2320 static void
2321 group_address_uses (struct ivopts_data *data)
2323 HOST_WIDE_INT max_offset = -1;
2324 unsigned int i, n, sub_id;
2325 struct iv_use *pre, *use;
2326 unsigned HOST_WIDE_INT addr_offset_first;
2328 /* Reset max offset to split all small groups. */
2329 if (split_all_small_groups (data))
2330 max_offset = 0;
2332 n = n_iv_uses (data);
2333 for (i = 0; i < n; i++)
2335 use = iv_use (data, i);
2336 if (!use->next)
2337 continue;
2339 gcc_assert (use->type == USE_ADDRESS);
2340 if (max_offset != 0)
2341 max_offset = compute_max_addr_offset (use);
2343 while (use)
2345 sub_id = 0;
2346 addr_offset_first = use->addr_offset;
2347 /* Only uses whose offset relative to the first use fits in the
2348 offset part can be grouped together. */
2349 for (pre = use, use = use->next;
2350 use && (use->addr_offset - addr_offset_first
2351 <= (unsigned HOST_WIDE_INT) max_offset);
2352 pre = use, use = use->next)
2354 use->id = pre->id;
2355 use->sub_id = ++sub_id;
2358 /* Break the list and create new group. */
2359 if (use)
2361 pre->next = NULL;
2362 use->id = n_iv_uses (data);
2363 use->related_cands = BITMAP_ALLOC (NULL);
2364 data->iv_uses.safe_push (use);
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2370 dump_uses (dump_file, data);
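/* Editor's illustration (hypothetical max_offset = 4095): uses at
   offsets {0, 8, 4000, 4096, 4104} from the same base split into two
   groups, {0, 8, 4000} and {4096, 4104}, because 4096 - 0 no longer
   fits in the supported offset part.  Each new group gets a fresh id
   via n_iv_uses and its own related_cands bitmap.  */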
2373 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2374 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2375 we are at the top-level of the processed address. */
2377 static tree
2378 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2379 HOST_WIDE_INT *offset)
2381 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2382 enum tree_code code;
2383 tree type, orig_type = TREE_TYPE (expr);
2384 HOST_WIDE_INT off0, off1, st;
2385 tree orig_expr = expr;
2387 STRIP_NOPS (expr);
2389 type = TREE_TYPE (expr);
2390 code = TREE_CODE (expr);
2391 *offset = 0;
2393 switch (code)
2395 case INTEGER_CST:
2396 if (!cst_and_fits_in_hwi (expr)
2397 || integer_zerop (expr))
2398 return orig_expr;
2400 *offset = int_cst_value (expr);
2401 return build_int_cst (orig_type, 0);
2403 case POINTER_PLUS_EXPR:
2404 case PLUS_EXPR:
2405 case MINUS_EXPR:
2406 op0 = TREE_OPERAND (expr, 0);
2407 op1 = TREE_OPERAND (expr, 1);
2409 op0 = strip_offset_1 (op0, false, false, &off0);
2410 op1 = strip_offset_1 (op1, false, false, &off1);
2412 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2413 if (op0 == TREE_OPERAND (expr, 0)
2414 && op1 == TREE_OPERAND (expr, 1))
2415 return orig_expr;
2417 if (integer_zerop (op1))
2418 expr = op0;
2419 else if (integer_zerop (op0))
2421 if (code == MINUS_EXPR)
2422 expr = fold_build1 (NEGATE_EXPR, type, op1);
2423 else
2424 expr = op1;
2426 else
2427 expr = fold_build2 (code, type, op0, op1);
2429 return fold_convert (orig_type, expr);
2431 case MULT_EXPR:
2432 op1 = TREE_OPERAND (expr, 1);
2433 if (!cst_and_fits_in_hwi (op1))
2434 return orig_expr;
2436 op0 = TREE_OPERAND (expr, 0);
2437 op0 = strip_offset_1 (op0, false, false, &off0);
2438 if (op0 == TREE_OPERAND (expr, 0))
2439 return orig_expr;
2441 *offset = off0 * int_cst_value (op1);
2442 if (integer_zerop (op0))
2443 expr = op0;
2444 else
2445 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2447 return fold_convert (orig_type, expr);
2449 case ARRAY_REF:
2450 case ARRAY_RANGE_REF:
2451 if (!inside_addr)
2452 return orig_expr;
2454 step = array_ref_element_size (expr);
2455 if (!cst_and_fits_in_hwi (step))
2456 break;
2458 st = int_cst_value (step);
2459 op1 = TREE_OPERAND (expr, 1);
2460 op1 = strip_offset_1 (op1, false, false, &off1);
2461 *offset = off1 * st;
2463 if (top_compref
2464 && integer_zerop (op1))
2466 /* Strip the component reference completely. */
2467 op0 = TREE_OPERAND (expr, 0);
2468 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2469 *offset += off0;
2470 return op0;
2472 break;
2474 case COMPONENT_REF:
2476 tree field;
2478 if (!inside_addr)
2479 return orig_expr;
2481 tmp = component_ref_field_offset (expr);
2482 field = TREE_OPERAND (expr, 1);
2483 if (top_compref
2484 && cst_and_fits_in_hwi (tmp)
2485 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2487 HOST_WIDE_INT boffset, abs_off;
2489 /* Strip the component reference completely. */
2490 op0 = TREE_OPERAND (expr, 0);
2491 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2492 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2493 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2494 if (boffset < 0)
2495 abs_off = -abs_off;
2497 *offset = off0 + int_cst_value (tmp) + abs_off;
2498 return op0;
2501 break;
2503 case ADDR_EXPR:
2504 op0 = TREE_OPERAND (expr, 0);
2505 op0 = strip_offset_1 (op0, true, true, &off0);
2506 *offset += off0;
2508 if (op0 == TREE_OPERAND (expr, 0))
2509 return orig_expr;
2511 expr = build_fold_addr_expr (op0);
2512 return fold_convert (orig_type, expr);
2514 case MEM_REF:
2515 /* ??? Offset operand? */
2516 inside_addr = false;
2517 break;
2519 default:
2520 return orig_expr;
2523 /* Default handling of expressions for which we want to recurse into
2524 the first operand. */
2525 op0 = TREE_OPERAND (expr, 0);
2526 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2527 *offset += off0;
2529 if (op0 == TREE_OPERAND (expr, 0)
2530 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2531 return orig_expr;
2533 expr = copy_node (expr);
2534 TREE_OPERAND (expr, 0) = op0;
2535 if (op1)
2536 TREE_OPERAND (expr, 1) = op1;
2538 /* Inside an address, we might strip the top-level component references,
2539 thus changing the type of the expression. Handling of ADDR_EXPR
2540 will fix that. */
2541 expr = fold_convert (orig_type, expr);
2543 return expr;
2546 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2548 static tree
2549 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2551 HOST_WIDE_INT off;
2552 tree core = strip_offset_1 (expr, false, false, &off);
2553 *offset = off;
2554 return core;
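/* Editor's illustration (assumed 4-byte int elements): for

     expr = &a[i + 3]

   strip_offset returns (roughly) &a[i] and sets *offset to 12, since
   the ARRAY_REF case above multiplies the stripped index offset 3 by
   the element size 4.  A plain INTEGER_CST such as 7 yields a zero
   base with *offset = 7.  */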
2557 /* Returns variant of TYPE that can be used as base for different uses.
2558 We return unsigned type with the same precision, which avoids problems
2559 with overflows. */
2561 static tree
2562 generic_type_for (tree type)
2564 if (POINTER_TYPE_P (type))
2565 return unsigned_type_for (type);
2567 if (TYPE_UNSIGNED (type))
2568 return type;
2570 return unsigned_type_for (type);
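/* Editor's note (sketch of the intent): generic_type_for maps both
   'int' and 'int *' to an unsigned integer type of the same precision,
   so non-original candidate arithmetic wraps instead of invoking
   signed-overflow undefined behaviour.  */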
2573 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2574 the bitmap in which we should store them. */
2576 static struct ivopts_data *fd_ivopts_data;
2577 static tree
2578 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2580 bitmap *depends_on = (bitmap *) data;
2581 struct version_info *info;
2583 if (TREE_CODE (*expr_p) != SSA_NAME)
2584 return NULL_TREE;
2585 info = name_info (fd_ivopts_data, *expr_p);
2587 if (!info->inv_id || info->has_nonlin_use)
2588 return NULL_TREE;
2590 if (!*depends_on)
2591 *depends_on = BITMAP_ALLOC (NULL);
2592 bitmap_set_bit (*depends_on, info->inv_id);
2594 return NULL_TREE;
2597 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2598 position to POS. If USE is not NULL, the candidate is set as related to
2599 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2600 replacement of the final value of the iv by a direct computation. */
2602 static struct iv_cand *
2603 add_candidate_1 (struct ivopts_data *data,
2604 tree base, tree step, bool important, enum iv_position pos,
2605 struct iv_use *use, gimple incremented_at)
2607 unsigned i;
2608 struct iv_cand *cand = NULL;
2609 tree type, orig_type;
2611 /* For non-original variables, make sure their values are computed in a type
2612 that does not invoke undefined behavior on overflows (since in general,
2613 we cannot prove that these induction variables are non-wrapping). */
2614 if (pos != IP_ORIGINAL)
2616 orig_type = TREE_TYPE (base);
2617 type = generic_type_for (orig_type);
2618 if (type != orig_type)
2620 base = fold_convert (type, base);
2621 step = fold_convert (type, step);
2625 for (i = 0; i < n_iv_cands (data); i++)
2627 cand = iv_cand (data, i);
2629 if (cand->pos != pos)
2630 continue;
2632 if (cand->incremented_at != incremented_at
2633 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2634 && cand->ainc_use != use))
2635 continue;
2637 if (!cand->iv)
2639 if (!base && !step)
2640 break;
2642 continue;
2645 if (!base && !step)
2646 continue;
2648 if (operand_equal_p (base, cand->iv->base, 0)
2649 && operand_equal_p (step, cand->iv->step, 0)
2650 && (TYPE_PRECISION (TREE_TYPE (base))
2651 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2652 break;
2655 if (i == n_iv_cands (data))
2657 cand = XCNEW (struct iv_cand);
2658 cand->id = i;
2660 if (!base && !step)
2661 cand->iv = NULL;
2662 else
2663 cand->iv = alloc_iv (data, base, step);
2665 cand->pos = pos;
2666 if (pos != IP_ORIGINAL && cand->iv)
2668 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2669 cand->var_after = cand->var_before;
2671 cand->important = important;
2672 cand->incremented_at = incremented_at;
2673 data->iv_candidates.safe_push (cand);
2675 if (step
2676 && TREE_CODE (step) != INTEGER_CST)
2678 fd_ivopts_data = data;
2679 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2682 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2683 cand->ainc_use = use;
2684 else
2685 cand->ainc_use = NULL;
2687 if (dump_file && (dump_flags & TDF_DETAILS))
2688 dump_cand (dump_file, cand);
2691 if (important && !cand->important)
2693 cand->important = true;
2694 if (dump_file && (dump_flags & TDF_DETAILS))
2695 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2698 if (use)
2700 bitmap_set_bit (use->related_cands, i);
2701 if (dump_file && (dump_flags & TDF_DETAILS))
2702 fprintf (dump_file, "Candidate %d is related to use %d\n",
2703 cand->id, use->id);
2706 return cand;
2709 /* Returns true if incrementing the induction variable at the end of the LOOP
2710 is allowed.
2712 The purpose is to avoid splitting latch edge with a biv increment, thus
2713 creating a jump, possibly confusing other optimization passes and leaving
2714 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2715 is not available (so we do not have a better alternative), or if the latch
2716 edge is already nonempty. */
2718 static bool
2719 allow_ip_end_pos_p (struct loop *loop)
2721 if (!ip_normal_pos (loop))
2722 return true;
2724 if (!empty_block_p (ip_end_pos (loop)))
2725 return true;
2727 return false;
2730 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2731 Important field is set to IMPORTANT. */
2733 static void
2734 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2735 bool important, struct iv_use *use)
2737 basic_block use_bb = gimple_bb (use->stmt);
2738 machine_mode mem_mode;
2739 unsigned HOST_WIDE_INT cstepi;
2741 /* If we insert the increment in any position other than the standard
2742 ones, we must ensure that it is incremented once per iteration.
2743 It must not be in an inner nested loop, or one side of an if
2744 statement. */
2745 if (use_bb->loop_father != data->current_loop
2746 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2747 || stmt_could_throw_p (use->stmt)
2748 || !cst_and_fits_in_hwi (step))
2749 return;
2751 cstepi = int_cst_value (step);
2753 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2754 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2755 || USE_STORE_PRE_INCREMENT (mem_mode))
2756 && GET_MODE_SIZE (mem_mode) == cstepi)
2757 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2758 || USE_STORE_PRE_DECREMENT (mem_mode))
2759 && GET_MODE_SIZE (mem_mode) == -cstepi))
2761 enum tree_code code = MINUS_EXPR;
2762 tree new_base;
2763 tree new_step = step;
2765 if (POINTER_TYPE_P (TREE_TYPE (base)))
2767 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2768 code = POINTER_PLUS_EXPR;
2770 else
2771 new_step = fold_convert (TREE_TYPE (base), new_step);
2772 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2773 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2774 use->stmt);
2776 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2777 || USE_STORE_POST_INCREMENT (mem_mode))
2778 && GET_MODE_SIZE (mem_mode) == cstepi)
2779 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2780 || USE_STORE_POST_DECREMENT (mem_mode))
2781 && GET_MODE_SIZE (mem_mode) == -cstepi))
2783 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2784 use->stmt);
2788 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2789 position to POS. If USE is not NULL, the candidate is set as related to
2790 it. The candidate computation is scheduled at all available positions. */
2792 static void
2793 add_candidate (struct ivopts_data *data,
2794 tree base, tree step, bool important, struct iv_use *use)
2796 gcc_assert (use == NULL || use->sub_id == 0);
2798 if (ip_normal_pos (data->current_loop))
2799 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2800 if (ip_end_pos (data->current_loop)
2801 && allow_ip_end_pos_p (data->current_loop))
2802 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2804 if (use != NULL && use->type == USE_ADDRESS)
2805 add_autoinc_candidates (data, base, step, important, use);
2808 /* Adds standard iv candidates. */
2810 static void
2811 add_standard_iv_candidates (struct ivopts_data *data)
2813 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2815 /* The same for a double-integer type if it is still fast enough. */
2816 if (TYPE_PRECISION
2817 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2818 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2819 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2820 build_int_cst (long_integer_type_node, 1), true, NULL);
2822 /* The same for a double-integer type if it is still fast enough. */
2823 if (TYPE_PRECISION
2824 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2825 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2826 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2827 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2831 /* Adds candidates based on the old induction variable IV. */
2833 static void
2834 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2836 gimple phi;
2837 tree def;
2838 struct iv_cand *cand;
2840 add_candidate (data, iv->base, iv->step, true, NULL);
2842 /* The same, but with initial value zero. */
2843 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2844 add_candidate (data, size_int (0), iv->step, true, NULL);
2845 else
2846 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2847 iv->step, true, NULL);
2849 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2850 if (gimple_code (phi) == GIMPLE_PHI)
2852 /* Additionally record the possibility of leaving the original iv
2853 untouched. */
2854 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2855 /* Don't add candidate if it's from another PHI node because
2856 it's an affine iv appearing in the form of PEELED_CHREC. */
2857 phi = SSA_NAME_DEF_STMT (def);
2858 if (gimple_code (phi) != GIMPLE_PHI)
2860 cand = add_candidate_1 (data,
2861 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2862 SSA_NAME_DEF_STMT (def));
2863 cand->var_before = iv->ssa_name;
2864 cand->var_after = def;
2866 else
2867 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2871 /* Adds candidates based on the old induction variables. */
2873 static void
2874 add_old_ivs_candidates (struct ivopts_data *data)
2876 unsigned i;
2877 struct iv *iv;
2878 bitmap_iterator bi;
2880 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2882 iv = ver_info (data, i)->iv;
2883 if (iv && iv->biv_p && !integer_zerop (iv->step))
2884 add_old_iv_candidates (data, iv);
2888 /* Adds candidates based on the value of the induction variable IV and USE. */
2890 static void
2891 add_iv_value_candidates (struct ivopts_data *data,
2892 struct iv *iv, struct iv_use *use)
2894 unsigned HOST_WIDE_INT offset;
2895 tree base;
2896 tree basetype;
2898 add_candidate (data, iv->base, iv->step, false, use);
2900 /* The same, but with initial value zero. Make such a variable important,
2901 since it is generic enough so that possibly many uses may be based
2902 on it. */
2903 basetype = TREE_TYPE (iv->base);
2904 if (POINTER_TYPE_P (basetype))
2905 basetype = sizetype;
2906 add_candidate (data, build_int_cst (basetype, 0),
2907 iv->step, true, use);
2909 /* Third, try removing the constant offset. Make sure to even
2910 add a candidate for &a[0] vs. (T *)&a. */
2911 base = strip_offset (iv->base, &offset);
2912 if (offset
2913 || base != iv->base)
2914 add_candidate (data, base, iv->step, false, use);
2917 /* Adds candidates based on the uses. */
2919 static void
2920 add_derived_ivs_candidates (struct ivopts_data *data)
2922 unsigned i;
2924 for (i = 0; i < n_iv_uses (data); i++)
2926 struct iv_use *use = iv_use (data, i);
2928 if (!use)
2929 continue;
2931 switch (use->type)
2933 case USE_NONLINEAR_EXPR:
2934 case USE_COMPARE:
2935 case USE_ADDRESS:
2936 /* Just add the ivs based on the value of the iv used here. */
2937 add_iv_value_candidates (data, use->iv, use);
2938 break;
2940 default:
2941 gcc_unreachable ();
2946 /* Record important candidates and add them to related_cands bitmaps
2947 if needed. */
2949 static void
2950 record_important_candidates (struct ivopts_data *data)
2952 unsigned i;
2953 struct iv_use *use;
2955 for (i = 0; i < n_iv_cands (data); i++)
2957 struct iv_cand *cand = iv_cand (data, i);
2959 if (cand->important)
2960 bitmap_set_bit (data->important_candidates, i);
2963 data->consider_all_candidates = (n_iv_cands (data)
2964 <= CONSIDER_ALL_CANDIDATES_BOUND);
2966 if (data->consider_all_candidates)
2968 /* We will not need "related_cands" bitmaps in this case,
2969 so release them to decrease peak memory consumption. */
2970 for (i = 0; i < n_iv_uses (data); i++)
2972 use = iv_use (data, i);
2973 BITMAP_FREE (use->related_cands);
2976 else
2978 /* Add important candidates to the related_cands bitmaps. */
2979 for (i = 0; i < n_iv_uses (data); i++)
2980 bitmap_ior_into (iv_use (data, i)->related_cands,
2981 data->important_candidates);
2985 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2986 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2987 we allocate a simple list to every use. */
2989 static void
2990 alloc_use_cost_map (struct ivopts_data *data)
2992 unsigned i, size, s;
2994 for (i = 0; i < n_iv_uses (data); i++)
2996 struct iv_use *use = iv_use (data, i);
2998 if (data->consider_all_candidates)
2999 size = n_iv_cands (data);
3000 else
3002 s = bitmap_count_bits (use->related_cands);
3004 /* Round up to a power of two, so that taking the modulo by it is fast. */
3005 size = s ? (1 << ceil_log2 (s)) : 1;
3008 use->n_map_members = size;
3009 use->cost_map = XCNEWVEC (struct cost_pair, size);
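/* Editor's illustration: with, say, 5 related candidates the map is
   rounded up to size 1 << ceil_log2 (5) == 8, so a slot can later be
   found by masking instead of a division:

     slot = cand->id & (use->n_map_members - 1);   -- same as cand->id % 8

   as done in set_use_iv_cost / get_use_iv_cost below.  */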
3013 /* Returns description of computation cost of expression whose runtime
3014 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3016 static comp_cost
3017 new_cost (unsigned runtime, unsigned complexity)
3019 comp_cost cost;
3021 cost.cost = runtime;
3022 cost.complexity = complexity;
3024 return cost;
3027 /* Returns true if COST is infinite. */
3029 static bool
3030 infinite_cost_p (comp_cost cost)
3032 return cost.cost == INFTY;
3035 /* Adds costs COST1 and COST2. */
3037 static comp_cost
3038 add_costs (comp_cost cost1, comp_cost cost2)
3040 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3041 return infinite_cost;
3043 cost1.cost += cost2.cost;
3044 cost1.complexity += cost2.complexity;
3046 return cost1;
3048 /* Subtracts costs COST1 and COST2. */
3050 static comp_cost
3051 sub_costs (comp_cost cost1, comp_cost cost2)
3053 cost1.cost -= cost2.cost;
3054 cost1.complexity -= cost2.complexity;
3056 return cost1;
3059 /* Returns a negative number if COST1 < COST2, a positive number if
3060 COST1 > COST2, and 0 if COST1 = COST2. */
3062 static int
3063 compare_costs (comp_cost cost1, comp_cost cost2)
3065 if (cost1.cost == cost2.cost)
3066 return cost1.complexity - cost2.complexity;
3068 return cost1.cost - cost2.cost;
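/* Editor's illustration (made-up numbers): add_costs ((4,1), (3,0))
   yields (7,1); compare_costs ((7,1), (7,2)) is negative, so the pair
   with the same runtime cost but lower complexity is considered
   cheaper.  */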
3071 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
3072 on invariants DEPENDS_ON and that the value used in expressing it
3073 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3075 static void
3076 set_use_iv_cost (struct ivopts_data *data,
3077 struct iv_use *use, struct iv_cand *cand,
3078 comp_cost cost, bitmap depends_on, tree value,
3079 enum tree_code comp, int inv_expr_id)
3081 unsigned i, s;
3083 if (infinite_cost_p (cost))
3085 BITMAP_FREE (depends_on);
3086 return;
3089 if (data->consider_all_candidates)
3091 use->cost_map[cand->id].cand = cand;
3092 use->cost_map[cand->id].cost = cost;
3093 use->cost_map[cand->id].depends_on = depends_on;
3094 use->cost_map[cand->id].value = value;
3095 use->cost_map[cand->id].comp = comp;
3096 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3097 return;
3100 /* n_map_members is a power of two, so this computes modulo. */
3101 s = cand->id & (use->n_map_members - 1);
3102 for (i = s; i < use->n_map_members; i++)
3103 if (!use->cost_map[i].cand)
3104 goto found;
3105 for (i = 0; i < s; i++)
3106 if (!use->cost_map[i].cand)
3107 goto found;
3109 gcc_unreachable ();
3111 found:
3112 use->cost_map[i].cand = cand;
3113 use->cost_map[i].cost = cost;
3114 use->cost_map[i].depends_on = depends_on;
3115 use->cost_map[i].value = value;
3116 use->cost_map[i].comp = comp;
3117 use->cost_map[i].inv_expr_id = inv_expr_id;
3120 /* Gets cost of (USE, CANDIDATE) pair. */
3122 static struct cost_pair *
3123 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3124 struct iv_cand *cand)
3126 unsigned i, s;
3127 struct cost_pair *ret;
3129 if (!cand)
3130 return NULL;
3132 if (data->consider_all_candidates)
3134 ret = use->cost_map + cand->id;
3135 if (!ret->cand)
3136 return NULL;
3138 return ret;
3141 /* n_map_members is a power of two, so this computes modulo. */
3142 s = cand->id & (use->n_map_members - 1);
3143 for (i = s; i < use->n_map_members; i++)
3144 if (use->cost_map[i].cand == cand)
3145 return use->cost_map + i;
3146 else if (use->cost_map[i].cand == NULL)
3147 return NULL;
3148 for (i = 0; i < s; i++)
3149 if (use->cost_map[i].cand == cand)
3150 return use->cost_map + i;
3151 else if (use->cost_map[i].cand == NULL)
3152 return NULL;
3154 return NULL;
3157 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3158 static rtx
3159 produce_memory_decl_rtl (tree obj, int *regno)
3161 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3162 machine_mode address_mode = targetm.addr_space.address_mode (as);
3163 rtx x;
3165 gcc_assert (obj);
3166 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3168 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3169 x = gen_rtx_SYMBOL_REF (address_mode, name);
3170 SET_SYMBOL_REF_DECL (x, obj);
3171 x = gen_rtx_MEM (DECL_MODE (obj), x);
3172 set_mem_addr_space (x, as);
3173 targetm.encode_section_info (obj, x, true);
3175 else
3177 x = gen_raw_REG (address_mode, (*regno)++);
3178 x = gen_rtx_MEM (DECL_MODE (obj), x);
3179 set_mem_addr_space (x, as);
3182 return x;
3185 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3186 walk_tree. DATA contains the actual fake register number. */
3188 static tree
3189 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3191 tree obj = NULL_TREE;
3192 rtx x = NULL_RTX;
3193 int *regno = (int *) data;
3195 switch (TREE_CODE (*expr_p))
3197 case ADDR_EXPR:
3198 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3199 handled_component_p (*expr_p);
3200 expr_p = &TREE_OPERAND (*expr_p, 0))
3201 continue;
3202 obj = *expr_p;
3203 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3204 x = produce_memory_decl_rtl (obj, regno);
3205 break;
3207 case SSA_NAME:
3208 *ws = 0;
3209 obj = SSA_NAME_VAR (*expr_p);
3210 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3211 if (!obj)
3212 return NULL_TREE;
3213 if (!DECL_RTL_SET_P (obj))
3214 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3215 break;
3217 case VAR_DECL:
3218 case PARM_DECL:
3219 case RESULT_DECL:
3220 *ws = 0;
3221 obj = *expr_p;
3223 if (DECL_RTL_SET_P (obj))
3224 break;
3226 if (DECL_MODE (obj) == BLKmode)
3227 x = produce_memory_decl_rtl (obj, regno);
3228 else
3229 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3231 break;
3233 default:
3234 break;
3237 if (x)
3239 decl_rtl_to_reset.safe_push (obj);
3240 SET_DECL_RTL (obj, x);
3243 return NULL_TREE;
3246 /* Determines cost of the computation of EXPR. */
3248 static unsigned
3249 computation_cost (tree expr, bool speed)
3251 rtx_insn *seq;
3252 rtx rslt;
3253 tree type = TREE_TYPE (expr);
3254 unsigned cost;
3255 /* Avoid using hard regs in ways which may be unsupported. */
3256 int regno = LAST_VIRTUAL_REGISTER + 1;
3257 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3258 enum node_frequency real_frequency = node->frequency;
3260 node->frequency = NODE_FREQUENCY_NORMAL;
3261 crtl->maybe_hot_insn_p = speed;
3262 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3263 start_sequence ();
3264 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3265 seq = get_insns ();
3266 end_sequence ();
3267 default_rtl_profile ();
3268 node->frequency = real_frequency;
3270 cost = seq_cost (seq, speed);
3271 if (MEM_P (rslt))
3272 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3273 TYPE_ADDR_SPACE (type), speed);
3274 else if (!REG_P (rslt))
3275 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3277 return cost;
3280 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3282 static tree
3283 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3285 if (stmt_after_increment (loop, cand, stmt))
3286 return cand->var_after;
3287 else
3288 return cand->var_before;
3291 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3292 same precision that is at least as wide as the precision of TYPE, stores
3293 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3294 type of A and B. */
3296 static tree
3297 determine_common_wider_type (tree *a, tree *b)
3299 tree wider_type = NULL;
3300 tree suba, subb;
3301 tree atype = TREE_TYPE (*a);
3303 if (CONVERT_EXPR_P (*a))
3305 suba = TREE_OPERAND (*a, 0);
3306 wider_type = TREE_TYPE (suba);
3307 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3308 return atype;
3310 else
3311 return atype;
3313 if (CONVERT_EXPR_P (*b))
3315 subb = TREE_OPERAND (*b, 0);
3316 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3317 return atype;
3319 else
3320 return atype;
3322 *a = suba;
3323 *b = subb;
3324 return wider_type;
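/* Editor's illustration (assumed types): if *a is (unsigned int) x and
   *b is (unsigned int) y, with x and y of a wider type such as
   'unsigned long' of equal precision, A and B are replaced by x and y
   and that wider type is returned; otherwise the original type of *a
   is kept.  */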
3327 /* Determines the expression by which USE is expressed from induction variable
3328 CAND at statement AT in LOOP. The expression is stored in a decomposed
3329 form into AFF. Returns false if USE cannot be expressed using CAND. */
3331 static bool
3332 get_computation_aff (struct loop *loop,
3333 struct iv_use *use, struct iv_cand *cand, gimple at,
3334 struct aff_tree *aff)
3336 tree ubase = use->iv->base;
3337 tree ustep = use->iv->step;
3338 tree cbase = cand->iv->base;
3339 tree cstep = cand->iv->step, cstep_common;
3340 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3341 tree common_type, var;
3342 tree uutype;
3343 aff_tree cbase_aff, var_aff;
3344 widest_int rat;
3346 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3348 /* We do not have enough precision to express the values of USE. */
3349 return false;
3352 var = var_at_stmt (loop, cand, at);
3353 uutype = unsigned_type_for (utype);
3355 /* If the conversion is not noop, perform it. */
3356 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3358 cstep = fold_convert (uutype, cstep);
3359 cbase = fold_convert (uutype, cbase);
3360 var = fold_convert (uutype, var);
3363 if (!constant_multiple_of (ustep, cstep, &rat))
3364 return false;
3366 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3367 type, we achieve better folding by computing their difference in this
3368 wider type, and cast the result to UUTYPE. We do not need to worry about
3369 overflows, as all the arithmetic will in the end be performed in UUTYPE
3370 anyway. */
3371 common_type = determine_common_wider_type (&ubase, &cbase);
3373 /* use = ubase - ratio * cbase + ratio * var. */
3374 tree_to_aff_combination (ubase, common_type, aff);
3375 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3376 tree_to_aff_combination (var, uutype, &var_aff);
3378 /* We need to shift the value if we are after the increment. */
3379 if (stmt_after_increment (loop, cand, at))
3381 aff_tree cstep_aff;
3383 if (common_type != uutype)
3384 cstep_common = fold_convert (common_type, cstep);
3385 else
3386 cstep_common = cstep;
3388 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3389 aff_combination_add (&cbase_aff, &cstep_aff);
3392 aff_combination_scale (&cbase_aff, -rat);
3393 aff_combination_add (aff, &cbase_aff);
3394 if (common_type != uutype)
3395 aff_combination_convert (aff, uutype);
3397 aff_combination_scale (&var_aff, rat);
3398 aff_combination_add (aff, &var_aff);
3400 return true;
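/* Editor's illustration (made-up ivs): for a use with iv {base 0,
   step 4} and a candidate with iv {base 0, step 1},
   constant_multiple_of gives rat = 4 and the affine form of

     use = ubase - rat * cbase + rat * var = 4 * var

   is produced; with cbase = 8 it would instead be 4 * var - 32.  */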
3403 /* Return the type of USE. */
3405 static tree
3406 get_use_type (struct iv_use *use)
3408 tree base_type = TREE_TYPE (use->iv->base);
3409 tree type;
3411 if (use->type == USE_ADDRESS)
3413 /* The base_type may be a void pointer. Create a pointer type based on
3414 the mem_ref instead. */
3415 type = build_pointer_type (TREE_TYPE (*use->op_p));
3416 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3417 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3419 else
3420 type = base_type;
3422 return type;
3425 /* Determines the expression by which USE is expressed from induction variable
3426 CAND at statement AT in LOOP. The computation is unshared. */
3428 static tree
3429 get_computation_at (struct loop *loop,
3430 struct iv_use *use, struct iv_cand *cand, gimple at)
3432 aff_tree aff;
3433 tree type = get_use_type (use);
3435 if (!get_computation_aff (loop, use, cand, at, &aff))
3436 return NULL_TREE;
3437 unshare_aff_combination (&aff);
3438 return fold_convert (type, aff_combination_to_tree (&aff));
3441 /* Determines the expression by which USE is expressed from induction variable
3442 CAND in LOOP. The computation is unshared. */
3444 static tree
3445 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3447 return get_computation_at (loop, use, cand, use->stmt);
3450 /* Adjust the cost COST for being in loop setup rather than loop body.
3451 If we're optimizing for space, the loop setup overhead is constant;
3452 if we're optimizing for speed, amortize it over the per-iteration cost. */
3453 static unsigned
3454 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3456 if (cost == INFTY)
3457 return cost;
3458 else if (optimize_loop_for_speed_p (data->current_loop))
3459 return cost / avg_loop_niter (data->current_loop);
3460 else
3461 return cost;
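/* Editor's illustration (made-up numbers): a setup cost of 20 in a loop
   with avg_loop_niter == 10 is accounted as 2 per iteration when
   optimizing for speed, but stays 20 when optimizing for size, since
   setup code contributes its full size exactly once.  */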
3464 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3465 validity for a memory reference accessing memory of mode MODE in
3466 address space AS. */
3469 bool
3470 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3471 addr_space_t as)
3473 #define MAX_RATIO 128
3474 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3475 static vec<sbitmap> valid_mult_list;
3476 sbitmap valid_mult;
3478 if (data_index >= valid_mult_list.length ())
3479 valid_mult_list.safe_grow_cleared (data_index + 1);
3481 valid_mult = valid_mult_list[data_index];
3482 if (!valid_mult)
3484 machine_mode address_mode = targetm.addr_space.address_mode (as);
3485 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3486 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3487 rtx addr, scaled;
3488 HOST_WIDE_INT i;
3490 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3491 bitmap_clear (valid_mult);
3492 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3493 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3494 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3496 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3497 if (memory_address_addr_space_p (mode, addr, as)
3498 || memory_address_addr_space_p (mode, scaled, as))
3499 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3502 if (dump_file && (dump_flags & TDF_DETAILS))
3504 fprintf (dump_file, " allowed multipliers:");
3505 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3506 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3507 fprintf (dump_file, " %d", (int) i);
3508 fprintf (dump_file, "\n");
3509 fprintf (dump_file, "\n");
3512 valid_mult_list[data_index] = valid_mult;
3515 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3516 return false;
3518 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
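/* Editor's illustration (typical x86-like target, assumed): the probe
   above would leave the bits for ratios such as 1, 2, 4 and 8 set in
   valid_mult, so

     multiplier_allowed_in_address_p (4, SImode, as)

   returns true, while a ratio of 3, or of 200 (beyond MAX_RATIO),
   does not.  */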
3521 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3522 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3523 variable is omitted. Compute the cost for a memory reference that accesses
3524 a memory location of mode MEM_MODE in address space AS.
3526 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3527 size of MEM_MODE / RATIO) is available. To make this determination, we
3528 look at the size of the increment to be made, which is given in CSTEP.
3529 CSTEP may be zero if the step is unknown.
3530 STMT_AFTER_INC is true iff the statement we're looking at is after the
3531 increment of the original biv.
3533 TODO -- there must be some better way. This all is quite crude. */
3535 enum ainc_type
3537 AINC_PRE_INC, /* Pre increment. */
3538 AINC_PRE_DEC, /* Pre decrement. */
3539 AINC_POST_INC, /* Post increment. */
3540 AINC_POST_DEC, /* Post decrement. */
3541 AINC_NONE /* Also the number of auto increment types. */
3544 typedef struct address_cost_data_s
3546 HOST_WIDE_INT min_offset, max_offset;
3547 unsigned costs[2][2][2][2];
3548 unsigned ainc_costs[AINC_NONE];
3549 } *address_cost_data;
3552 static comp_cost
3553 get_address_cost (bool symbol_present, bool var_present,
3554 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3555 HOST_WIDE_INT cstep, machine_mode mem_mode,
3556 addr_space_t as, bool speed,
3557 bool stmt_after_inc, bool *may_autoinc)
3559 machine_mode address_mode = targetm.addr_space.address_mode (as);
3560 static vec<address_cost_data> address_cost_data_list;
3561 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3562 address_cost_data data;
3563 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3564 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3565 unsigned cost, acost, complexity;
3566 enum ainc_type autoinc_type;
3567 bool offset_p, ratio_p, autoinc;
3568 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3569 unsigned HOST_WIDE_INT mask;
3570 unsigned bits;
3572 if (data_index >= address_cost_data_list.length ())
3573 address_cost_data_list.safe_grow_cleared (data_index + 1);
3575 data = address_cost_data_list[data_index];
3576 if (!data)
3578 HOST_WIDE_INT i;
3579 HOST_WIDE_INT rat, off = 0;
3580 int old_cse_not_expected, width;
3581 unsigned sym_p, var_p, off_p, rat_p, add_c;
3582 rtx_insn *seq;
3583 rtx addr, base;
3584 rtx reg0, reg1;
3586 data = (address_cost_data) xcalloc (1, sizeof (*data));
3588 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3590 width = GET_MODE_BITSIZE (address_mode) - 1;
3591 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3592 width = HOST_BITS_PER_WIDE_INT - 1;
3593 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3595 for (i = width; i >= 0; i--)
3597 off = -((unsigned HOST_WIDE_INT) 1 << i);
3598 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3599 if (memory_address_addr_space_p (mem_mode, addr, as))
3600 break;
3602 data->min_offset = (i == -1? 0 : off);
3604 for (i = width; i >= 0; i--)
3606 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3607 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3608 if (memory_address_addr_space_p (mem_mode, addr, as))
3609 break;
3610 /* For some strict-alignment targets, the offset must be naturally
3611 aligned. Try an aligned offset if mem_mode is not QImode. */
3612 off = mem_mode != QImode
3613 ? ((unsigned HOST_WIDE_INT) 1 << i)
3614 - GET_MODE_SIZE (mem_mode)
3615 : 0;
3616 if (off > 0)
3618 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3619 if (memory_address_addr_space_p (mem_mode, addr, as))
3620 break;
3623 if (i == -1)
3624 off = 0;
3625 data->max_offset = off;
3627 if (dump_file && (dump_flags & TDF_DETAILS))
3629 fprintf (dump_file, "get_address_cost:\n");
3630 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3631 GET_MODE_NAME (mem_mode),
3632 data->min_offset);
3633 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3634 GET_MODE_NAME (mem_mode),
3635 data->max_offset);
3638 rat = 1;
3639 for (i = 2; i <= MAX_RATIO; i++)
3640 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3642 rat = i;
3643 break;
3646 /* Compute the cost of various addressing modes. */
3647 acost = 0;
3648 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3649 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3651 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3652 || USE_STORE_PRE_DECREMENT (mem_mode))
3654 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3655 has_predec[mem_mode]
3656 = memory_address_addr_space_p (mem_mode, addr, as);
3658 if (has_predec[mem_mode])
3659 data->ainc_costs[AINC_PRE_DEC]
3660 = address_cost (addr, mem_mode, as, speed);
3662 if (USE_LOAD_POST_DECREMENT (mem_mode)
3663 || USE_STORE_POST_DECREMENT (mem_mode))
3665 addr = gen_rtx_POST_DEC (address_mode, reg0);
3666 has_postdec[mem_mode]
3667 = memory_address_addr_space_p (mem_mode, addr, as);
3669 if (has_postdec[mem_mode])
3670 data->ainc_costs[AINC_POST_DEC]
3671 = address_cost (addr, mem_mode, as, speed);
3673 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3674 || USE_STORE_PRE_INCREMENT (mem_mode))
3676 addr = gen_rtx_PRE_INC (address_mode, reg0);
3677 has_preinc[mem_mode]
3678 = memory_address_addr_space_p (mem_mode, addr, as);
3680 if (has_preinc[mem_mode])
3681 data->ainc_costs[AINC_PRE_INC]
3682 = address_cost (addr, mem_mode, as, speed);
3684 if (USE_LOAD_POST_INCREMENT (mem_mode)
3685 || USE_STORE_POST_INCREMENT (mem_mode))
3687 addr = gen_rtx_POST_INC (address_mode, reg0);
3688 has_postinc[mem_mode]
3689 = memory_address_addr_space_p (mem_mode, addr, as);
3691 if (has_postinc[mem_mode])
3692 data->ainc_costs[AINC_POST_INC]
3693 = address_cost (addr, mem_mode, as, speed);
3695 for (i = 0; i < 16; i++)
3697 sym_p = i & 1;
3698 var_p = (i >> 1) & 1;
3699 off_p = (i >> 2) & 1;
3700 rat_p = (i >> 3) & 1;
3702 addr = reg0;
3703 if (rat_p)
3704 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3705 gen_int_mode (rat, address_mode));
3707 if (var_p)
3708 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3710 if (sym_p)
3712 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3713 /* ??? We can run into trouble with some backends by presenting
3714 it with symbols which haven't been properly passed through
3715 targetm.encode_section_info. By setting the local bit, we
3716 enhance the probability of things working. */
3717 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3719 if (off_p)
3720 base = gen_rtx_fmt_e (CONST, address_mode,
3721 gen_rtx_fmt_ee
3722 (PLUS, address_mode, base,
3723 gen_int_mode (off, address_mode)));
3725 else if (off_p)
3726 base = gen_int_mode (off, address_mode);
3727 else
3728 base = NULL_RTX;
3730 if (base)
3731 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3733 start_sequence ();
3734 /* To avoid splitting addressing modes, pretend that no cse will
3735 follow. */
3736 old_cse_not_expected = cse_not_expected;
3737 cse_not_expected = true;
3738 addr = memory_address_addr_space (mem_mode, addr, as);
3739 cse_not_expected = old_cse_not_expected;
3740 seq = get_insns ();
3741 end_sequence ();
3743 acost = seq_cost (seq, speed);
3744 acost += address_cost (addr, mem_mode, as, speed);
3746 if (!acost)
3747 acost = 1;
3748 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3751 /* On some targets, it is quite expensive to load a symbol into a register,
3752 which makes addresses that contain symbols look much more expensive.
3753 However, the symbol will have to be loaded in any case before the
3754 loop (and quite likely we have it in register already), so it does not
3755 make much sense to penalize them too heavily. So make some final
3756 tweaks for the SYMBOL_PRESENT modes:
3758 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3759 var is cheaper, use this mode with small penalty.
3760 If VAR_PRESENT is true, try whether the mode with
3761 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3762 if this is the case, use it. */
3763 add_c = add_cost (speed, address_mode);
3764 for (i = 0; i < 8; i++)
3766 var_p = i & 1;
3767 off_p = (i >> 1) & 1;
3768 rat_p = (i >> 2) & 1;
3770 acost = data->costs[0][1][off_p][rat_p] + 1;
3771 if (var_p)
3772 acost += add_c;
3774 if (acost < data->costs[1][var_p][off_p][rat_p])
3775 data->costs[1][var_p][off_p][rat_p] = acost;
3778 if (dump_file && (dump_flags & TDF_DETAILS))
3780 fprintf (dump_file, "Address costs:\n");
3782 for (i = 0; i < 16; i++)
3784 sym_p = i & 1;
3785 var_p = (i >> 1) & 1;
3786 off_p = (i >> 2) & 1;
3787 rat_p = (i >> 3) & 1;
3789 fprintf (dump_file, " ");
3790 if (sym_p)
3791 fprintf (dump_file, "sym + ");
3792 if (var_p)
3793 fprintf (dump_file, "var + ");
3794 if (off_p)
3795 fprintf (dump_file, "cst + ");
3796 if (rat_p)
3797 fprintf (dump_file, "rat * ");
3799 acost = data->costs[sym_p][var_p][off_p][rat_p];
3800 fprintf (dump_file, "index costs %d\n", acost);
3802 if (has_predec[mem_mode] || has_postdec[mem_mode]
3803 || has_preinc[mem_mode] || has_postinc[mem_mode])
3804 fprintf (dump_file, " May include autoinc/dec\n");
3805 fprintf (dump_file, "\n");
3808 address_cost_data_list[data_index] = data;
3811 bits = GET_MODE_BITSIZE (address_mode);
3812 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3813 offset &= mask;
3814 if ((offset >> (bits - 1) & 1))
3815 offset |= ~mask;
3816 s_offset = offset;
3818 autoinc = false;
3819 autoinc_type = AINC_NONE;
3820 msize = GET_MODE_SIZE (mem_mode);
3821 autoinc_offset = offset;
3822 if (stmt_after_inc)
3823 autoinc_offset += ratio * cstep;
3824 if (symbol_present || var_present || ratio != 1)
3825 autoinc = false;
3826 else
3828 if (has_postinc[mem_mode] && autoinc_offset == 0
3829 && msize == cstep)
3830 autoinc_type = AINC_POST_INC;
3831 else if (has_postdec[mem_mode] && autoinc_offset == 0
3832 && msize == -cstep)
3833 autoinc_type = AINC_POST_DEC;
3834 else if (has_preinc[mem_mode] && autoinc_offset == msize
3835 && msize == cstep)
3836 autoinc_type = AINC_PRE_INC;
3837 else if (has_predec[mem_mode] && autoinc_offset == -msize
3838 && msize == -cstep)
3839 autoinc_type = AINC_PRE_DEC;
3841 if (autoinc_type != AINC_NONE)
3842 autoinc = true;
3845 cost = 0;
3846 offset_p = (s_offset != 0
3847 && data->min_offset <= s_offset
3848 && s_offset <= data->max_offset);
3849 ratio_p = (ratio != 1
3850 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3852 if (ratio != 1 && !ratio_p)
3853 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3855 if (s_offset && !offset_p && !symbol_present)
3856 cost += add_cost (speed, address_mode);
3858 if (may_autoinc)
3859 *may_autoinc = autoinc;
3860 if (autoinc)
3861 acost = data->ainc_costs[autoinc_type];
3862 else
3863 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3864 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3865 return new_cost (cost + acost, complexity);
3868 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3869 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3870 calculating the operands of EXPR. Returns true if successful, and returns
3871 the cost in COST. */
3873 static bool
3874 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3875 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3877 comp_cost res;
3878 tree op1 = TREE_OPERAND (expr, 1);
3879 tree cst = TREE_OPERAND (mult, 1);
3880 tree multop = TREE_OPERAND (mult, 0);
3881 int m = exact_log2 (int_cst_value (cst));
3882 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3883 int as_cost, sa_cost;
3884 bool mult_in_op1;
3886 if (!(m >= 0 && m < maxm))
3887 return false;
3889 mult_in_op1 = operand_equal_p (op1, mult, 0);
3891 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3893 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3894 use that in preference to a shift insn followed by an add insn. */
3895 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3896 ? shiftadd_cost (speed, mode, m)
3897 : (mult_in_op1
3898 ? shiftsub1_cost (speed, mode, m)
3899 : shiftsub0_cost (speed, mode, m)));
3901 res = new_cost (MIN (as_cost, sa_cost), 0);
3902 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3904 STRIP_NOPS (multop);
3905 if (!is_gimple_val (multop))
3906 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3908 *cost = res;
3909 return true;
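/* Editor's illustration (assumed costs): for expr = a + b * 4 the MULT
   operand gives m = exact_log2 (4) = 2, and the function compares

     as_cost = add_cost + shift_cost (m)   -- (b << 2) + a, two insns
     sa_cost = shiftadd_cost (m)           -- one shift-and-add insn

   picking the cheaper; on targets with lea-like instructions sa_cost
   typically wins.  */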
3912 /* Estimates cost of forcing expression EXPR into a variable. */
3914 static comp_cost
3915 force_expr_to_var_cost (tree expr, bool speed)
3917 static bool costs_initialized = false;
3918 static unsigned integer_cost [2];
3919 static unsigned symbol_cost [2];
3920 static unsigned address_cost [2];
3921 tree op0, op1;
3922 comp_cost cost0, cost1, cost;
3923 machine_mode mode;
3925 if (!costs_initialized)
3927 tree type = build_pointer_type (integer_type_node);
3928 tree var, addr;
3929 rtx x;
3930 int i;
3932 var = create_tmp_var_raw (integer_type_node, "test_var");
3933 TREE_STATIC (var) = 1;
3934 x = produce_memory_decl_rtl (var, NULL);
3935 SET_DECL_RTL (var, x);
3937 addr = build1 (ADDR_EXPR, type, var);
3940 for (i = 0; i < 2; i++)
3942 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3943 2000), i);
3945 symbol_cost[i] = computation_cost (addr, i) + 1;
3947 address_cost[i]
3948 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3949 if (dump_file && (dump_flags & TDF_DETAILS))
3951 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3952 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3953 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3954 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3955 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3956 fprintf (dump_file, "\n");
3960 costs_initialized = true;
3963 STRIP_NOPS (expr);
3965 if (SSA_VAR_P (expr))
3966 return no_cost;
3968 if (is_gimple_min_invariant (expr))
3970 if (TREE_CODE (expr) == INTEGER_CST)
3971 return new_cost (integer_cost [speed], 0);
3973 if (TREE_CODE (expr) == ADDR_EXPR)
3975 tree obj = TREE_OPERAND (expr, 0);
3977 if (TREE_CODE (obj) == VAR_DECL
3978 || TREE_CODE (obj) == PARM_DECL
3979 || TREE_CODE (obj) == RESULT_DECL)
3980 return new_cost (symbol_cost [speed], 0);
3983 return new_cost (address_cost [speed], 0);
3986 switch (TREE_CODE (expr))
3988 case POINTER_PLUS_EXPR:
3989 case PLUS_EXPR:
3990 case MINUS_EXPR:
3991 case MULT_EXPR:
3992 op0 = TREE_OPERAND (expr, 0);
3993 op1 = TREE_OPERAND (expr, 1);
3994 STRIP_NOPS (op0);
3995 STRIP_NOPS (op1);
3996 break;
3998 CASE_CONVERT:
3999 case NEGATE_EXPR:
4000 op0 = TREE_OPERAND (expr, 0);
4001 STRIP_NOPS (op0);
4002 op1 = NULL_TREE;
4003 break;
4005 default:
4006 /* Just an arbitrary value, FIXME. */
4007 return new_cost (target_spill_cost[speed], 0);
4010 if (op0 == NULL_TREE
4011 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4012 cost0 = no_cost;
4013 else
4014 cost0 = force_expr_to_var_cost (op0, speed);
4016 if (op1 == NULL_TREE
4017 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4018 cost1 = no_cost;
4019 else
4020 cost1 = force_expr_to_var_cost (op1, speed);
4022 mode = TYPE_MODE (TREE_TYPE (expr));
4023 switch (TREE_CODE (expr))
4025 case POINTER_PLUS_EXPR:
4026 case PLUS_EXPR:
4027 case MINUS_EXPR:
4028 case NEGATE_EXPR:
4029 cost = new_cost (add_cost (speed, mode), 0);
4030 if (TREE_CODE (expr) != NEGATE_EXPR)
4032 tree mult = NULL_TREE;
4033 comp_cost sa_cost;
4034 if (TREE_CODE (op1) == MULT_EXPR)
4035 mult = op1;
4036 else if (TREE_CODE (op0) == MULT_EXPR)
4037 mult = op0;
4039 if (mult != NULL_TREE
4040 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4041 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4042 speed, &sa_cost))
4043 return sa_cost;
4045 break;
4047 CASE_CONVERT:
4049 tree inner_mode, outer_mode;
4050 outer_mode = TREE_TYPE (expr);
4051 inner_mode = TREE_TYPE (op0);
4052 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4053 TYPE_MODE (inner_mode), speed), 0);
4055 break;
4057 case MULT_EXPR:
4058 if (cst_and_fits_in_hwi (op0))
4059 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4060 mode, speed), 0);
4061 else if (cst_and_fits_in_hwi (op1))
4062 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4063 mode, speed), 0);
4064 else
4065 return new_cost (target_spill_cost [speed], 0);
4066 break;
4068 default:
4069 gcc_unreachable ();
4072 cost = add_costs (cost, cost0);
4073 cost = add_costs (cost, cost1);
4075 /* Bound the cost by target_spill_cost. The parts of complicated
4076 computations often are either loop invariant or at least can
4077 be shared between several iv uses, so letting this grow without
4078 limits would not give reasonable results. */
4079 if (cost.cost > (int) target_spill_cost [speed])
4080 cost.cost = target_spill_cost [speed];
4082 return cost;
4085 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4086 invariants the computation depends on. */
4088 static comp_cost
4089 force_var_cost (struct ivopts_data *data,
4090 tree expr, bitmap *depends_on)
4092 if (depends_on)
4094 fd_ivopts_data = data;
4095 walk_tree (&expr, find_depends, depends_on, NULL);
4098 return force_expr_to_var_cost (expr, data->speed);
4101 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4102 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4103 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4104 invariants the computation depends on. */
4106 static comp_cost
4107 split_address_cost (struct ivopts_data *data,
4108 tree addr, bool *symbol_present, bool *var_present,
4109 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4111 tree core;
4112 HOST_WIDE_INT bitsize;
4113 HOST_WIDE_INT bitpos;
4114 tree toffset;
4115 machine_mode mode;
4116 int unsignedp, volatilep;
4118 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4119 &unsignedp, &volatilep, false);
4121 if (toffset != 0
4122 || bitpos % BITS_PER_UNIT != 0
4123 || TREE_CODE (core) != VAR_DECL)
4125 *symbol_present = false;
4126 *var_present = true;
4127 fd_ivopts_data = data;
4128 walk_tree (&addr, find_depends, depends_on, NULL);
4129 return new_cost (target_spill_cost[data->speed], 0);
4132 *offset += bitpos / BITS_PER_UNIT;
4133 if (TREE_STATIC (core)
4134 || DECL_EXTERNAL (core))
4136 *symbol_present = true;
4137 *var_present = false;
4138 return no_cost;
4141 *symbol_present = false;
4142 *var_present = true;
4143 return no_cost;
4146 /* Estimates cost of expressing difference of addresses E1 - E2 as
4147 var + symbol + offset. The value of offset is added to OFFSET,
4148 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4149 part is missing. DEPENDS_ON is a set of the invariants the computation
4150 depends on. */
4152 static comp_cost
4153 ptr_difference_cost (struct ivopts_data *data,
4154 tree e1, tree e2, bool *symbol_present, bool *var_present,
4155 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4157 HOST_WIDE_INT diff = 0;
4158 aff_tree aff_e1, aff_e2;
4159 tree type;
4161 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4163 if (ptr_difference_const (e1, e2, &diff))
4165 *offset += diff;
4166 *symbol_present = false;
4167 *var_present = false;
4168 return no_cost;
4171 if (integer_zerop (e2))
4172 return split_address_cost (data, TREE_OPERAND (e1, 0),
4173 symbol_present, var_present, offset, depends_on);
4175 *symbol_present = false;
4176 *var_present = true;
4178 type = signed_type_for (TREE_TYPE (e1));
4179 tree_to_aff_combination (e1, type, &aff_e1);
4180 tree_to_aff_combination (e2, type, &aff_e2);
4181 aff_combination_scale (&aff_e2, -1);
4182 aff_combination_add (&aff_e1, &aff_e2);
4184 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
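/* Examples: a difference such as &a[i + 4] - &a[i] that
   ptr_difference_const can fold only adjusts *offset and is free;
   a difference of unrelated addresses does not fold, so the affine
   form E1 - E2 is built and priced by force_var_cost.  */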
4187 /* Estimates cost of expressing difference E1 - E2 as
4188 var + symbol + offset. The value of offset is added to OFFSET,
4189 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4190 part is missing. DEPENDS_ON is a set of the invariants the computation
4191 depends on. */
4193 static comp_cost
4194 difference_cost (struct ivopts_data *data,
4195 tree e1, tree e2, bool *symbol_present, bool *var_present,
4196 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4198 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4199 unsigned HOST_WIDE_INT off1, off2;
4200 aff_tree aff_e1, aff_e2;
4201 tree type;
4203 e1 = strip_offset (e1, &off1);
4204 e2 = strip_offset (e2, &off2);
4205 *offset += off1 - off2;
4207 STRIP_NOPS (e1);
4208 STRIP_NOPS (e2);
4210 if (TREE_CODE (e1) == ADDR_EXPR)
4211 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4212 offset, depends_on);
4213 *symbol_present = false;
4215 if (operand_equal_p (e1, e2, 0))
4217 *var_present = false;
4218 return no_cost;
4221 *var_present = true;
4223 if (integer_zerop (e2))
4224 return force_var_cost (data, e1, depends_on);
4226 if (integer_zerop (e1))
4228 comp_cost cost = force_var_cost (data, e2, depends_on);
4229 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4230 return cost;
4233 type = signed_type_for (TREE_TYPE (e1));
4234 tree_to_aff_combination (e1, type, &aff_e1);
4235 tree_to_aff_combination (e2, type, &aff_e2);
4236 aff_combination_scale (&aff_e2, -1);
4237 aff_combination_add (&aff_e1, &aff_e2);
4239 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
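/* Examples: for (x + 8) - x the offsets are stripped, the remaining
   operands compare equal, and no_cost is returned with *offset
   increased by 8; for 0 - x the result is the cost of forcing x into
   a variable plus mult_by_coeff_cost (-1, ...) for the negation.  */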
4242 /* Returns true if AFF1 and AFF2 are identical. */
4244 static bool
4245 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4247 unsigned i;
4249 if (aff1->n != aff2->n)
4250 return false;
4252 for (i = 0; i < aff1->n; i++)
4254 if (aff1->elts[i].coef != aff2->elts[i].coef)
4255 return false;
4257 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4258 return false;
4260 return true;
4263 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4265 static int
4266 get_expr_id (struct ivopts_data *data, tree expr)
4268 struct iv_inv_expr_ent ent;
4269 struct iv_inv_expr_ent **slot;
4271 ent.expr = expr;
4272 ent.hash = iterative_hash_expr (expr, 0);
4273 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4274 if (*slot)
4275 return (*slot)->id;
4277 *slot = XNEW (struct iv_inv_expr_ent);
4278 (*slot)->expr = expr;
4279 (*slot)->hash = ent.hash;
4280 (*slot)->id = data->inv_expr_id++;
4281 return (*slot)->id;
4284 /* Returns the pseudo expr id if the expression UBASE - RATIO * CBASE
4285 requires a new compiler-generated temporary.  Returns -1 otherwise.
4286 ADDRESS_P is a flag indicating whether the expression is for address
4287 computation.  */
4289 static int
4290 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4291 tree cbase, HOST_WIDE_INT ratio,
4292 bool address_p)
4294 aff_tree ubase_aff, cbase_aff;
4295 tree expr, ub, cb;
4297 STRIP_NOPS (ubase);
4298 STRIP_NOPS (cbase);
4299 ub = ubase;
4300 cb = cbase;
4302 if ((TREE_CODE (ubase) == INTEGER_CST)
4303 && (TREE_CODE (cbase) == INTEGER_CST))
4304 return -1;
4306 /* Strips the constant part. */
4307 if (TREE_CODE (ubase) == PLUS_EXPR
4308 || TREE_CODE (ubase) == MINUS_EXPR
4309 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4311 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4312 ubase = TREE_OPERAND (ubase, 0);
4315 /* Strips the constant part. */
4316 if (TREE_CODE (cbase) == PLUS_EXPR
4317 || TREE_CODE (cbase) == MINUS_EXPR
4318 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4320 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4321 cbase = TREE_OPERAND (cbase, 0);
4324 if (address_p)
4326 if (((TREE_CODE (ubase) == SSA_NAME)
4327 || (TREE_CODE (ubase) == ADDR_EXPR
4328 && is_gimple_min_invariant (ubase)))
4329 && (TREE_CODE (cbase) == INTEGER_CST))
4330 return -1;
4332 if (((TREE_CODE (cbase) == SSA_NAME)
4333 || (TREE_CODE (cbase) == ADDR_EXPR
4334 && is_gimple_min_invariant (cbase)))
4335 && (TREE_CODE (ubase) == INTEGER_CST))
4336 return -1;
4339 if (ratio == 1)
4341 if (operand_equal_p (ubase, cbase, 0))
4342 return -1;
4344 if (TREE_CODE (ubase) == ADDR_EXPR
4345 && TREE_CODE (cbase) == ADDR_EXPR)
4347 tree usym, csym;
4349 usym = TREE_OPERAND (ubase, 0);
4350 csym = TREE_OPERAND (cbase, 0);
4351 if (TREE_CODE (usym) == ARRAY_REF)
4353 tree ind = TREE_OPERAND (usym, 1);
4354 if (TREE_CODE (ind) == INTEGER_CST
4355 && tree_fits_shwi_p (ind)
4356 && tree_to_shwi (ind) == 0)
4357 usym = TREE_OPERAND (usym, 0);
4359 if (TREE_CODE (csym) == ARRAY_REF)
4361 tree ind = TREE_OPERAND (csym, 1);
4362 if (TREE_CODE (ind) == INTEGER_CST
4363 && tree_fits_shwi_p (ind)
4364 && tree_to_shwi (ind) == 0)
4365 csym = TREE_OPERAND (csym, 0);
4367 if (operand_equal_p (usym, csym, 0))
4368 return -1;
4370 /* Now do a more complex comparison.  */
4371 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4372 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4373 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4374 return -1;
4377 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4378 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4380 aff_combination_scale (&cbase_aff, -1 * ratio);
4381 aff_combination_add (&ubase_aff, &cbase_aff);
4382 expr = aff_combination_to_tree (&ubase_aff);
4383 return get_expr_id (data, expr);
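/* For instance, with ratio == 1 and ubase == cbase (or bases differing
   only in a constant index, such as &a[3] and &a[0]), no temporary is
   needed and -1 is returned.  For unrelated invariant bases, say SSA
   names n and m, the expression n - m is entered into inv_expr_tab, so
   all uses needing the same invariant share a single id.  */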
4388 /* Determines the cost of the computation by which USE is expressed
4389 from induction variable CAND.  If ADDRESS_P is true, we just need
4390 to create an address from it, otherwise we want to get it into a
4391 register.  A set of invariants we depend on is stored in
4392 DEPENDS_ON.  AT is the statement at which the value is computed.
4393 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4394 addressing is likely.  */
4396 static comp_cost
4397 get_computation_cost_at (struct ivopts_data *data,
4398 struct iv_use *use, struct iv_cand *cand,
4399 bool address_p, bitmap *depends_on, gimple at,
4400 bool *can_autoinc,
4401 int *inv_expr_id)
4403 tree ubase = use->iv->base, ustep = use->iv->step;
4404 tree cbase, cstep;
4405 tree utype = TREE_TYPE (ubase), ctype;
4406 unsigned HOST_WIDE_INT cstepi, offset = 0;
4407 HOST_WIDE_INT ratio, aratio;
4408 bool var_present, symbol_present, stmt_is_after_inc;
4409 comp_cost cost;
4410 widest_int rat;
4411 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4412 machine_mode mem_mode = (address_p
4413 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4414 : VOIDmode);
4416 *depends_on = NULL;
4418 /* Only consider real candidates. */
4419 if (!cand->iv)
4420 return infinite_cost;
4422 cbase = cand->iv->base;
4423 cstep = cand->iv->step;
4424 ctype = TREE_TYPE (cbase);
4426 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4428 /* We do not have enough precision to express the values of USE.  */
4429 return infinite_cost;
4432 if (address_p
4433 || (use->iv->base_object
4434 && cand->iv->base_object
4435 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4436 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4438 /* Do not try to express the address of an object with a computation
4439 based on the address of a different object.  This may cause problems
4440 in RTL-level alias analysis (which does not expect this to happen,
4441 as it is invalid in C), and would be unlikely to be useful
4442 anyway.  */
4443 if (use->iv->base_object
4444 && cand->iv->base_object
4445 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4446 return infinite_cost;
4449 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4451 /* TODO -- add direct handling of this case. */
4452 goto fallback;
4455 /* CSTEPI is removed from the offset in case the statement is after
4456 the increment.  If the step is not constant, we use zero instead.
4457 This is a bit imprecise (there is the extra addition), but
4458 redundancy elimination is likely to transform the code so that
4459 it uses the value of the variable before the increment anyway,
4460 so this is not very unrealistic.  */
4461 if (cst_and_fits_in_hwi (cstep))
4462 cstepi = int_cst_value (cstep);
4463 else
4464 cstepi = 0;
4466 if (!constant_multiple_of (ustep, cstep, &rat))
4467 return infinite_cost;
4469 if (wi::fits_shwi_p (rat))
4470 ratio = rat.to_shwi ();
4471 else
4472 return infinite_cost;
4474 STRIP_NOPS (cbase);
4475 ctype = TREE_TYPE (cbase);
4477 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4479 /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4480 or ratio == 1, it is better to handle this like
4482 ubase - ratio * cbase + ratio * var
4484 (this also holds in the case ratio == -1, TODO).  */
4486 if (cst_and_fits_in_hwi (cbase))
4488 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4489 cost = difference_cost (data,
4490 ubase, build_int_cst (utype, 0),
4491 &symbol_present, &var_present, &offset,
4492 depends_on);
4493 cost.cost /= avg_loop_niter (data->current_loop);
4495 else if (ratio == 1)
4497 tree real_cbase = cbase;
4499 /* Check to see if any adjustment is needed. */
4500 if (cstepi == 0 && stmt_is_after_inc)
4502 aff_tree real_cbase_aff;
4503 aff_tree cstep_aff;
4505 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4506 &real_cbase_aff);
4507 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4509 aff_combination_add (&real_cbase_aff, &cstep_aff);
4510 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4513 cost = difference_cost (data,
4514 ubase, real_cbase,
4515 &symbol_present, &var_present, &offset,
4516 depends_on);
4517 cost.cost /= avg_loop_niter (data->current_loop);
4519 else if (address_p
4520 && !POINTER_TYPE_P (ctype)
4521 && multiplier_allowed_in_address_p
4522 (ratio, mem_mode,
4523 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4525 cbase
4526 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4527 cost = difference_cost (data,
4528 ubase, cbase,
4529 &symbol_present, &var_present, &offset,
4530 depends_on);
4531 cost.cost /= avg_loop_niter (data->current_loop);
4533 else
4535 cost = force_var_cost (data, cbase, depends_on);
4536 cost = add_costs (cost,
4537 difference_cost (data,
4538 ubase, build_int_cst (utype, 0),
4539 &symbol_present, &var_present,
4540 &offset, depends_on));
4541 cost.cost /= avg_loop_niter (data->current_loop);
4542 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4545 /* The set of invariants depended on by the sub-use has already been
4546 computed for the first use in the group.  */
4547 if (use->sub_id)
4549 cost.cost = 0;
4550 if (depends_on && *depends_on)
4551 bitmap_clear (*depends_on);
4553 else if (inv_expr_id)
4555 *inv_expr_id =
4556 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4557 /* Clear depends on. */
4558 if (*inv_expr_id != -1 && depends_on && *depends_on)
4559 bitmap_clear (*depends_on);
4562 /* If we are after the increment, the value of the candidate is higher
4563 by one step of the iv.  */
4564 if (stmt_is_after_inc)
4565 offset -= ratio * cstepi;
4567 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4568 (symbol/var1/const parts may be omitted). If we are looking for an
4569 address, find the cost of addressing this. */
4570 if (address_p)
4571 return add_costs (cost,
4572 get_address_cost (symbol_present, var_present,
4573 offset, ratio, cstepi,
4574 mem_mode,
4575 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4576 speed, stmt_is_after_inc,
4577 can_autoinc));
4579 /* Otherwise estimate the costs for computing the expression. */
4580 if (!symbol_present && !var_present && !offset)
4582 if (ratio != 1)
4583 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4584 return cost;
4587 /* Symbol + offset should be compile-time computable so consider that they
4588 are added once to the variable, if present. */
4589 if (var_present && (symbol_present || offset))
4590 cost.cost += adjust_setup_cost (data,
4591 add_cost (speed, TYPE_MODE (ctype)));
4593 /* Having offset does not affect runtime cost in case it is added to
4594 symbol, but it increases complexity. */
4595 if (offset)
4596 cost.complexity++;
4598 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4600 aratio = ratio > 0 ? ratio : -ratio;
4601 if (aratio != 1)
4602 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4603 return cost;
4605 fallback:
4606 if (can_autoinc)
4607 *can_autoinc = false;
4610 /* Just get the expression, expand it and measure the cost. */
4611 tree comp = get_computation_at (data->current_loop, use, cand, at);
4613 if (!comp)
4614 return infinite_cost;
4616 if (address_p)
4617 comp = build_simple_mem_ref (comp);
4619 return new_cost (computation_cost (comp, speed), 0);
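/* A concrete example: when USE is the address a[i] in a loop over i
   and CAND is a pointer iv with base &a[0] and step sizeof (*a), we
   get ubase == cbase and ratio == 1, so the difference_cost path
   yields empty symbol/var/offset parts and the total reduces to
   get_address_cost of a plain *p access.  */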
4623 /* Determines the cost of the computation by which USE is expressed
4624 from induction variable CAND.  If ADDRESS_P is true, we just need
4625 to create an address from it, otherwise we want to get it into a
4626 register.  A set of invariants we depend on is stored in
4627 DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4628 autoinc addressing is likely.  */
4630 static comp_cost
4631 get_computation_cost (struct ivopts_data *data,
4632 struct iv_use *use, struct iv_cand *cand,
4633 bool address_p, bitmap *depends_on,
4634 bool *can_autoinc, int *inv_expr_id)
4636 return get_computation_cost_at (data,
4637 use, cand, address_p, depends_on, use->stmt,
4638 can_autoinc, inv_expr_id);
4641 /* Determines cost of basing replacement of USE on CAND in a generic
4642 expression. */
4644 static bool
4645 determine_use_iv_cost_generic (struct ivopts_data *data,
4646 struct iv_use *use, struct iv_cand *cand)
4648 bitmap depends_on;
4649 comp_cost cost;
4650 int inv_expr_id = -1;
4652 /* The simple case first -- if we need to express the value of the
4653 preserved original biv, the cost is 0.  This also prevents us from
4654 counting the cost of the increment twice -- once at this use and
4655 once in the cost of the candidate.  */
4656 if (cand->pos == IP_ORIGINAL
4657 && cand->incremented_at == use->stmt)
4659 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4660 ERROR_MARK, -1);
4661 return true;
4664 cost = get_computation_cost (data, use, cand, false, &depends_on,
4665 NULL, &inv_expr_id);
4667 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4668 inv_expr_id);
4670 return !infinite_cost_p (cost);
4673 /* Determines cost of basing replacement of USE on CAND in an address. */
4675 static bool
4676 determine_use_iv_cost_address (struct ivopts_data *data,
4677 struct iv_use *use, struct iv_cand *cand)
4679 bitmap depends_on;
4680 bool can_autoinc;
4681 int inv_expr_id = -1;
4682 struct iv_use *sub_use;
4683 comp_cost sub_cost;
4684 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4685 &can_autoinc, &inv_expr_id);
4687 if (cand->ainc_use == use)
4689 if (can_autoinc)
4690 cost.cost -= cand->cost_step;
4691 /* If we generated the candidate solely for exploiting autoincrement
4692 opportunities, and it turns out it can't be used, set the cost to
4693 infinity to make sure we ignore it. */
4694 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4695 cost = infinite_cost;
4697 for (sub_use = use->next;
4698 sub_use && !infinite_cost_p (cost);
4699 sub_use = sub_use->next)
4701 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4702 &can_autoinc, &inv_expr_id);
4703 cost = add_costs (cost, sub_cost);
4706 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4707 inv_expr_id);
4709 return !infinite_cost_p (cost);
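/* For instance, when the candidate is incremented right after the
   memory access it was created for, as in

     ... = *p;
     p = p + 1;

   a target with post-increment addressing can merge the two; this is
   modelled above by subtracting the step cost when CAN_AUTOINC
   holds.  */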
4712 /* Computes value of candidate CAND at position AT in iteration NITER, and
4713 stores it to VAL. */
4715 static void
4716 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4717 aff_tree *val)
4719 aff_tree step, delta, nit;
4720 struct iv *iv = cand->iv;
4721 tree type = TREE_TYPE (iv->base);
4722 /* Compute in an unsigned type, so that the arithmetic below cannot
4723 introduce undefined overflow; for pointers use sizetype.  */
4724 tree steptype = (POINTER_TYPE_P (type)
4725 ? sizetype : unsigned_type_for (type));
4727 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4728 aff_combination_convert (&step, steptype);
4729 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4730 aff_combination_convert (&nit, steptype);
4731 aff_combination_mult (&nit, &step, &delta);
4732 if (stmt_after_increment (loop, cand, at))
4733 aff_combination_add (&delta, &step);
4735 tree_to_aff_combination (iv->base, type, val);
4736 if (!POINTER_TYPE_P (type))
4737 aff_combination_convert (val, steptype);
4738 aff_combination_add (val, &delta);
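/* In other words, for a candidate with base B and step S this computes
   B + S * NITER, or B + S * (NITER + 1) when AT is after the increment
   of the candidate.  */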
4741 /* Returns period of induction variable iv. */
4743 static tree
4744 iv_period (struct iv *iv)
4746 tree step = iv->step, period, type;
4747 tree pow2div;
4749 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4751 type = unsigned_type_for (TREE_TYPE (step));
4752 /* The period of the iv is lcm (step, type_range)/step - 1,
4753 i.e., N*type_range/step - 1.  Since the type range is a power
4754 of two, N == step >> num_of_ending_zeros_binary (step),
4755 so the final result is
4757 (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
4760 pow2div = num_ending_zeros (step);
4762 period = build_low_bits_mask (type,
4763 (TYPE_PRECISION (type)
4764 - tree_to_uhwi (pow2div)));
4766 return period;
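/* Example: for a 32-bit iv with step 4, num_ending_zeros is 2 and the
   period is the mask of the low 30 bits, 0x3fffffff -- the iv takes
   2^30 distinct values before wrapping back to its initial one.  */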
4769 /* Returns the comparison operator used when eliminating the iv USE. */
4771 static enum tree_code
4772 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4774 struct loop *loop = data->current_loop;
4775 basic_block ex_bb;
4776 edge exit;
4778 ex_bb = gimple_bb (use->stmt);
4779 exit = EDGE_SUCC (ex_bb, 0);
4780 if (flow_bb_inside_loop_p (loop, exit->dest))
4781 exit = EDGE_SUCC (ex_bb, 1);
4783 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4786 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4787 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4788 calculation is performed in a non-wrapping type.
4790 TODO: More generally, we could test for the situation that
4791 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4792 This would require knowing the sign of OFFSET. */
4794 static bool
4795 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4797 enum tree_code code;
4798 tree e1, e2;
4799 aff_tree aff_e1, aff_e2, aff_offset;
4801 if (!nowrap_type_p (TREE_TYPE (base)))
4802 return false;
4804 base = expand_simple_operations (base);
4806 if (TREE_CODE (base) == SSA_NAME)
4808 gimple stmt = SSA_NAME_DEF_STMT (base);
4810 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4811 return false;
4813 code = gimple_assign_rhs_code (stmt);
4814 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4815 return false;
4817 e1 = gimple_assign_rhs1 (stmt);
4818 e2 = gimple_assign_rhs2 (stmt);
4820 else
4822 code = TREE_CODE (base);
4823 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4824 return false;
4825 e1 = TREE_OPERAND (base, 0);
4826 e2 = TREE_OPERAND (base, 1);
4829 /* Use affine expansion as deeper inspection to prove the equality. */
4830 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4831 &aff_e2, &data->name_expansion_cache);
4832 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4833 &aff_offset, &data->name_expansion_cache);
4834 aff_combination_scale (&aff_offset, -1);
4835 switch (code)
4837 case PLUS_EXPR:
4838 aff_combination_add (&aff_e2, &aff_offset);
4839 if (aff_combination_zero_p (&aff_e2))
4840 return true;
4842 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4843 &aff_e1, &data->name_expansion_cache);
4844 aff_combination_add (&aff_e1, &aff_offset);
4845 return aff_combination_zero_p (&aff_e1);
4847 case POINTER_PLUS_EXPR:
4848 aff_combination_add (&aff_e2, &aff_offset);
4849 return aff_combination_zero_p (&aff_e2);
4851 default:
4852 return false;
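/* E.g. when BASE is an SSA name defined as base0 + a and OFFSET
   expands to the same a (the situation constructed by
   iv_elimination_compare_lt below), the PLUS_EXPR case finds that one
   summand cancels against -OFFSET, so BASE - OFFSET is just base0 and
   cannot overflow.  */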
4856 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
4857 comparison with CAND.  NITER describes the number of iterations of
4858 the loop.  If successful, the comparison in COMP_P is altered accordingly.
4860 We aim to handle the following situation:
4862 sometype *base, *p;
4863 int a, b, i;
4865 i = a;
4866 p = p_0 = base + a;
4870 bla (*p);
4871 p++;
4872 i++;
4874 while (i < b);
4876 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4877 We aim to optimize this to
4879 p = p_0 = base + a;
4882 bla (*p);
4883 p++;
4885 while (p < p_0 - a + b);
4887 This preserves correctness, since the pointer arithmetic does not
4888 overflow.  More precisely:
4890 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4891 overflow in computing it or the values of p.
4892 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4893 overflow. To prove this, we use the fact that p_0 = base + a. */
4895 static bool
4896 iv_elimination_compare_lt (struct ivopts_data *data,
4897 struct iv_cand *cand, enum tree_code *comp_p,
4898 struct tree_niter_desc *niter)
4900 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4901 struct aff_tree nit, tmpa, tmpb;
4902 enum tree_code comp;
4903 HOST_WIDE_INT step;
4905 /* We need to know that the candidate induction variable does not overflow.
4906 While more complex analysis may be used to prove this, for now just
4907 check that the variable appears in the original program and that it
4908 is computed in a type that guarantees no overflows. */
4909 cand_type = TREE_TYPE (cand->iv->base);
4910 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4911 return false;
4913 /* Make sure that the loop iterates until the loop bound is hit, as
4914 otherwise the calculation of BOUND could overflow, making the
4915 comparison invalid.  */
4916 if (!data->loop_single_exit_p)
4917 return false;
4919 /* We need to be able to decide whether the candidate is increasing or
4920 decreasing in order to choose the right comparison operator.  */
4921 if (!cst_and_fits_in_hwi (cand->iv->step))
4922 return false;
4923 step = int_cst_value (cand->iv->step);
4925 /* Check that the number of iterations matches the expected pattern:
4926 a + 1 > b ? 0 : b - a - 1. */
4927 mbz = niter->may_be_zero;
4928 if (TREE_CODE (mbz) == GT_EXPR)
4930 /* Handle a + 1 > b. */
4931 tree op0 = TREE_OPERAND (mbz, 0);
4932 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4934 a = TREE_OPERAND (op0, 0);
4935 b = TREE_OPERAND (mbz, 1);
4937 else
4938 return false;
4940 else if (TREE_CODE (mbz) == LT_EXPR)
4942 tree op1 = TREE_OPERAND (mbz, 1);
4944 /* Handle b < a + 1. */
4945 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4947 a = TREE_OPERAND (op1, 0);
4948 b = TREE_OPERAND (mbz, 0);
4950 else
4951 return false;
4953 else
4954 return false;
4956 /* Expected number of iterations is B - A - 1. Check that it matches
4957 the actual number, i.e., that B - A - NITER = 1. */
4958 tree_to_aff_combination (niter->niter, nit_type, &nit);
4959 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4960 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4961 aff_combination_scale (&nit, -1);
4962 aff_combination_scale (&tmpa, -1);
4963 aff_combination_add (&tmpb, &tmpa);
4964 aff_combination_add (&tmpb, &nit);
4965 if (tmpb.n != 0 || tmpb.offset != 1)
4966 return false;
4968 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4969 overflow. */
4970 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4971 cand->iv->step,
4972 fold_convert (TREE_TYPE (cand->iv->step), a));
4973 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4974 return false;
4976 /* Determine the new comparison operator. */
4977 comp = step < 0 ? GT_EXPR : LT_EXPR;
4978 if (*comp_p == NE_EXPR)
4979 *comp_p = comp;
4980 else if (*comp_p == EQ_EXPR)
4981 *comp_p = invert_tree_comparison (comp, false);
4982 else
4983 gcc_unreachable ();
4985 return true;
4988 /* Check whether it is possible to express the condition in USE by a
4989 comparison of candidate CAND.  If so, store the value compared
4990 against in BOUND, and the comparison operator in COMP.  */
4992 static bool
4993 may_eliminate_iv (struct ivopts_data *data,
4994 struct iv_use *use, struct iv_cand *cand, tree *bound,
4995 enum tree_code *comp)
4997 basic_block ex_bb;
4998 edge exit;
4999 tree period;
5000 struct loop *loop = data->current_loop;
5001 aff_tree bnd;
5002 struct tree_niter_desc *desc = NULL;
5004 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5005 return false;
5007 /* For now this works only for exits that dominate the loop latch.
5008 TODO: extend to other conditions inside the loop body.  */
5009 ex_bb = gimple_bb (use->stmt);
5010 if (use->stmt != last_stmt (ex_bb)
5011 || gimple_code (use->stmt) != GIMPLE_COND
5012 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5013 return false;
5015 exit = EDGE_SUCC (ex_bb, 0);
5016 if (flow_bb_inside_loop_p (loop, exit->dest))
5017 exit = EDGE_SUCC (ex_bb, 1);
5018 if (flow_bb_inside_loop_p (loop, exit->dest))
5019 return false;
5021 desc = niter_for_exit (data, exit);
5022 if (!desc)
5023 return false;
5025 /* Determine whether we can use the variable to test the exit condition.
5026 This is the case iff the period of the induction variable is greater
5027 than the number of iterations for which the exit condition is true. */
5028 period = iv_period (cand->iv);
5030 /* If the number of iterations is constant, compare against it directly. */
5031 if (TREE_CODE (desc->niter) == INTEGER_CST)
5033 /* See cand_value_at. */
5034 if (stmt_after_increment (loop, cand, use->stmt))
5036 if (!tree_int_cst_lt (desc->niter, period))
5037 return false;
5039 else
5041 if (tree_int_cst_lt (period, desc->niter))
5042 return false;
5046 /* If not, and if this is the only possible exit of the loop, see whether
5047 we can get a conservative estimate on the number of iterations of the
5048 entire loop and compare against that instead. */
5049 else
5051 widest_int period_value, max_niter;
5053 max_niter = desc->max;
5054 if (stmt_after_increment (loop, cand, use->stmt))
5055 max_niter += 1;
5056 period_value = wi::to_widest (period);
5057 if (wi::gtu_p (max_niter, period_value))
5059 /* See if we can take advantage of inferred loop bound information. */
5060 if (data->loop_single_exit_p)
5062 if (!max_loop_iterations (loop, &max_niter))
5063 return false;
5064 /* The loop bound is already adjusted by adding 1. */
5065 if (wi::gtu_p (max_niter, period_value))
5066 return false;
5068 else
5069 return false;
5073 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5075 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5076 aff_combination_to_tree (&bnd));
5077 *comp = iv_elimination_compare (data, use);
5079 /* It is unlikely that computing the number of iterations using division
5080 would be more profitable than keeping the original induction variable. */
5081 if (expression_expensive_p (*bound))
5082 return false;
5084 /* Sometimes it is possible to handle the situation that the number of
5085 iterations may be zero unless additional assumptions hold, by using
5086 < instead of != in the exit condition.
5088 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5089 base the exit condition on it.  However, that is often too
5090 expensive.  */
5091 if (!integer_zerop (desc->may_be_zero))
5092 return iv_elimination_compare_lt (data, cand, comp, desc);
5094 return true;
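/* For example, with an array-walking candidate p, an exit test

     i != n    becomes    p != &a[0] + n * sizeof (*a)

   where the bound is the value of p after n iterations as computed by
   cand_value_at (a sketch; the actual trees are built by the rewriting
   code).  The period check above guarantees that p reaches the bound
   for the first time exactly at the exit.  */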
5097 /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5098 be copied, if it is used in the loop body and DATA->body_includes_call.  */
5100 static int
5101 parm_decl_cost (struct ivopts_data *data, tree bound)
5103 tree sbound = bound;
5104 STRIP_NOPS (sbound);
5106 if (TREE_CODE (sbound) == SSA_NAME
5107 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5108 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5109 && data->body_includes_call)
5110 return COSTS_N_INSNS (1);
5112 return 0;
5115 /* Determines cost of basing replacement of USE on CAND in a condition. */
5117 static bool
5118 determine_use_iv_cost_condition (struct ivopts_data *data,
5119 struct iv_use *use, struct iv_cand *cand)
5121 tree bound = NULL_TREE;
5122 struct iv *cmp_iv;
5123 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5124 comp_cost elim_cost, express_cost, cost, bound_cost;
5125 bool ok;
5126 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5127 tree *control_var, *bound_cst;
5128 enum tree_code comp = ERROR_MARK;
5130 /* Only consider real candidates. */
5131 if (!cand->iv)
5133 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5134 ERROR_MARK, -1);
5135 return false;
5138 /* Try iv elimination. */
5139 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5141 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5142 if (elim_cost.cost == 0)
5143 elim_cost.cost = parm_decl_cost (data, bound);
5144 else if (TREE_CODE (bound) == INTEGER_CST)
5145 elim_cost.cost = 0;
5146 /* If we replace a loop condition 'i < n' with 'p < base + n',
5147 depends_on_elim will have 'base' and 'n' set, which implies
5148 that both 'base' and 'n' will be live during the loop. More likely,
5149 'base + n' will be loop invariant, resulting in only one live value
5150 during the loop. So in that case we clear depends_on_elim and set
5151 elim_inv_expr_id instead. */
5152 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5154 elim_inv_expr_id = get_expr_id (data, bound);
5155 bitmap_clear (depends_on_elim);
5157 /* The bound is a loop invariant, so it will be computed
5158 only once.  */
5159 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5161 else
5162 elim_cost = infinite_cost;
5164 /* Try expressing the original giv. If it is compared with an invariant,
5165 note that we cannot get rid of it. */
5166 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5167 NULL, &cmp_iv);
5168 gcc_assert (ok);
5170 /* When the condition is a comparison of the candidate IV against
5171 zero, prefer this IV.
5173 TODO: The constant that we're subtracting from the cost should
5174 be target-dependent. This information should be added to the
5175 target costs for each backend. */
5176 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5177 && integer_zerop (*bound_cst)
5178 && (operand_equal_p (*control_var, cand->var_after, 0)
5179 || operand_equal_p (*control_var, cand->var_before, 0)))
5180 elim_cost.cost -= 1;
5182 express_cost = get_computation_cost (data, use, cand, false,
5183 &depends_on_express, NULL,
5184 &express_inv_expr_id);
5185 fd_ivopts_data = data;
5186 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5188 /* Count the cost of the original bound as well. */
5189 bound_cost = force_var_cost (data, *bound_cst, NULL);
5190 if (bound_cost.cost == 0)
5191 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5192 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5193 bound_cost.cost = 0;
5194 express_cost.cost += bound_cost.cost;
5196 /* Choose the better approach, preferring the eliminated IV. */
5197 if (compare_costs (elim_cost, express_cost) <= 0)
5199 cost = elim_cost;
5200 depends_on = depends_on_elim;
5201 depends_on_elim = NULL;
5202 inv_expr_id = elim_inv_expr_id;
5204 else
5206 cost = express_cost;
5207 depends_on = depends_on_express;
5208 depends_on_express = NULL;
5209 bound = NULL_TREE;
5210 comp = ERROR_MARK;
5211 inv_expr_id = express_inv_expr_id;
5214 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5216 if (depends_on_elim)
5217 BITMAP_FREE (depends_on_elim);
5218 if (depends_on_express)
5219 BITMAP_FREE (depends_on_express);
5221 return !infinite_cost_p (cost);
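/* E.g. for an exit test i < n, ELIM_COST prices replacing the test by
   a comparison of the candidate against BOUND (the bound is loop
   invariant, hence adjust_setup_cost), while EXPRESS_COST prices
   recomputing i from the candidate just for the comparison; the
   cheaper alternative decides whether BOUND and COMP are recorded
   for the rewrite.  */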
5224 /* Determines cost of basing replacement of USE on CAND. Returns false
5225 if USE cannot be based on CAND. */
5227 static bool
5228 determine_use_iv_cost (struct ivopts_data *data,
5229 struct iv_use *use, struct iv_cand *cand)
5231 switch (use->type)
5233 case USE_NONLINEAR_EXPR:
5234 return determine_use_iv_cost_generic (data, use, cand);
5236 case USE_ADDRESS:
5237 return determine_use_iv_cost_address (data, use, cand);
5239 case USE_COMPARE:
5240 return determine_use_iv_cost_condition (data, use, cand);
5242 default:
5243 gcc_unreachable ();
5247 /* Return true if get_computation_cost indicates that autoincrement is
5248 a possibility for the pair of USE and CAND, false otherwise. */
5250 static bool
5251 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5252 struct iv_cand *cand)
5254 bitmap depends_on;
5255 bool can_autoinc;
5256 comp_cost cost;
5258 if (use->type != USE_ADDRESS)
5259 return false;
5261 cost = get_computation_cost (data, use, cand, true, &depends_on,
5262 &can_autoinc, NULL);
5264 BITMAP_FREE (depends_on);
5266 return !infinite_cost_p (cost) && can_autoinc;
5269 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5270 use that allows autoincrement, and set their AINC_USE if possible. */
5272 static void
5273 set_autoinc_for_original_candidates (struct ivopts_data *data)
5275 unsigned i, j;
5277 for (i = 0; i < n_iv_cands (data); i++)
5279 struct iv_cand *cand = iv_cand (data, i);
5280 struct iv_use *closest_before = NULL;
5281 struct iv_use *closest_after = NULL;
5282 if (cand->pos != IP_ORIGINAL)
5283 continue;
5285 for (j = 0; j < n_iv_uses (data); j++)
5287 struct iv_use *use = iv_use (data, j);
5288 unsigned uid = gimple_uid (use->stmt);
5290 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5291 continue;
5293 if (uid < gimple_uid (cand->incremented_at)
5294 && (closest_before == NULL
5295 || uid > gimple_uid (closest_before->stmt)))
5296 closest_before = use;
5298 if (uid > gimple_uid (cand->incremented_at)
5299 && (closest_after == NULL
5300 || uid < gimple_uid (closest_after->stmt)))
5301 closest_after = use;
5304 if (closest_before != NULL
5305 && autoinc_possible_for_pair (data, closest_before, cand))
5306 cand->ainc_use = closest_before;
5307 else if (closest_after != NULL
5308 && autoinc_possible_for_pair (data, closest_after, cand))
5309 cand->ainc_use = closest_after;
5313 /* Finds the candidates for the induction variables. */
5315 static void
5316 find_iv_candidates (struct ivopts_data *data)
5318 /* Add commonly used ivs. */
5319 add_standard_iv_candidates (data);
5321 /* Add old induction variables. */
5322 add_old_ivs_candidates (data);
5324 /* Add induction variables derived from uses. */
5325 add_derived_ivs_candidates (data);
5327 set_autoinc_for_original_candidates (data);
5329 /* Record the important candidates. */
5330 record_important_candidates (data);
5333 /* Determines costs of basing the use of the iv on an iv candidate. */
5335 static void
5336 determine_use_iv_costs (struct ivopts_data *data)
5338 unsigned i, j;
5339 struct iv_use *use;
5340 struct iv_cand *cand;
5341 bitmap to_clear = BITMAP_ALLOC (NULL);
5343 alloc_use_cost_map (data);
5345 for (i = 0; i < n_iv_uses (data); i++)
5347 use = iv_use (data, i);
5349 if (data->consider_all_candidates)
5351 for (j = 0; j < n_iv_cands (data); j++)
5353 cand = iv_cand (data, j);
5354 determine_use_iv_cost (data, use, cand);
5357 else
5359 bitmap_iterator bi;
5361 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5363 cand = iv_cand (data, j);
5364 if (!determine_use_iv_cost (data, use, cand))
5365 bitmap_set_bit (to_clear, j);
5368 /* Remove the candidates for which the cost is infinite from
5369 the list of related candidates.  */
5370 bitmap_and_compl_into (use->related_cands, to_clear);
5371 bitmap_clear (to_clear);
5375 BITMAP_FREE (to_clear);
5377 if (dump_file && (dump_flags & TDF_DETAILS))
5379 fprintf (dump_file, "Use-candidate costs:\n");
5381 for (i = 0; i < n_iv_uses (data); i++)
5383 use = iv_use (data, i);
5385 fprintf (dump_file, "Use %d:\n", i);
5386 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5387 for (j = 0; j < use->n_map_members; j++)
5389 if (!use->cost_map[j].cand
5390 || infinite_cost_p (use->cost_map[j].cost))
5391 continue;
5393 fprintf (dump_file, " %d\t%d\t%d\t",
5394 use->cost_map[j].cand->id,
5395 use->cost_map[j].cost.cost,
5396 use->cost_map[j].cost.complexity);
5397 if (use->cost_map[j].depends_on)
5398 bitmap_print (dump_file,
5399 use->cost_map[j].depends_on, "","");
5400 if (use->cost_map[j].inv_expr_id != -1)
5401 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5402 fprintf (dump_file, "\n");
5405 fprintf (dump_file, "\n");
5407 fprintf (dump_file, "\n");
5411 /* Determines cost of the candidate CAND. */
5413 static void
5414 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5416 comp_cost cost_base;
5417 unsigned cost, cost_step;
5418 tree base;
5420 if (!cand->iv)
5422 cand->cost = 0;
5423 return;
5426 /* There are two costs associated with the candidate -- its increment
5427 and its initialization.  The second is almost negligible for any loop
5428 that rolls enough, so we give it only very little weight.  */
5430 base = cand->iv->base;
5431 cost_base = force_var_cost (data, base, NULL);
5432 /* It is exceptional for the iv register to happen to be initialized
5433 with the proper value at no cost.  In general, there will at least
5434 be a regcopy or a const set.  */
5435 if (cost_base.cost == 0)
5436 cost_base.cost = COSTS_N_INSNS (1);
5437 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5439 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5441 /* Prefer the original ivs unless we may gain something by replacing
5442 them; this makes debugging simpler.  It is therefore not relevant for
5443 artificial ivs created by other optimization passes.  */
5444 if (cand->pos != IP_ORIGINAL
5445 || !SSA_NAME_VAR (cand->var_before)
5446 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5447 cost++;
5449 /* Prefer not to insert statements into the latch unless there are
5450 some already (so that we do not create unnecessary jumps).  */
5451 if (cand->pos == IP_END
5452 && empty_block_p (ip_end_pos (data->current_loop)))
5453 cost++;
5455 cand->cost = cost;
5456 cand->cost_step = cost_step;
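/* Illustration: a candidate with a constant base costs at least
   COSTS_N_INSNS (1) to set up, but the setup executes only once, so
   adjust_setup_cost scales it down for loops that roll enough; the
   recurring part is the add_cost of the increment, plus the small
   penalties above biasing the choice towards original ivs and away
   from filling an empty latch.  */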
5459 /* Determines costs of computation of the candidates. */
5461 static void
5462 determine_iv_costs (struct ivopts_data *data)
5464 unsigned i;
5466 if (dump_file && (dump_flags & TDF_DETAILS))
5468 fprintf (dump_file, "Candidate costs:\n");
5469 fprintf (dump_file, " cand\tcost\n");
5472 for (i = 0; i < n_iv_cands (data); i++)
5474 struct iv_cand *cand = iv_cand (data, i);
5476 determine_iv_cost (data, cand);
5478 if (dump_file && (dump_flags & TDF_DETAILS))
5479 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5482 if (dump_file && (dump_flags & TDF_DETAILS))
5483 fprintf (dump_file, "\n");
5486 /* Calculates cost for having SIZE induction variables. */
5488 static unsigned
5489 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5491 /* We add size to the cost, so that we prefer eliminating ivs
5492 if possible. */
5493 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5494 data->body_includes_call);
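/* E.g. a set of 3 ivs costs 3 plus the register-pressure estimate for
   3 registers on top of DATA->regs_used; once the total approaches the
   number of available registers the estimate starts charging spills,
   which is what makes the search prefer small sets of ivs.  */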
5497 /* For each size of the induction variable set determine the penalty. */
5499 static void
5500 determine_set_costs (struct ivopts_data *data)
5502 unsigned j, n;
5503 gphi *phi;
5504 gphi_iterator psi;
5505 tree op;
5506 struct loop *loop = data->current_loop;
5507 bitmap_iterator bi;
5509 if (dump_file && (dump_flags & TDF_DETAILS))
5511 fprintf (dump_file, "Global costs:\n");
5512 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5513 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5514 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5515 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5518 n = 0;
5519 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5521 phi = psi.phi ();
5522 op = PHI_RESULT (phi);
5524 if (virtual_operand_p (op))
5525 continue;
5527 if (get_iv (data, op))
5528 continue;
5530 n++;
5533 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5535 struct version_info *info = ver_info (data, j);
5537 if (info->inv_id && info->has_nonlin_use)
5538 n++;
5541 data->regs_used = n;
5542 if (dump_file && (dump_flags & TDF_DETAILS))
5543 fprintf (dump_file, " regs_used %d\n", n);
5545 if (dump_file && (dump_flags & TDF_DETAILS))
5547 fprintf (dump_file, " cost for size:\n");
5548 fprintf (dump_file, " ivs\tcost\n");
5549 for (j = 0; j <= 2 * target_avail_regs; j++)
5550 fprintf (dump_file, " %d\t%d\n", j,
5551 ivopts_global_cost_for_size (data, j));
5552 fprintf (dump_file, "\n");
5556 /* Returns true if A is a cheaper cost pair than B. */
5558 static bool
5559 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5561 int cmp;
5563 if (!a)
5564 return false;
5566 if (!b)
5567 return true;
5569 cmp = compare_costs (a->cost, b->cost);
5570 if (cmp < 0)
5571 return true;
5573 if (cmp > 0)
5574 return false;
5576 /* In case the costs are the same, prefer the cheaper candidate. */
5577 if (a->cand->cost < b->cand->cost)
5578 return true;
5580 return false;
5584 /* Returns the candidate by which USE is expressed in IVS.  */
5586 static struct cost_pair *
5587 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5589 return ivs->cand_for_use[use->id];
5592 /* Computes the cost field of the IVS structure.  */
5594 static void
5595 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5597 comp_cost cost = ivs->cand_use_cost;
5599 cost.cost += ivs->cand_cost;
5601 cost.cost += ivopts_global_cost_for_size (data,
5602 ivs->n_regs + ivs->num_used_inv_expr);
5604 ivs->cost = cost;
5607 /* Remove invariants in set INVS from set IVS.  */
5609 static void
5610 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5612 bitmap_iterator bi;
5613 unsigned iid;
5615 if (!invs)
5616 return;
5618 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5620 ivs->n_invariant_uses[iid]--;
5621 if (ivs->n_invariant_uses[iid] == 0)
5622 ivs->n_regs--;
5626 /* Set USE not to be expressed by any candidate in IVS. */
5628 static void
5629 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5630 struct iv_use *use)
5632 unsigned uid = use->id, cid;
5633 struct cost_pair *cp;
5635 cp = ivs->cand_for_use[uid];
5636 if (!cp)
5637 return;
5638 cid = cp->cand->id;
5640 ivs->bad_uses++;
5641 ivs->cand_for_use[uid] = NULL;
5642 ivs->n_cand_uses[cid]--;
5644 if (ivs->n_cand_uses[cid] == 0)
5646 bitmap_clear_bit (ivs->cands, cid);
5647 /* Do not count the pseudocandidates. */
5648 if (cp->cand->iv)
5649 ivs->n_regs--;
5650 ivs->n_cands--;
5651 ivs->cand_cost -= cp->cand->cost;
5653 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5656 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5658 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5660 if (cp->inv_expr_id != -1)
5662 ivs->used_inv_expr[cp->inv_expr_id]--;
5663 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5664 ivs->num_used_inv_expr--;
5666 iv_ca_recount_cost (data, ivs);
5669 /* Add invariants in set INVS to set IVS. */
5671 static void
5672 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5674 bitmap_iterator bi;
5675 unsigned iid;
5677 if (!invs)
5678 return;
5680 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5682 ivs->n_invariant_uses[iid]++;
5683 if (ivs->n_invariant_uses[iid] == 1)
5684 ivs->n_regs++;
5688 /* Set cost pair for USE in set IVS to CP. */
5690 static void
5691 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5692 struct iv_use *use, struct cost_pair *cp)
5694 unsigned uid = use->id, cid;
5696 if (ivs->cand_for_use[uid] == cp)
5697 return;
5699 if (ivs->cand_for_use[uid])
5700 iv_ca_set_no_cp (data, ivs, use);
5702 if (cp)
5704 cid = cp->cand->id;
5706 ivs->bad_uses--;
5707 ivs->cand_for_use[uid] = cp;
5708 ivs->n_cand_uses[cid]++;
5709 if (ivs->n_cand_uses[cid] == 1)
5711 bitmap_set_bit (ivs->cands, cid);
5712 /* Do not count the pseudocandidates. */
5713 if (cp->cand->iv)
5714 ivs->n_regs++;
5715 ivs->n_cands++;
5716 ivs->cand_cost += cp->cand->cost;
5718 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5721 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5722 iv_ca_set_add_invariants (ivs, cp->depends_on);
5724 if (cp->inv_expr_id != -1)
5726 ivs->used_inv_expr[cp->inv_expr_id]++;
5727 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5728 ivs->num_used_inv_expr++;
5730 iv_ca_recount_cost (data, ivs);
5734 /* Extend set IVS by expressing USE by some of the candidates in it
5735 if possible. Consider all important candidates if candidates in
5736 set IVS don't give any result. */
5738 static void
5739 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5740 struct iv_use *use)
5742 struct cost_pair *best_cp = NULL, *cp;
5743 bitmap_iterator bi;
5744 unsigned i;
5745 struct iv_cand *cand;
5747 gcc_assert (ivs->upto >= use->id);
5748 ivs->upto++;
5749 ivs->bad_uses++;
5751 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5753 cand = iv_cand (data, i);
5754 cp = get_use_iv_cost (data, use, cand);
5755 if (cheaper_cost_pair (cp, best_cp))
5756 best_cp = cp;
5759 if (best_cp == NULL)
5761 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5763 cand = iv_cand (data, i);
5764 cp = get_use_iv_cost (data, use, cand);
5765 if (cheaper_cost_pair (cp, best_cp))
5766 best_cp = cp;
5770 iv_ca_set_cp (data, ivs, use, best_cp);
5773 /* Get cost for assignment IVS. */
5775 static comp_cost
5776 iv_ca_cost (struct iv_ca *ivs)
5778 /* This was a conditional expression but it triggered a bug in
5779 Sun C 5.5. */
5780 if (ivs->bad_uses)
5781 return infinite_cost;
5782 else
5783 return ivs->cost;
5786 /* Returns true if all dependences of CP are among invariants in IVS. */
5788 static bool
5789 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5791 unsigned i;
5792 bitmap_iterator bi;
5794 if (!cp->depends_on)
5795 return true;
5797 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5799 if (ivs->n_invariant_uses[i] == 0)
5800 return false;
5803 return true;
5806 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP,
5807 chained before NEXT_CHANGE.  */
5809 static struct iv_ca_delta *
5810 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5811 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5813 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5815 change->use = use;
5816 change->old_cp = old_cp;
5817 change->new_cp = new_cp;
5818 change->next_change = next_change;
5820 return change;
5823 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5824 are rewritten. */
5826 static struct iv_ca_delta *
5827 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5829 struct iv_ca_delta *last;
5831 if (!l2)
5832 return l1;
5834 if (!l1)
5835 return l2;
5837 for (last = l1; last->next_change; last = last->next_change)
5838 continue;
5839 last->next_change = l2;
5841 return l1;
5844 /* Reverse the list of changes DELTA, forming the inverse to it. */
5846 static struct iv_ca_delta *
5847 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5849 struct iv_ca_delta *act, *next, *prev = NULL;
5851 for (act = delta; act; act = next)
5853 next = act->next_change;
5854 act->next_change = prev;
5855 prev = act;
5857 std::swap (act->old_cp, act->new_cp);
5860 return prev;
5863 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5864 reverted instead. */
5866 static void
5867 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5868 struct iv_ca_delta *delta, bool forward)
5870 struct cost_pair *from, *to;
5871 struct iv_ca_delta *act;
5873 if (!forward)
5874 delta = iv_ca_delta_reverse (delta);
5876 for (act = delta; act; act = act->next_change)
5878 from = act->old_cp;
5879 to = act->new_cp;
5880 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5881 iv_ca_set_cp (data, ivs, act->use, to);
5884 if (!forward)
5885 iv_ca_delta_reverse (delta);
5888 /* Returns true if CAND is used in IVS. */
5890 static bool
5891 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5893 return ivs->n_cand_uses[cand->id] > 0;
5896 /* Returns number of induction variable candidates in the set IVS. */
5898 static unsigned
5899 iv_ca_n_cands (struct iv_ca *ivs)
5901 return ivs->n_cands;
5904 /* Free the list of changes DELTA. */
5906 static void
5907 iv_ca_delta_free (struct iv_ca_delta **delta)
5909 struct iv_ca_delta *act, *next;
5911 for (act = *delta; act; act = next)
5913 next = act->next_change;
5914 free (act);
5917 *delta = NULL;
5920 /* Allocates a new iv candidate assignment.  */
5922 static struct iv_ca *
5923 iv_ca_new (struct ivopts_data *data)
5925 struct iv_ca *nw = XNEW (struct iv_ca);
5927 nw->upto = 0;
5928 nw->bad_uses = 0;
5929 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5930 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5931 nw->cands = BITMAP_ALLOC (NULL);
5932 nw->n_cands = 0;
5933 nw->n_regs = 0;
5934 nw->cand_use_cost = no_cost;
5935 nw->cand_cost = 0;
5936 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5937 nw->cost = no_cost;
5938 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5939 nw->num_used_inv_expr = 0;
5941 return nw;
5944 /* Free memory occupied by the set IVS. */
5946 static void
5947 iv_ca_free (struct iv_ca **ivs)
5949 free ((*ivs)->cand_for_use);
5950 free ((*ivs)->n_cand_uses);
5951 BITMAP_FREE ((*ivs)->cands);
5952 free ((*ivs)->n_invariant_uses);
5953 free ((*ivs)->used_inv_expr);
5954 free (*ivs);
5955 *ivs = NULL;
5958 /* Dumps IVS to FILE. */
5960 static void
5961 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5963 const char *pref = " invariants ";
5964 unsigned i;
5965 comp_cost cost = iv_ca_cost (ivs);
5967 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5968 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5969 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5970 bitmap_print (file, ivs->cands, " candidates: ","\n");
5972 for (i = 0; i < ivs->upto; i++)
5974 struct iv_use *use = iv_use (data, i);
5975 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5976 if (cp)
5977 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5978 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5979 else
5980 fprintf (file, " use:%d --> ??\n", use->id);
5983 for (i = 1; i <= data->max_inv_id; i++)
5984 if (ivs->n_invariant_uses[i])
5986 fprintf (file, "%s%d", pref, i);
5987 pref = ", ";
5989 fprintf (file, "\n\n");
5992 /* Try changing the candidate in IVS to CAND for each use.  Return the
5993 cost of the new set, and store the differences in DELTA.  The number
5994 of induction variables in the new set is stored in N_IVS.  When the
5995 flag MIN_NCAND is true, try to find a solution with a minimal number of iv candidates.  */
5997 static comp_cost
5998 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5999 struct iv_cand *cand, struct iv_ca_delta **delta,
6000 unsigned *n_ivs, bool min_ncand)
6002 unsigned i;
6003 comp_cost cost;
6004 struct iv_use *use;
6005 struct cost_pair *old_cp, *new_cp;
6007 *delta = NULL;
6008 for (i = 0; i < ivs->upto; i++)
6010 use = iv_use (data, i);
6011 old_cp = iv_ca_cand_for_use (ivs, use);
6013 if (old_cp
6014 && old_cp->cand == cand)
6015 continue;
6017 new_cp = get_use_iv_cost (data, use, cand);
6018 if (!new_cp)
6019 continue;
6021 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6022 continue;
6024 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6025 continue;
6027 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6030 iv_ca_delta_commit (data, ivs, *delta, true);
6031 cost = iv_ca_cost (ivs);
6032 if (n_ivs)
6033 *n_ivs = iv_ca_n_cands (ivs);
6034 iv_ca_delta_commit (data, ivs, *delta, false);
6036 return cost;
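/* A typical caller (cf. try_add_cand_for below) uses the returned
   delta like

     acost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
     if (compare_costs (acost, best_cost) < 0)
       ... remember ACT_DELTA and commit it later ...
     else
       iv_ca_delta_free (&act_delta);

   IVS itself is unchanged on return: the delta is committed above only
   to read off the cost and is immediately reverted.  */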
6039 /* Try narrowing set IVS by removing CAND. Return the cost of
6040 the new set and store the differences in DELTA. START is
6041 the candidate with which we start narrowing. */
6043 static comp_cost
6044 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6045 struct iv_cand *cand, struct iv_cand *start,
6046 struct iv_ca_delta **delta)
6048 unsigned i, ci;
6049 struct iv_use *use;
6050 struct cost_pair *old_cp, *new_cp, *cp;
6051 bitmap_iterator bi;
6052 struct iv_cand *cnd;
6053 comp_cost cost, best_cost, acost;
6055 *delta = NULL;
6056 for (i = 0; i < n_iv_uses (data); i++)
6058 use = iv_use (data, i);
6060 old_cp = iv_ca_cand_for_use (ivs, use);
6061 if (old_cp->cand != cand)
6062 continue;
6064 best_cost = iv_ca_cost (ivs);
6065 /* Start narrowing with START. */
6066 new_cp = get_use_iv_cost (data, use, start);
6068 if (data->consider_all_candidates)
6070 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6072 if (ci == cand->id || (start && ci == start->id))
6073 continue;
6075 cnd = iv_cand (data, ci);
6077 cp = get_use_iv_cost (data, use, cnd);
6078 if (!cp)
6079 continue;
6081 iv_ca_set_cp (data, ivs, use, cp);
6082 acost = iv_ca_cost (ivs);
6084 if (compare_costs (acost, best_cost) < 0)
6086 best_cost = acost;
6087 new_cp = cp;
6091 else
6093 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6095 if (ci == cand->id || (start && ci == start->id))
6096 continue;
6098 cnd = iv_cand (data, ci);
6100 cp = get_use_iv_cost (data, use, cnd);
6101 if (!cp)
6102 continue;
6104 iv_ca_set_cp (data, ivs, use, cp);
6105 acost = iv_ca_cost (ivs);
6107 if (compare_costs (acost, best_cost) < 0)
6109 best_cost = acost;
6110 new_cp = cp;
6114 /* Restore the old cp for this use.  */
6115 iv_ca_set_cp (data, ivs, use, old_cp);
6117 if (!new_cp)
6119 iv_ca_delta_free (delta);
6120 return infinite_cost;
6123 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6126 iv_ca_delta_commit (data, ivs, *delta, true);
6127 cost = iv_ca_cost (ivs);
6128 iv_ca_delta_commit (data, ivs, *delta, false);
6130 return cost;
6133 /* Try optimizing the set of candidates IVS by removing candidates other
6134 than EXCEPT_CAND from it.  Return the cost of the new set, and store
6135 the differences in DELTA.  */
6137 static comp_cost
6138 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6139 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6141 bitmap_iterator bi;
6142 struct iv_ca_delta *act_delta, *best_delta;
6143 unsigned i;
6144 comp_cost best_cost, acost;
6145 struct iv_cand *cand;
6147 best_delta = NULL;
6148 best_cost = iv_ca_cost (ivs);
6150 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6152 cand = iv_cand (data, i);
6154 if (cand == except_cand)
6155 continue;
6157 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6159 if (compare_costs (acost, best_cost) < 0)
6161 best_cost = acost;
6162 iv_ca_delta_free (&best_delta);
6163 best_delta = act_delta;
6165 else
6166 iv_ca_delta_free (&act_delta);
6169 if (!best_delta)
6171 *delta = NULL;
6172 return best_cost;
6175 /* Recurse to possibly remove other unnecessary ivs. */
6176 iv_ca_delta_commit (data, ivs, best_delta, true);
6177 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6178 iv_ca_delta_commit (data, ivs, best_delta, false);
6179 *delta = iv_ca_delta_join (best_delta, *delta);
6180 return best_cost;
6183 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6184 cheaper local cost for USE than BEST_CP.  If so, return a pointer to
6185 the corresponding cost_pair; otherwise just return BEST_CP.  */
6187 static struct cost_pair*
6188 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6189 unsigned int cand_idx, struct iv_cand *old_cand,
6190 struct cost_pair *best_cp)
6192 struct iv_cand *cand;
6193 struct cost_pair *cp;
6195 gcc_assert (old_cand != NULL && best_cp != NULL);
6196 if (cand_idx == old_cand->id)
6197 return best_cp;
6199 cand = iv_cand (data, cand_idx);
6200 cp = get_use_iv_cost (data, use, cand);
6201 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6202 return cp;
6204 return best_cp;
6207 /* Try breaking a locally optimal fixed point for IVS by replacing
6208 candidates which are used by more than one iv use.  For each such
6209 candidate, this function tries to represent the iv uses under that
6210 candidate using other candidates with lower local cost, then tries to
6211 prune the new set.  If the new set has lower cost, it returns the new
6212 cost after recording the candidate replacement in the list DELTA.  */
6214 static comp_cost
6215 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6216 struct iv_ca_delta **delta)
6218 bitmap_iterator bi, bj;
6219 unsigned int i, j, k;
6220 struct iv_use *use;
6221 struct iv_cand *cand;
6222 comp_cost orig_cost, acost;
6223 struct iv_ca_delta *act_delta, *tmp_delta;
6224 struct cost_pair *old_cp, *best_cp = NULL;
6226 *delta = NULL;
6227 orig_cost = iv_ca_cost (ivs);
6229 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6231 if (ivs->n_cand_uses[i] == 1
6232 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6233 continue;
6235 cand = iv_cand (data, i);
6237 act_delta = NULL;
6238 /* Represent uses under current candidate using other ones with
6239 lower local cost. */
6240 for (j = 0; j < ivs->upto; j++)
6242 use = iv_use (data, j);
6243 old_cp = iv_ca_cand_for_use (ivs, use);
6245 if (old_cp->cand != cand)
6246 continue;
6248 best_cp = old_cp;
6249 if (data->consider_all_candidates)
6250 for (k = 0; k < n_iv_cands (data); k++)
6251 best_cp = cheaper_cost_with_cand (data, use, k,
6252 old_cp->cand, best_cp);
6253 else
6254 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6255 best_cp = cheaper_cost_with_cand (data, use, k,
6256 old_cp->cand, best_cp);
6258 if (best_cp == old_cp)
6259 continue;
6261 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
/* No need for further pruning. */
6264 if (!act_delta)
6265 continue;
6267 /* Prune the new candidate set. */
6268 iv_ca_delta_commit (data, ivs, act_delta, true);
6269 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6270 iv_ca_delta_commit (data, ivs, act_delta, false);
6271 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6273 if (compare_costs (acost, orig_cost) < 0)
6275 *delta = act_delta;
6276 return acost;
6278 else
6279 iv_ca_delta_free (&act_delta);
6282 return orig_cost;
/* Tries to extend the set IVS in the best possible way in order
   to express USE. If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object. */
6290 static bool
6291 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6292 struct iv_use *use, bool originalp)
6294 comp_cost best_cost, act_cost;
6295 unsigned i;
6296 bitmap_iterator bi;
6297 struct iv_cand *cand;
6298 struct iv_ca_delta *best_delta = NULL, *act_delta;
6299 struct cost_pair *cp;
6301 iv_ca_add_use (data, ivs, use);
6302 best_cost = iv_ca_cost (ivs);
6303 cp = iv_ca_cand_for_use (ivs, use);
6304 if (cp)
6306 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6307 iv_ca_set_no_cp (data, ivs, use);
/* If ORIGINALP is true, try to find the original IV for the use. Otherwise
   first try important candidates not based on any memory object; only if
   this fails, try the specific ones. Rationale -- in loops with many
   variables the best choice is often to use just one generic biv. If we
   added many ivs specific to the uses here, the optimization algorithm
   would later be likely to get stuck in a local minimum, causing us to
   create too many ivs. Going from few ivs to more seems more likely to
   be successful -- starting from few ivs, replacing an expensive use by
   a specific iv should always be a win. */
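/* For instance, in a hypothetical source loop

     for (i = 0; i < n; i++)
       {
         a[i] = 0;
         b[i] = 0;
       }

   the two address uses and the exit test can all be expressed from the
   single generic biv i, whereas seeding the set with a separate pointer
   iv per array could trap the search in a worse local minimum. */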
6319 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6321 cand = iv_cand (data, i);
if (originalp && cand->pos != IP_ORIGINAL)
6324 continue;
6326 if (!originalp && cand->iv->base_object != NULL_TREE)
6327 continue;
6329 if (iv_ca_cand_used_p (ivs, cand))
6330 continue;
6332 cp = get_use_iv_cost (data, use, cand);
6333 if (!cp)
6334 continue;
6336 iv_ca_set_cp (data, ivs, use, cp);
6337 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6338 true);
6339 iv_ca_set_no_cp (data, ivs, use);
6340 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6342 if (compare_costs (act_cost, best_cost) < 0)
6344 best_cost = act_cost;
6346 iv_ca_delta_free (&best_delta);
6347 best_delta = act_delta;
6349 else
6350 iv_ca_delta_free (&act_delta);
6353 if (infinite_cost_p (best_cost))
6355 for (i = 0; i < use->n_map_members; i++)
6357 cp = use->cost_map + i;
6358 cand = cp->cand;
6359 if (!cand)
6360 continue;
/* Already tried this. */
if (cand->important)
  {
    if (originalp && cand->pos == IP_ORIGINAL)
      continue;
    if (!originalp && cand->iv->base_object == NULL_TREE)
      continue;
  }
6371 if (iv_ca_cand_used_p (ivs, cand))
6372 continue;
6374 act_delta = NULL;
6375 iv_ca_set_cp (data, ivs, use, cp);
6376 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6377 iv_ca_set_no_cp (data, ivs, use);
6378 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6379 cp, act_delta);
6381 if (compare_costs (act_cost, best_cost) < 0)
6383 best_cost = act_cost;
6385 if (best_delta)
6386 iv_ca_delta_free (&best_delta);
6387 best_delta = act_delta;
6389 else
6390 iv_ca_delta_free (&act_delta);
6394 iv_ca_delta_commit (data, ivs, best_delta, true);
6395 iv_ca_delta_free (&best_delta);
6397 return !infinite_cost_p (best_cost);
6400 /* Finds an initial assignment of candidates to uses. */
6402 static struct iv_ca *
6403 get_initial_solution (struct ivopts_data *data, bool originalp)
6405 struct iv_ca *ivs = iv_ca_new (data);
6406 unsigned i;
6408 for (i = 0; i < n_iv_uses (data); i++)
6409 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6411 iv_ca_free (&ivs);
6412 return NULL;
6415 return ivs;
/* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
   points to a bool variable; if it is true, this function also tries to
   break a locally optimal fixed-point by replacing candidates in IVS. */
6422 static bool
6423 try_improve_iv_set (struct ivopts_data *data,
6424 struct iv_ca *ivs, bool *try_replace_p)
6426 unsigned i, n_ivs;
6427 comp_cost acost, best_cost = iv_ca_cost (ivs);
6428 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6429 struct iv_cand *cand;
6431 /* Try extending the set of induction variables by one. */
6432 for (i = 0; i < n_iv_cands (data); i++)
6434 cand = iv_cand (data, i);
6436 if (iv_ca_cand_used_p (ivs, cand))
6437 continue;
6439 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6440 if (!act_delta)
6441 continue;
6443 /* If we successfully added the candidate and the set is small enough,
6444 try optimizing it by removing other candidates. */
6445 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6447 iv_ca_delta_commit (data, ivs, act_delta, true);
6448 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6449 iv_ca_delta_commit (data, ivs, act_delta, false);
6450 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6453 if (compare_costs (acost, best_cost) < 0)
6455 best_cost = acost;
6456 iv_ca_delta_free (&best_delta);
6457 best_delta = act_delta;
6459 else
6460 iv_ca_delta_free (&act_delta);
6463 if (!best_delta)
6465 /* Try removing the candidates from the set instead. */
6466 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6468 if (!best_delta && *try_replace_p)
6470 *try_replace_p = false;
/* So far the candidate selection algorithm tends to choose few IVs,
   which handles the cases in which a loop has many variables
   but the best choice is to use only one general biv. One
   weakness is that it can't handle the opposite case, in which
   different candidates should be chosen for different uses. To
   solve this, we replace candidates in the manner described in the
   comment above iv_ca_replace, giving the general algorithm a chance
   to break the locally optimal fixed-point in these cases. */
6479 best_cost = iv_ca_replace (data, ivs, &best_delta);
6482 if (!best_delta)
6483 return false;
6486 iv_ca_delta_commit (data, ivs, best_delta, true);
6487 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6488 iv_ca_delta_free (&best_delta);
6489 return true;
/* Attempts to find the optimal set of induction variables. We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost. */
6496 static struct iv_ca *
6497 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6499 struct iv_ca *set;
6500 bool try_replace_p = true;
6502 /* Get the initial solution. */
6503 set = get_initial_solution (data, originalp);
6504 if (!set)
6506 if (dump_file && (dump_flags & TDF_DETAILS))
6507 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6508 return NULL;
6511 if (dump_file && (dump_flags & TDF_DETAILS))
6513 fprintf (dump_file, "Initial set of candidates:\n");
6514 iv_ca_dump (data, dump_file, set);
6517 while (try_improve_iv_set (data, set, &try_replace_p))
6519 if (dump_file && (dump_flags & TDF_DETAILS))
6521 fprintf (dump_file, "Improved to:\n");
6522 iv_ca_dump (data, dump_file, set);
6526 return set;
6529 static struct iv_ca *
6530 find_optimal_iv_set (struct ivopts_data *data)
6532 unsigned i;
6533 struct iv_ca *set, *origset;
6534 struct iv_use *use;
6535 comp_cost cost, origcost;
/* Determine the cost based on a strategy that starts with the original IVs,
   then try again using a strategy that prefers candidates not based
   on any IVs. */
6540 origset = find_optimal_iv_set_1 (data, true);
6541 set = find_optimal_iv_set_1 (data, false);
6543 if (!origset && !set)
6544 return NULL;
6546 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6547 cost = set ? iv_ca_cost (set) : infinite_cost;
6549 if (dump_file && (dump_flags & TDF_DETAILS))
6551 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6552 origcost.cost, origcost.complexity);
6553 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6554 cost.cost, cost.complexity);
6557 /* Choose the one with the best cost. */
6558 if (compare_costs (origcost, cost) <= 0)
6560 if (set)
6561 iv_ca_free (&set);
6562 set = origset;
6564 else if (origset)
6565 iv_ca_free (&origset);
6567 for (i = 0; i < n_iv_uses (data); i++)
6569 use = iv_use (data, i);
6570 use->selected = iv_ca_cand_for_use (set, use)->cand;
6573 return set;
6576 /* Creates a new induction variable corresponding to CAND. */
6578 static void
6579 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6581 gimple_stmt_iterator incr_pos;
6582 tree base;
6583 bool after = false;
6585 if (!cand->iv)
6586 return;
6588 switch (cand->pos)
6590 case IP_NORMAL:
6591 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6592 break;
6594 case IP_END:
6595 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6596 after = true;
6597 break;
6599 case IP_AFTER_USE:
6600 after = true;
6601 /* fall through */
6602 case IP_BEFORE_USE:
6603 incr_pos = gsi_for_stmt (cand->incremented_at);
6604 break;
6606 case IP_ORIGINAL:
6607 /* Mark that the iv is preserved. */
6608 name_info (data, cand->var_before)->preserve_biv = true;
6609 name_info (data, cand->var_after)->preserve_biv = true;
6611 /* Rewrite the increment so that it uses var_before directly. */
6612 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6613 return;
6616 gimple_add_tmp_var (cand->var_before);
6618 base = unshare_expr (cand->iv->base);
6620 create_iv (base, unshare_expr (cand->iv->step),
6621 cand->var_before, data->current_loop,
6622 &incr_pos, after, &cand->var_before, &cand->var_after);
6625 /* Creates new induction variables described in SET. */
6627 static void
6628 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6630 unsigned i;
6631 struct iv_cand *cand;
6632 bitmap_iterator bi;
6634 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6636 cand = iv_cand (data, i);
6637 create_new_iv (data, cand);
6640 if (dump_file && (dump_flags & TDF_DETAILS))
6642 fprintf (dump_file, "Selected IV set for loop %d",
6643 data->current_loop->num);
6644 if (data->loop_loc != UNKNOWN_LOCATION)
6645 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6646 LOCATION_LINE (data->loop_loc));
6647 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6648 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6650 cand = iv_cand (data, i);
6651 dump_cand (dump_file, cand);
6653 fprintf (dump_file, "\n");
6657 /* Rewrites USE (definition of iv used in a nonlinear expression)
6658 using candidate CAND. */
6660 static void
6661 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6662 struct iv_use *use, struct iv_cand *cand)
6664 tree comp;
6665 tree op, tgt;
6666 gassign *ass;
6667 gimple_stmt_iterator bsi;
/* An important special case -- if we are asked to express the value of
   the original iv by itself, just exit; there is no need to
   introduce a new computation (that might also need casting the
   variable to unsigned and back). */
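/* E.g. (hypothetical GIMPLE), if the use is the original increment

     i_14 = i_8 + 1;

   and CAND is the original iv incremented at this statement, the
   statement already computes the candidate's value and is kept as is,
   provided the other operand is loop-invariant (checked below). */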
6673 if (cand->pos == IP_ORIGINAL
6674 && cand->incremented_at == use->stmt)
6676 enum tree_code stmt_code;
6678 gcc_assert (is_gimple_assign (use->stmt));
6679 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6681 /* Check whether we may leave the computation unchanged.
6682 This is the case only if it does not rely on other
6683 computations in the loop -- otherwise, the computation
6684 we rely upon may be removed in remove_unused_ivs,
thus leading to an ICE. */
6686 stmt_code = gimple_assign_rhs_code (use->stmt);
6687 if (stmt_code == PLUS_EXPR
6688 || stmt_code == MINUS_EXPR
6689 || stmt_code == POINTER_PLUS_EXPR)
6691 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6692 op = gimple_assign_rhs2 (use->stmt);
6693 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6694 op = gimple_assign_rhs1 (use->stmt);
6695 else
6696 op = NULL_TREE;
6698 else
6699 op = NULL_TREE;
6701 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6702 return;
6705 comp = get_computation (data->current_loop, use, cand);
6706 gcc_assert (comp != NULL_TREE);
6708 switch (gimple_code (use->stmt))
6710 case GIMPLE_PHI:
6711 tgt = PHI_RESULT (use->stmt);
6713 /* If we should keep the biv, do not replace it. */
6714 if (name_info (data, tgt)->preserve_biv)
6715 return;
6717 bsi = gsi_after_labels (gimple_bb (use->stmt));
6718 break;
6720 case GIMPLE_ASSIGN:
6721 tgt = gimple_assign_lhs (use->stmt);
6722 bsi = gsi_for_stmt (use->stmt);
6723 break;
6725 default:
6726 gcc_unreachable ();
6729 if (!valid_gimple_rhs_p (comp)
6730 || (gimple_code (use->stmt) != GIMPLE_PHI
/* We can't allow re-allocating the stmt as it might still be
   pointed to. */
6733 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6734 >= gimple_num_ops (gsi_stmt (bsi)))))
6736 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6737 true, GSI_SAME_STMT);
6738 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6740 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
/* As this isn't a plain copy, we have to reset alignment
   information. */
6743 if (SSA_NAME_PTR_INFO (comp))
6744 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6748 if (gimple_code (use->stmt) == GIMPLE_PHI)
6750 ass = gimple_build_assign (tgt, comp);
6751 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6753 bsi = gsi_for_stmt (use->stmt);
6754 remove_phi_node (&bsi, false);
6756 else
6758 gimple_assign_set_rhs_from_tree (&bsi, comp);
6759 use->stmt = gsi_stmt (bsi);
6763 /* Performs a peephole optimization to reorder the iv update statement with
6764 a mem ref to enable instruction combining in later phases. The mem ref uses
6765 the iv value before the update, so the reordering transformation requires
6766 adjustment of the offset. CAND is the selected IV_CAND.
6768 Example:
6770 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6771 iv2 = iv1 + 1;
6773 if (t < val) (1)
6774 goto L;
6775 goto Head;
Directly propagating t over to (1) would introduce an overlapping live
range and thus increase register pressure. This peephole transforms it into:
6782 iv2 = iv1 + 1;
6783 t = MEM_REF (base, iv2, 8, 8);
6784 if (t < val)
6785 goto L;
6786 goto Head;
6789 static void
6790 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6792 tree var_after;
6793 gimple iv_update, stmt;
6794 basic_block bb;
6795 gimple_stmt_iterator gsi, gsi_iv;
6797 if (cand->pos != IP_NORMAL)
6798 return;
6800 var_after = cand->var_after;
6801 iv_update = SSA_NAME_DEF_STMT (var_after);
6803 bb = gimple_bb (iv_update);
6804 gsi = gsi_last_nondebug_bb (bb);
6805 stmt = gsi_stmt (gsi);
/* Only handle conditional statements for now. */
6808 if (gimple_code (stmt) != GIMPLE_COND)
6809 return;
6811 gsi_prev_nondebug (&gsi);
6812 stmt = gsi_stmt (gsi);
6813 if (stmt != iv_update)
6814 return;
6816 gsi_prev_nondebug (&gsi);
6817 if (gsi_end_p (gsi))
6818 return;
6820 stmt = gsi_stmt (gsi);
6821 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6822 return;
6824 if (stmt != use->stmt)
6825 return;
6827 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6828 return;
6830 if (dump_file && (dump_flags & TDF_DETAILS))
6832 fprintf (dump_file, "Reordering \n");
6833 print_gimple_stmt (dump_file, iv_update, 0, 0);
6834 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6835 fprintf (dump_file, "\n");
6838 gsi = gsi_for_stmt (use->stmt);
6839 gsi_iv = gsi_for_stmt (iv_update);
6840 gsi_move_before (&gsi_iv, &gsi);
6842 cand->pos = IP_BEFORE_USE;
6843 cand->incremented_at = use->stmt;
6846 /* Rewrites USE (address that is an iv) using candidate CAND. */
6848 static void
6849 rewrite_use_address_1 (struct ivopts_data *data,
6850 struct iv_use *use, struct iv_cand *cand)
6852 aff_tree aff;
6853 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6854 tree base_hint = NULL_TREE;
6855 tree ref, iv;
6856 bool ok;
6858 adjust_iv_update_pos (cand, use);
6859 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6860 gcc_assert (ok);
6861 unshare_aff_combination (&aff);
6863 /* To avoid undefined overflow problems, all IV candidates use unsigned
6864 integer types. The drawback is that this makes it impossible for
6865 create_mem_ref to distinguish an IV that is based on a memory object
6866 from one that represents simply an offset.
6868 To work around this problem, we pass a hint to create_mem_ref that
6869 indicates which variable (if any) in aff is an IV based on a memory
6870 object. Note that we only consider the candidate. If this is not
6871 based on an object, the base of the reference is in some subexpression
6872 of the use -- but these will use pointer types, so they are recognized
6873 by the create_mem_ref heuristics anyway. */
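/* As a hypothetical illustration: for a use *p_5 whose candidate is
   based on the memory object of p, AFF may look like
   "(unsigned long) p_4 + 4"; passing the candidate's variable as the
   hint lets create_mem_ref build MEM[p_4 + 4] rather than treating the
   whole combination as a plain integer offset. */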
6874 if (cand->iv->base_object)
6875 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6877 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6878 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6879 reference_alias_ptr_type (*use->op_p),
6880 iv, base_hint, data->speed);
6881 copy_ref_info (ref, *use->op_p);
6882 *use->op_p = ref;
6885 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6886 first use of a group, rewrites sub uses in the group too. */
6888 static void
6889 rewrite_use_address (struct ivopts_data *data,
6890 struct iv_use *use, struct iv_cand *cand)
6892 struct iv_use *next;
6894 gcc_assert (use->sub_id == 0);
6895 rewrite_use_address_1 (data, use, cand);
6896 update_stmt (use->stmt);
6898 for (next = use->next; next != NULL; next = next->next)
6900 rewrite_use_address_1 (data, next, cand);
6901 update_stmt (next->stmt);
6904 return;
6907 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6908 candidate CAND. */
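/* A hypothetical example of such a rewrite: the exit test

     if (i_4 < n_6)

   may become

     if (ivtmp_23 != _25)

   where _25 is the bound computed from CAND in the loop preheader. */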
6910 static void
6911 rewrite_use_compare (struct ivopts_data *data,
6912 struct iv_use *use, struct iv_cand *cand)
6914 tree comp, *var_p, op, bound;
6915 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6916 enum tree_code compare;
6917 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6918 bool ok;
6920 bound = cp->value;
6921 if (bound)
6923 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6924 tree var_type = TREE_TYPE (var);
6925 gimple_seq stmts;
6927 if (dump_file && (dump_flags & TDF_DETAILS))
6929 fprintf (dump_file, "Replacing exit test: ");
6930 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6932 compare = cp->comp;
6933 bound = unshare_expr (fold_convert (var_type, bound));
6934 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6935 if (stmts)
6936 gsi_insert_seq_on_edge_immediate (
6937 loop_preheader_edge (data->current_loop),
6938 stmts);
6940 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6941 gimple_cond_set_lhs (cond_stmt, var);
6942 gimple_cond_set_code (cond_stmt, compare);
6943 gimple_cond_set_rhs (cond_stmt, op);
6944 return;
6947 /* The induction variable elimination failed; just express the original
6948 giv. */
6949 comp = get_computation (data->current_loop, use, cand);
6950 gcc_assert (comp != NULL_TREE);
6952 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6953 gcc_assert (ok);
6955 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6956 true, GSI_SAME_STMT);
6959 /* Rewrites USE using candidate CAND. */
6961 static void
6962 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6964 switch (use->type)
6966 case USE_NONLINEAR_EXPR:
6967 rewrite_use_nonlinear_expr (data, use, cand);
6968 break;
6970 case USE_ADDRESS:
6971 rewrite_use_address (data, use, cand);
6972 break;
6974 case USE_COMPARE:
6975 rewrite_use_compare (data, use, cand);
6976 break;
6978 default:
6979 gcc_unreachable ();
6982 update_stmt (use->stmt);
6985 /* Rewrite the uses using the selected induction variables. */
6987 static void
6988 rewrite_uses (struct ivopts_data *data)
6990 unsigned i;
6991 struct iv_cand *cand;
6992 struct iv_use *use;
6994 for (i = 0; i < n_iv_uses (data); i++)
6996 use = iv_use (data, i);
6997 cand = use->selected;
6998 gcc_assert (cand);
7000 rewrite_use (data, use, cand);
7004 /* Removes the ivs that are not used after rewriting. */
7006 static void
7007 remove_unused_ivs (struct ivopts_data *data)
7009 unsigned j;
7010 bitmap_iterator bi;
7011 bitmap toremove = BITMAP_ALLOC (NULL);
7013 /* Figure out an order in which to release SSA DEFs so that we don't
7014 release something that we'd have to propagate into a debug stmt
7015 afterwards. */
7016 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7018 struct version_info *info;
7020 info = ver_info (data, j);
7021 if (info->iv
7022 && !integer_zerop (info->iv->step)
7023 && !info->inv_id
7024 && !info->iv->have_use_for
7025 && !info->preserve_biv)
7027 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7029 tree def = info->iv->ssa_name;
7031 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7033 imm_use_iterator imm_iter;
7034 use_operand_p use_p;
7035 gimple stmt;
7036 int count = 0;
7038 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7040 if (!gimple_debug_bind_p (stmt))
7041 continue;
7043 /* We just want to determine whether to do nothing
7044 (count == 0), to substitute the computed
7045 expression into a single use of the SSA DEF by
7046 itself (count == 1), or to use a debug temp
7047 because the SSA DEF is used multiple times or as
7048 part of a larger expression (count > 1). */
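/* For instance (hypothetical debug binds):
     # DEBUG x => i_7       gives count == 1,
     # DEBUG x => i_7 + 1   gives count == 2;
   a bind of the bare DEF counts once, while a DEF nested in a
   larger value counts twice and forces the debug temp below. */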
7049 count++;
7050 if (gimple_debug_bind_get_value (stmt) != def)
7051 count++;
7053 if (count > 1)
7054 BREAK_FROM_IMM_USE_STMT (imm_iter);
7057 if (!count)
7058 continue;
7060 struct iv_use dummy_use;
7061 struct iv_cand *best_cand = NULL, *cand;
7062 unsigned i, best_pref = 0, cand_pref;
7064 memset (&dummy_use, 0, sizeof (dummy_use));
7065 dummy_use.iv = info->iv;
7066 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7068 cand = iv_use (data, i)->selected;
7069 if (cand == best_cand)
7070 continue;
7071 cand_pref = operand_equal_p (cand->iv->step,
7072 info->iv->step, 0)
7073 ? 4 : 0;
7074 cand_pref
7075 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7076 == TYPE_MODE (TREE_TYPE (info->iv->base))
7077 ? 2 : 0;
7078 cand_pref
7079 += TREE_CODE (cand->iv->base) == INTEGER_CST
7080 ? 1 : 0;
7081 if (best_cand == NULL || best_pref < cand_pref)
7083 best_cand = cand;
7084 best_pref = cand_pref;
7088 if (!best_cand)
7089 continue;
7091 tree comp = get_computation_at (data->current_loop,
7092 &dummy_use, best_cand,
7093 SSA_NAME_DEF_STMT (def));
7094 if (!comp)
7095 continue;
7097 if (count > 1)
7099 tree vexpr = make_node (DEBUG_EXPR_DECL);
7100 DECL_ARTIFICIAL (vexpr) = 1;
7101 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7102 if (SSA_NAME_VAR (def))
7103 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7104 else
7105 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7106 gdebug *def_temp
7107 = gimple_build_debug_bind (vexpr, comp, NULL);
7108 gimple_stmt_iterator gsi;
7110 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7111 gsi = gsi_after_labels (gimple_bb
7112 (SSA_NAME_DEF_STMT (def)));
7113 else
7114 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7116 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7117 comp = vexpr;
7120 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7122 if (!gimple_debug_bind_p (stmt))
7123 continue;
7125 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7126 SET_USE (use_p, comp);
7128 update_stmt (stmt);
7134 release_defs_bitset (toremove);
7136 BITMAP_FREE (toremove);
7139 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7140 for hash_map::traverse. */
7142 bool
7143 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7145 free (value);
7146 return true;
7149 /* Frees data allocated by the optimization of a single loop. */
7151 static void
7152 free_loop_data (struct ivopts_data *data)
7154 unsigned i, j;
7155 bitmap_iterator bi;
7156 tree obj;
7158 if (data->niters)
7160 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7161 delete data->niters;
7162 data->niters = NULL;
7165 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7167 struct version_info *info;
7169 info = ver_info (data, i);
7170 info->iv = NULL;
7171 info->has_nonlin_use = false;
7172 info->preserve_biv = false;
7173 info->inv_id = 0;
7175 bitmap_clear (data->relevant);
7176 bitmap_clear (data->important_candidates);
7178 for (i = 0; i < n_iv_uses (data); i++)
7180 struct iv_use *use = iv_use (data, i);
7181 struct iv_use *pre = use, *sub = use->next;
7183 while (sub)
7185 gcc_assert (sub->related_cands == NULL);
7186 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7188 pre = sub;
7189 sub = sub->next;
7190 free (pre);
7193 BITMAP_FREE (use->related_cands);
7194 for (j = 0; j < use->n_map_members; j++)
7195 if (use->cost_map[j].depends_on)
7196 BITMAP_FREE (use->cost_map[j].depends_on);
7197 free (use->cost_map);
7198 free (use);
7200 data->iv_uses.truncate (0);
7202 for (i = 0; i < n_iv_cands (data); i++)
7204 struct iv_cand *cand = iv_cand (data, i);
7206 if (cand->depends_on)
7207 BITMAP_FREE (cand->depends_on);
7208 free (cand);
7210 data->iv_candidates.truncate (0);
7212 if (data->version_info_size < num_ssa_names)
7214 data->version_info_size = 2 * num_ssa_names;
7215 free (data->version_info);
7216 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7219 data->max_inv_id = 0;
7221 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7222 SET_DECL_RTL (obj, NULL_RTX);
7224 decl_rtl_to_reset.truncate (0);
7226 data->inv_expr_tab->empty ();
7227 data->inv_expr_id = 0;
/* Finalizes the data structures used by the iv optimization
   pass. */
7233 static void
7234 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7236 free_loop_data (data);
7237 free (data->version_info);
7238 BITMAP_FREE (data->relevant);
7239 BITMAP_FREE (data->important_candidates);
7241 decl_rtl_to_reset.release ();
7242 data->iv_uses.release ();
7243 data->iv_candidates.release ();
7244 delete data->inv_expr_tab;
7245 data->inv_expr_tab = NULL;
7246 free_affine_expand_cache (&data->name_expansion_cache);
7247 obstack_free (&data->iv_obstack, NULL);
7250 /* Returns true if the loop body BODY includes any function calls. */
7252 static bool
7253 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7255 gimple_stmt_iterator gsi;
7256 unsigned i;
7258 for (i = 0; i < num_nodes; i++)
7259 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7261 gimple stmt = gsi_stmt (gsi);
7262 if (is_gimple_call (stmt)
7263 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7264 return true;
7266 return false;
7269 /* Optimizes the LOOP. Returns true if anything changed. */
7271 static bool
7272 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7274 bool changed = false;
7275 struct iv_ca *iv_ca;
7276 edge exit = single_dom_exit (loop);
7277 basic_block *body;
7279 gcc_assert (!data->niters);
7280 data->current_loop = loop;
7281 data->loop_loc = find_loop_location (loop);
7282 data->speed = optimize_loop_for_speed_p (loop);
7284 if (dump_file && (dump_flags & TDF_DETAILS))
7286 fprintf (dump_file, "Processing loop %d", loop->num);
7287 if (data->loop_loc != UNKNOWN_LOCATION)
7288 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7289 LOCATION_LINE (data->loop_loc));
7290 fprintf (dump_file, "\n");
7292 if (exit)
7294 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7295 exit->src->index, exit->dest->index);
7296 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7297 fprintf (dump_file, "\n");
7300 fprintf (dump_file, "\n");
7303 body = get_loop_body (loop);
7304 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7305 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7306 free (body);
7308 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7310 /* For each ssa name determines whether it behaves as an induction variable
7311 in some loop. */
7312 if (!find_induction_variables (data))
7313 goto finish;
7315 /* Finds interesting uses (item 1). */
7316 find_interesting_uses (data);
7317 group_address_uses (data);
7318 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7319 goto finish;
7321 /* Finds candidates for the induction variables (item 2). */
7322 find_iv_candidates (data);
7324 /* Calculates the costs (item 3, part 1). */
7325 determine_iv_costs (data);
7326 determine_use_iv_costs (data);
7327 determine_set_costs (data);
7329 /* Find the optimal set of induction variables (item 3, part 2). */
7330 iv_ca = find_optimal_iv_set (data);
7331 if (!iv_ca)
7332 goto finish;
7333 changed = true;
7335 /* Create the new induction variables (item 4, part 1). */
7336 create_new_ivs (data, iv_ca);
7337 iv_ca_free (&iv_ca);
7339 /* Rewrite the uses (item 4, part 2). */
7340 rewrite_uses (data);
7342 /* Remove the ivs that are unused after rewriting. */
7343 remove_unused_ivs (data);
7345 /* We have changed the structure of induction variables; it might happen
7346 that definitions in the scev database refer to some of them that were
7347 eliminated. */
7348 scev_reset ();
7350 finish:
7351 free_loop_data (data);
7353 return changed;
7356 /* Main entry point. Optimizes induction variables in loops. */
7358 void
7359 tree_ssa_iv_optimize (void)
7361 struct loop *loop;
7362 struct ivopts_data data;
7364 tree_ssa_iv_optimize_init (&data);
7366 /* Optimize the loops starting with the innermost ones. */
7367 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7369 if (dump_file && (dump_flags & TDF_DETAILS))
7370 flow_loop_dump (loop, dump_file, NULL, 1);
7372 tree_ssa_iv_optimize_loop (&data, loop);
7375 tree_ssa_iv_optimize_finalize (&data);