/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
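/* As a hypothetical illustration (not code from this pass), a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   contains an address use &a[i] based on the basic iv i.  A candidate
   such as "p = &a[0]; ...; p += sizeof (*a)" may be chosen for that use,
   and if the exit test is also rewritten against p, the loop effectively
   becomes

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   i.e. strength reduction of the address computation combined with
   elimination of the original induction variable i.  */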
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
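/* Illustrative note: when profile feedback provides an estimate (say,
   roughly 100 iterations per entry), avg_loop_niter returns that value;
   without any estimate it falls back to the fixed guess AVG_LOOP_NITER,
   i.e. 5 iterations.  */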
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
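/* For illustration: {4, 1} and {4, 2} describe computations with the same
   runtime cost; the larger complexity of the latter is only meaningful as
   a tie-breaker between otherwise equally expensive alternatives, e.g. for
   preferring a simpler addressing mode.  (This is an explanatory note, not
   a rule enforced by the type itself.)  */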
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
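/* For example (illustrative only): for EXPR of the form &a[i].b the
   function walks down to the base declaration "a" and returns &a; for
   "p + 16" (a POINTER_PLUS_EXPR) it recurses into "p"; and for an
   integer constant cast to a pointer it returns NULL_TREE, since no
   underlying object is known.  */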
/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
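/* Illustration (not part of the pass): for a header phi like
   i_1 = PHI <0(preheader), i_2(latch)> with i_2 = i_1 + 4 in the loop
   body, simple_iv recognizes the affine evolution {0, +, 4}, so the
   step returned here would be 4; a phi whose latch value is not an
   affine function of itself yields NULL_TREE.  */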
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we should stop at any ssa var, but since that's expensive
	 and unlikely to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we should stop at any ssa var, but since that's expensive
     and unlikely to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
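/* Illustration (not part of the pass): with a biv i_1 evolving as
   {0, +, 1}, an assignment such as j_2 = i_1 * 4 + 8 defines a general
   iv (giv) with base 8 and step 4; find_givs_in_stmt_scev would fill IV
   with exactly those parameters.  */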
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;

      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
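/* Illustration (not part of the pass): for a condition "if (i_1 < n_2)"
   where i_1 is an iv with nonzero step and n_2 is loop invariant,
   CONTROL_VAR points at i_1, BOUND at n_2, and true is returned; for
   "if (a_3 < b_4)" where neither side is a recognized iv, both operands
   are returned as-is and the result is false.  */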
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
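/* Worked example (illustrative): with TOP = n * 6 + n * 2 and BOT = n,
   the PLUS_EXPR case recurses into both MULT_EXPR operands, yielding
   p0 = 6 and p1 = 2, so *MUL becomes 8.  With TOP = 7 and BOT = 2 the
   INTEGER_CST case computes 7 = 3 * 2 + 1; the nonzero remainder makes
   the function return false.  */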
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has the same stripped iv base and
   step, this function records this one as a sub use to that; otherwise
   it records it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
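/* Illustration (not part of the pass): the accesses a[i] and a[i + 1]
   inside one loop share the stripped base &a[0] and the same step, so
   the second is recorded as a sub use of the first with addr_offset
   equal to sizeof (a[0]); keeping them grouped lets later code cost
   them against a single candidate with different constant offsets.  */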
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
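/* Illustration (not part of the pass): for a store "*(p_1 + 8) = 0"
   where p_1 is an iv with step 4, the walk above substitutes the iv
   base into the address, accumulates the combined step, and records
   the whole address as a USE_ADDRESS via record_group_use; if anything
   is volatile, a bitfield, or otherwise unsuitable, only the indices
   are recorded as plain uses on the "fail" path.  */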
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
2175 /* Finds interesting uses of induction variables outside of loops
2176 on loop exit edge EXIT. */
2178 static void
2179 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2181 gphi *phi;
2182 gphi_iterator psi;
2183 tree def;
2185 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2187 phi = psi.phi ();
2188 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2189 if (!virtual_operand_p (def))
2190 find_interesting_uses_op (data, def);
2194 /* Finds uses of the induction variables that are interesting. */
2196 static void
2197 find_interesting_uses (struct ivopts_data *data)
2199 basic_block bb;
2200 gimple_stmt_iterator bsi;
2201 basic_block *body = get_loop_body (data->current_loop);
2202 unsigned i;
2203 struct version_info *info;
2204 edge e;
2206 if (dump_file && (dump_flags & TDF_DETAILS))
2207 fprintf (dump_file, "Uses:\n\n");
2209 for (i = 0; i < data->current_loop->num_nodes; i++)
2211 edge_iterator ei;
2212 bb = body[i];
2214 FOR_EACH_EDGE (e, ei, bb->succs)
2215 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2216 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2217 find_interesting_uses_outside (data, e);
2219 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2220 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2221 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2222 if (!is_gimple_debug (gsi_stmt (bsi)))
2223 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2226 if (dump_file && (dump_flags & TDF_DETAILS))
2228 bitmap_iterator bi;
2230 fprintf (dump_file, "\n");
2232 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2234 info = ver_info (data, i);
2235 if (info->inv_id)
2237 fprintf (dump_file, " ");
2238 print_generic_expr (dump_file, info->name, TDF_SLIM);
2239 fprintf (dump_file, " is invariant (%d)%s\n",
2240 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2244 fprintf (dump_file, "\n");
2247 free (body);
2250 /* Compute the maximum offset of the [base + offset] addressing mode
2251 for the memory reference represented by USE. */
2253 static HOST_WIDE_INT
2254 compute_max_addr_offset (struct iv_use *use)
2256 int width;
2257 rtx reg, addr;
2258 HOST_WIDE_INT i, off;
2259 unsigned list_index, num;
2260 addr_space_t as;
2261 machine_mode mem_mode, addr_mode;
2262 static vec<HOST_WIDE_INT> max_offset_list;
2264 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2265 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2267 num = max_offset_list.length ();
2268 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2269 if (list_index >= num)
2271 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2272 for (; num < max_offset_list.length (); num++)
2273 max_offset_list[num] = -1;
2276 off = max_offset_list[list_index];
2277 if (off != -1)
2278 return off;
2280 addr_mode = targetm.addr_space.address_mode (as);
2281 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2282 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2284 width = GET_MODE_BITSIZE (addr_mode) - 1;
2285 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2286 width = HOST_BITS_PER_WIDE_INT - 1;
2288 for (i = width; i > 0; i--)
2290 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2291 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2292 if (memory_address_addr_space_p (mem_mode, addr, as))
2293 break;
2295 /* For some strict-alignment targets, the offset must be naturally
2296 aligned. Try an aligned offset if mem_mode is not QImode. */
2297 off = ((unsigned HOST_WIDE_INT) 1 << i);
2298 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2300 off -= GET_MODE_SIZE (mem_mode);
2301 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2302 if (memory_address_addr_space_p (mem_mode, addr, as))
2303 break;
2306 if (i == 0)
2307 off = 0;
2309 max_offset_list[list_index] = off;
2310 return off;
2313 /* Check if all small groups should be split. Return true if and
2314 only if:
2316 1) At least one group contains two uses with different offsets.
2317 2) No group contains more than two uses with different offsets.
2319 Return false otherwise. We want to split such groups because:
2321 1) Small groups don't have much benefit and may interfere with
2322 general candidate selection.
2323 2) The problem size with only small groups is usually small and
2324 the general algorithm can handle it well.
2326 TODO -- The above claim may not hold when auto increment is supported. */
2328 static bool
2329 split_all_small_groups (struct ivopts_data *data)
2331 bool split_p = false;
2332 unsigned int i, n, distinct;
2333 struct iv_use *pre, *use;
2335 n = n_iv_uses (data);
2336 for (i = 0; i < n; i++)
2338 use = iv_use (data, i);
2339 if (!use->next)
2340 continue;
2342 distinct = 1;
2343 gcc_assert (use->type == USE_ADDRESS);
2344 for (pre = use, use = use->next; use; pre = use, use = use->next)
2346 if (pre->addr_offset != use->addr_offset)
2347 distinct++;
2349 if (distinct > 2)
2350 return false;
2352 if (distinct == 2)
2353 split_p = true;
2356 return split_p;
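/* Worked example (a sketch with made-up offsets): with groups
   G1 = { base, base + 4 } and G2 = { base + 8 }, every group has at most
   two distinct offsets and G1 has exactly two, so we return true and all
   groups get dissolved into singleton uses.  If G1 instead were
   { base, base + 4, base + 8 }, it would have three distinct offsets and
   we would return false, keeping the groups intact.  */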
2359 /* For each group of address type uses, this function further groups
2360 these uses according to the maximum offset supported by the target's
2361 [base + offset] addressing mode. */
2363 static void
2364 group_address_uses (struct ivopts_data *data)
2366 HOST_WIDE_INT max_offset = -1;
2367 unsigned int i, n, sub_id;
2368 struct iv_use *pre, *use;
2369 unsigned HOST_WIDE_INT addr_offset_first;
2371 /* Reset max offset to split all small groups. */
2372 if (split_all_small_groups (data))
2373 max_offset = 0;
2375 n = n_iv_uses (data);
2376 for (i = 0; i < n; i++)
2378 use = iv_use (data, i);
2379 if (!use->next)
2380 continue;
2382 gcc_assert (use->type == USE_ADDRESS);
2383 if (max_offset != 0)
2384 max_offset = compute_max_addr_offset (use);
2386 while (use)
2388 sub_id = 0;
2389 addr_offset_first = use->addr_offset;
2390 /* Only uses whose offset relative to the first use fits in the
2391 offset part can be grouped together. */
2392 for (pre = use, use = use->next;
2393 use && (use->addr_offset - addr_offset_first
2394 <= (unsigned HOST_WIDE_INT) max_offset);
2395 pre = use, use = use->next)
2397 use->id = pre->id;
2398 use->sub_id = ++sub_id;
2401 /* Break the list and create new group. */
2402 if (use)
2404 pre->next = NULL;
2405 use->id = n_iv_uses (data);
2406 use->related_cands = BITMAP_ALLOC (NULL);
2407 data->iv_uses.safe_push (use);
2412 if (dump_file && (dump_flags & TDF_DETAILS))
2413 dump_uses (dump_file, data);
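/* Grouping sketch (hypothetical numbers): assume compute_max_addr_offset
   returns 255 and the chained uses have offsets 0, 16 and 4096 from the
   same iv.  The first two satisfy offset - 0 <= 255 and share an id
   (sub_ids 0 and 1); the use at offset 4096 breaks the chain and becomes
   the head of a new group with a fresh id from n_iv_uses.  */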
2416 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2417 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2418 we are at the top-level of the processed address. */
2420 static tree
2421 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2422 HOST_WIDE_INT *offset)
2424 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2425 enum tree_code code;
2426 tree type, orig_type = TREE_TYPE (expr);
2427 HOST_WIDE_INT off0, off1, st;
2428 tree orig_expr = expr;
2430 STRIP_NOPS (expr);
2432 type = TREE_TYPE (expr);
2433 code = TREE_CODE (expr);
2434 *offset = 0;
2436 switch (code)
2438 case INTEGER_CST:
2439 if (!cst_and_fits_in_hwi (expr)
2440 || integer_zerop (expr))
2441 return orig_expr;
2443 *offset = int_cst_value (expr);
2444 return build_int_cst (orig_type, 0);
2446 case POINTER_PLUS_EXPR:
2447 case PLUS_EXPR:
2448 case MINUS_EXPR:
2449 op0 = TREE_OPERAND (expr, 0);
2450 op1 = TREE_OPERAND (expr, 1);
2452 op0 = strip_offset_1 (op0, false, false, &off0);
2453 op1 = strip_offset_1 (op1, false, false, &off1);
2455 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2456 if (op0 == TREE_OPERAND (expr, 0)
2457 && op1 == TREE_OPERAND (expr, 1))
2458 return orig_expr;
2460 if (integer_zerop (op1))
2461 expr = op0;
2462 else if (integer_zerop (op0))
2464 if (code == MINUS_EXPR)
2465 expr = fold_build1 (NEGATE_EXPR, type, op1);
2466 else
2467 expr = op1;
2469 else
2470 expr = fold_build2 (code, type, op0, op1);
2472 return fold_convert (orig_type, expr);
2474 case MULT_EXPR:
2475 op1 = TREE_OPERAND (expr, 1);
2476 if (!cst_and_fits_in_hwi (op1))
2477 return orig_expr;
2479 op0 = TREE_OPERAND (expr, 0);
2480 op0 = strip_offset_1 (op0, false, false, &off0);
2481 if (op0 == TREE_OPERAND (expr, 0))
2482 return orig_expr;
2484 *offset = off0 * int_cst_value (op1);
2485 if (integer_zerop (op0))
2486 expr = op0;
2487 else
2488 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2490 return fold_convert (orig_type, expr);
2492 case ARRAY_REF:
2493 case ARRAY_RANGE_REF:
2494 if (!inside_addr)
2495 return orig_expr;
2497 step = array_ref_element_size (expr);
2498 if (!cst_and_fits_in_hwi (step))
2499 break;
2501 st = int_cst_value (step);
2502 op1 = TREE_OPERAND (expr, 1);
2503 op1 = strip_offset_1 (op1, false, false, &off1);
2504 *offset = off1 * st;
2506 if (top_compref
2507 && integer_zerop (op1))
2509 /* Strip the component reference completely. */
2510 op0 = TREE_OPERAND (expr, 0);
2511 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2512 *offset += off0;
2513 return op0;
2515 break;
2517 case COMPONENT_REF:
2519 tree field;
2521 if (!inside_addr)
2522 return orig_expr;
2524 tmp = component_ref_field_offset (expr);
2525 field = TREE_OPERAND (expr, 1);
2526 if (top_compref
2527 && cst_and_fits_in_hwi (tmp)
2528 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2530 HOST_WIDE_INT boffset, abs_off;
2532 /* Strip the component reference completely. */
2533 op0 = TREE_OPERAND (expr, 0);
2534 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2535 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2536 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2537 if (boffset < 0)
2538 abs_off = -abs_off;
2540 *offset = off0 + int_cst_value (tmp) + abs_off;
2541 return op0;
2544 break;
2546 case ADDR_EXPR:
2547 op0 = TREE_OPERAND (expr, 0);
2548 op0 = strip_offset_1 (op0, true, true, &off0);
2549 *offset += off0;
2551 if (op0 == TREE_OPERAND (expr, 0))
2552 return orig_expr;
2554 expr = build_fold_addr_expr (op0);
2555 return fold_convert (orig_type, expr);
2557 case MEM_REF:
2558 /* ??? Offset operand? */
2559 inside_addr = false;
2560 break;
2562 default:
2563 return orig_expr;
2566 /* Default handling of expressions for which we want to recurse into
2567 the first operand. */
2568 op0 = TREE_OPERAND (expr, 0);
2569 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2570 *offset += off0;
2572 if (op0 == TREE_OPERAND (expr, 0)
2573 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2574 return orig_expr;
2576 expr = copy_node (expr);
2577 TREE_OPERAND (expr, 0) = op0;
2578 if (op1)
2579 TREE_OPERAND (expr, 1) = op1;
2581 /* Inside address, we might strip the top level component references,
2582 thus changing type of the expression. Handling of ADDR_EXPR
2583 will fix that. */
2584 expr = fold_convert (orig_type, expr);
2586 return expr;
2589 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2591 static tree
2592 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2594 HOST_WIDE_INT off;
2595 tree core = strip_offset_1 (expr, false, false, &off);
2596 *offset = off;
2597 return core;
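/* Example (hypothetical expressions): for EXPR = &a[i + 4] with 4-byte
   elements, strip_offset returns &a[i] and sets *OFFSET to 16; for
   EXPR = p + 10 with integer p it returns p and sets *OFFSET to 10.
   The stripped core is what add_iv_value_candidates later uses to
   propose an offset-free candidate.  */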
2600 /* Returns a variant of TYPE that can be used as a base for different uses.
2601 We return an unsigned type with the same precision, which avoids problems
2602 with overflow. */
2604 static tree
2605 generic_type_for (tree type)
2607 if (POINTER_TYPE_P (type))
2608 return unsigned_type_for (type);
2610 if (TYPE_UNSIGNED (type))
2611 return type;
2613 return unsigned_type_for (type);
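/* For instance, generic_type_for (int) yields unsigned int, and for a
   pointer type it yields an unsigned integer type of pointer precision,
   so candidate arithmetic wraps instead of invoking undefined overflow
   behavior.  */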
2616 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2617 the bitmap in which we should store them. */
2619 static struct ivopts_data *fd_ivopts_data;
2620 static tree
2621 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2623 bitmap *depends_on = (bitmap *) data;
2624 struct version_info *info;
2626 if (TREE_CODE (*expr_p) != SSA_NAME)
2627 return NULL_TREE;
2628 info = name_info (fd_ivopts_data, *expr_p);
2630 if (!info->inv_id || info->has_nonlin_use)
2631 return NULL_TREE;
2633 if (!*depends_on)
2634 *depends_on = BITMAP_ALLOC (NULL);
2635 bitmap_set_bit (*depends_on, info->inv_id);
2637 return NULL_TREE;
2640 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2641 position to POS. If USE is not NULL, the candidate is set as related to
2642 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2643 replacement of the final value of the iv by a direct computation. */
2645 static struct iv_cand *
2646 add_candidate_1 (struct ivopts_data *data,
2647 tree base, tree step, bool important, enum iv_position pos,
2648 struct iv_use *use, gimple incremented_at)
2650 unsigned i;
2651 struct iv_cand *cand = NULL;
2652 tree type, orig_type;
2654 /* For non-original variables, make sure their values are computed in a type
2655 that does not invoke undefined behavior on overflows (since in general,
2656 we cannot prove that these induction variables are non-wrapping). */
2657 if (pos != IP_ORIGINAL)
2659 orig_type = TREE_TYPE (base);
2660 type = generic_type_for (orig_type);
2661 if (type != orig_type)
2663 base = fold_convert (type, base);
2664 step = fold_convert (type, step);
2668 for (i = 0; i < n_iv_cands (data); i++)
2670 cand = iv_cand (data, i);
2672 if (cand->pos != pos)
2673 continue;
2675 if (cand->incremented_at != incremented_at
2676 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2677 && cand->ainc_use != use))
2678 continue;
2680 if (!cand->iv)
2682 if (!base && !step)
2683 break;
2685 continue;
2688 if (!base && !step)
2689 continue;
2691 if (operand_equal_p (base, cand->iv->base, 0)
2692 && operand_equal_p (step, cand->iv->step, 0)
2693 && (TYPE_PRECISION (TREE_TYPE (base))
2694 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2695 break;
2698 if (i == n_iv_cands (data))
2700 cand = XCNEW (struct iv_cand);
2701 cand->id = i;
2703 if (!base && !step)
2704 cand->iv = NULL;
2705 else
2706 cand->iv = alloc_iv (base, step);
2708 cand->pos = pos;
2709 if (pos != IP_ORIGINAL && cand->iv)
2711 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2712 cand->var_after = cand->var_before;
2714 cand->important = important;
2715 cand->incremented_at = incremented_at;
2716 data->iv_candidates.safe_push (cand);
2718 if (step
2719 && TREE_CODE (step) != INTEGER_CST)
2721 fd_ivopts_data = data;
2722 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2725 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2726 cand->ainc_use = use;
2727 else
2728 cand->ainc_use = NULL;
2730 if (dump_file && (dump_flags & TDF_DETAILS))
2731 dump_cand (dump_file, cand);
2734 if (important && !cand->important)
2736 cand->important = true;
2737 if (dump_file && (dump_flags & TDF_DETAILS))
2738 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2741 if (use)
2743 bitmap_set_bit (use->related_cands, i);
2744 if (dump_file && (dump_flags & TDF_DETAILS))
2745 fprintf (dump_file, "Candidate %d is related to use %d\n",
2746 cand->id, use->id);
2749 return cand;
2752 /* Returns true if incrementing the induction variable at the end of the LOOP
2753 is allowed.
2755 The purpose is to avoid splitting the latch edge with a biv increment,
2756 thus creating a jump, possibly confusing other optimization passes and
2757 leaving less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2758 is not available (so we do not have a better alternative), or if the latch
2759 edge is already nonempty. */
2761 static bool
2762 allow_ip_end_pos_p (struct loop *loop)
2764 if (!ip_normal_pos (loop))
2765 return true;
2767 if (!empty_block_p (ip_end_pos (loop)))
2768 return true;
2770 return false;
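/* Sketch of the concern (a simplified picture): with an empty latch,
   placing the increment at IP_END would turn the fall-through edge

     body -> latch -> header

   into a block containing only "i += step" plus a jump, whereas
   IP_NORMAL places the increment before the exit test.  Hence IP_END is
   allowed only when no IP_NORMAL position exists or the latch already
   holds code anyway.  */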
2773 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2774 Important field is set to IMPORTANT. */
2776 static void
2777 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2778 bool important, struct iv_use *use)
2780 basic_block use_bb = gimple_bb (use->stmt);
2781 machine_mode mem_mode;
2782 unsigned HOST_WIDE_INT cstepi;
2784 /* If we insert the increment in any position other than the standard
2785 ones, we must ensure that it is incremented once per iteration.
2786 It must not be in an inner nested loop, or one side of an if
2787 statement. */
2788 if (use_bb->loop_father != data->current_loop
2789 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2790 || stmt_could_throw_p (use->stmt)
2791 || !cst_and_fits_in_hwi (step))
2792 return;
2794 cstepi = int_cst_value (step);
2796 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2797 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2798 || USE_STORE_PRE_INCREMENT (mem_mode))
2799 && GET_MODE_SIZE (mem_mode) == cstepi)
2800 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2801 || USE_STORE_PRE_DECREMENT (mem_mode))
2802 && GET_MODE_SIZE (mem_mode) == -cstepi))
2804 enum tree_code code = MINUS_EXPR;
2805 tree new_base;
2806 tree new_step = step;
2808 if (POINTER_TYPE_P (TREE_TYPE (base)))
2810 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2811 code = POINTER_PLUS_EXPR;
2813 else
2814 new_step = fold_convert (TREE_TYPE (base), new_step);
2815 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2816 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2817 use->stmt);
2819 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2820 || USE_STORE_POST_INCREMENT (mem_mode))
2821 && GET_MODE_SIZE (mem_mode) == cstepi)
2822 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2823 || USE_STORE_POST_DECREMENT (mem_mode))
2824 && GET_MODE_SIZE (mem_mode) == -cstepi))
2826 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2827 use->stmt);
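/* Autoinc sketch (target-dependent, hypothetical values): on a machine
   with a post-increment load, a use *p with candidate
   { base = p0, step = 4 } and GET_MODE_SIZE (mem_mode) == 4 gets an
   IP_AFTER_USE candidate, so the access and the increment can later be
   combined by auto-inc-dec into something like "ld r1, [r0], #4".  */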
2831 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2832 position to POS. If USE is not NULL, the candidate is set as related to
2833 it. The candidate computation is scheduled on all available positions. */
2835 static void
2836 add_candidate (struct ivopts_data *data,
2837 tree base, tree step, bool important, struct iv_use *use)
2839 gcc_assert (use == NULL || use->sub_id == 0);
2841 if (ip_normal_pos (data->current_loop))
2842 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2843 if (ip_end_pos (data->current_loop)
2844 && allow_ip_end_pos_p (data->current_loop))
2845 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2847 if (use != NULL && use->type == USE_ADDRESS)
2848 add_autoinc_candidates (data, base, step, important, use);
2851 /* Adds standard iv candidates. */
2853 static void
2854 add_standard_iv_candidates (struct ivopts_data *data)
2856 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2858 /* The same for the long type if it is still fast enough. */
2859 if (TYPE_PRECISION
2860 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2861 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2862 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2863 build_int_cst (long_integer_type_node, 1), true, NULL);
2865 /* And likewise for the long long type if it is still fast enough. */
2866 if (TYPE_PRECISION
2867 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2868 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2869 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2870 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2874 /* Adds candidates based on the old induction variable IV. */
2876 static void
2877 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2879 gimple phi;
2880 tree def;
2881 struct iv_cand *cand;
2883 add_candidate (data, iv->base, iv->step, true, NULL);
2885 /* The same, but with initial value zero. */
2886 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2887 add_candidate (data, size_int (0), iv->step, true, NULL);
2888 else
2889 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2890 iv->step, true, NULL);
2892 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2893 if (gimple_code (phi) == GIMPLE_PHI)
2895 /* Additionally record the possibility of leaving the original iv
2896 untouched. */
2897 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2898 /* Don't add candidate if it's from another PHI node because
2899 it's an affine iv appearing in the form of PEELED_CHREC. */
2900 phi = SSA_NAME_DEF_STMT (def);
2901 if (gimple_code (phi) != GIMPLE_PHI)
2903 cand = add_candidate_1 (data,
2904 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2905 SSA_NAME_DEF_STMT (def));
2906 cand->var_before = iv->ssa_name;
2907 cand->var_after = def;
2909 else
2910 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2914 /* Adds candidates based on the old induction variables. */
2916 static void
2917 add_old_ivs_candidates (struct ivopts_data *data)
2919 unsigned i;
2920 struct iv *iv;
2921 bitmap_iterator bi;
2923 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2925 iv = ver_info (data, i)->iv;
2926 if (iv && iv->biv_p && !integer_zerop (iv->step))
2927 add_old_iv_candidates (data, iv);
2931 /* Adds candidates based on the value of the induction variable IV and USE. */
2933 static void
2934 add_iv_value_candidates (struct ivopts_data *data,
2935 struct iv *iv, struct iv_use *use)
2937 unsigned HOST_WIDE_INT offset;
2938 tree base;
2939 tree basetype;
2941 add_candidate (data, iv->base, iv->step, false, use);
2943 /* The same, but with initial value zero. Make such a variable important,
2944 since it is generic enough so that possibly many uses may be based
2945 on it. */
2946 basetype = TREE_TYPE (iv->base);
2947 if (POINTER_TYPE_P (basetype))
2948 basetype = sizetype;
2949 add_candidate (data, build_int_cst (basetype, 0),
2950 iv->step, true, use);
2952 /* Third, try removing the constant offset. Make sure to even
2953 add a candidate for &a[0] vs. (T *)&a. */
2954 base = strip_offset (iv->base, &offset);
2955 if (offset
2956 || base != iv->base)
2957 add_candidate (data, base, iv->step, false, use);
2960 /* Adds candidates based on the uses. */
2962 static void
2963 add_derived_ivs_candidates (struct ivopts_data *data)
2965 unsigned i;
2967 for (i = 0; i < n_iv_uses (data); i++)
2969 struct iv_use *use = iv_use (data, i);
2971 if (!use)
2972 continue;
2974 switch (use->type)
2976 case USE_NONLINEAR_EXPR:
2977 case USE_COMPARE:
2978 case USE_ADDRESS:
2979 /* Just add the ivs based on the value of the iv used here. */
2980 add_iv_value_candidates (data, use->iv, use);
2981 break;
2983 default:
2984 gcc_unreachable ();
2989 /* Record important candidates and add them to related_cands bitmaps
2990 if needed. */
2992 static void
2993 record_important_candidates (struct ivopts_data *data)
2995 unsigned i;
2996 struct iv_use *use;
2998 for (i = 0; i < n_iv_cands (data); i++)
3000 struct iv_cand *cand = iv_cand (data, i);
3002 if (cand->important)
3003 bitmap_set_bit (data->important_candidates, i);
3006 data->consider_all_candidates = (n_iv_cands (data)
3007 <= CONSIDER_ALL_CANDIDATES_BOUND);
3009 if (data->consider_all_candidates)
3011 /* We will not need "related_cands" bitmaps in this case,
3012 so release them to decrease peak memory consumption. */
3013 for (i = 0; i < n_iv_uses (data); i++)
3015 use = iv_use (data, i);
3016 BITMAP_FREE (use->related_cands);
3019 else
3021 /* Add important candidates to the related_cands bitmaps. */
3022 for (i = 0; i < n_iv_uses (data); i++)
3023 bitmap_ior_into (iv_use (data, i)->related_cands,
3024 data->important_candidates);
3028 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3029 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3030 we allocate a simple list to every use. */
3032 static void
3033 alloc_use_cost_map (struct ivopts_data *data)
3035 unsigned i, size, s;
3037 for (i = 0; i < n_iv_uses (data); i++)
3039 struct iv_use *use = iv_use (data, i);
3041 if (data->consider_all_candidates)
3042 size = n_iv_cands (data);
3043 else
3045 s = bitmap_count_bits (use->related_cands);
3047 /* Round up to a power of two, so that taking the modulo by it is fast. */
3048 size = s ? (1 << ceil_log2 (s)) : 1;
3051 use->n_map_members = size;
3052 use->cost_map = XCNEWVEC (struct cost_pair, size);
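/* Sizing sketch (hypothetical count): with 5 related candidates,
   s = 5 rounds up to size = 8, so the open-addressed probing in
   set_use_iv_cost and get_use_iv_cost can reduce candidate ids with
   "id & (size - 1)" instead of a division.  */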
3056 /* Returns a description of the computation cost of an expression whose
3057 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3059 static comp_cost
3060 new_cost (unsigned runtime, unsigned complexity)
3062 comp_cost cost;
3064 cost.cost = runtime;
3065 cost.complexity = complexity;
3067 return cost;
3070 /* Returns true if COST is infinite. */
3072 static bool
3073 infinite_cost_p (comp_cost cost)
3075 return cost.cost == INFTY;
3078 /* Adds costs COST1 and COST2. */
3080 static comp_cost
3081 add_costs (comp_cost cost1, comp_cost cost2)
3083 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3084 return infinite_cost;
3086 cost1.cost += cost2.cost;
3087 cost1.complexity += cost2.complexity;
3089 return cost1;
3091 /* Subtracts cost COST2 from COST1. */
3093 static comp_cost
3094 sub_costs (comp_cost cost1, comp_cost cost2)
3096 cost1.cost -= cost2.cost;
3097 cost1.complexity -= cost2.complexity;
3099 return cost1;
3102 /* Returns a negative number if COST1 < COST2, a positive number if
3103 COST1 > COST2, and 0 if COST1 = COST2. */
3105 static int
3106 compare_costs (comp_cost cost1, comp_cost cost2)
3108 if (cost1.cost == cost2.cost)
3109 return cost1.complexity - cost2.complexity;
3111 return cost1.cost - cost2.cost;
3114 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
3115 on invariants DEPENDS_ON and that the value used in expressing it
3116 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3118 static void
3119 set_use_iv_cost (struct ivopts_data *data,
3120 struct iv_use *use, struct iv_cand *cand,
3121 comp_cost cost, bitmap depends_on, tree value,
3122 enum tree_code comp, int inv_expr_id)
3124 unsigned i, s;
3126 if (infinite_cost_p (cost))
3128 BITMAP_FREE (depends_on);
3129 return;
3132 if (data->consider_all_candidates)
3134 use->cost_map[cand->id].cand = cand;
3135 use->cost_map[cand->id].cost = cost;
3136 use->cost_map[cand->id].depends_on = depends_on;
3137 use->cost_map[cand->id].value = value;
3138 use->cost_map[cand->id].comp = comp;
3139 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3140 return;
3143 /* n_map_members is a power of two, so this computes modulo. */
3144 s = cand->id & (use->n_map_members - 1);
3145 for (i = s; i < use->n_map_members; i++)
3146 if (!use->cost_map[i].cand)
3147 goto found;
3148 for (i = 0; i < s; i++)
3149 if (!use->cost_map[i].cand)
3150 goto found;
3152 gcc_unreachable ();
3154 found:
3155 use->cost_map[i].cand = cand;
3156 use->cost_map[i].cost = cost;
3157 use->cost_map[i].depends_on = depends_on;
3158 use->cost_map[i].value = value;
3159 use->cost_map[i].comp = comp;
3160 use->cost_map[i].inv_expr_id = inv_expr_id;
3163 /* Gets cost of (USE, CANDIDATE) pair. */
3165 static struct cost_pair *
3166 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3167 struct iv_cand *cand)
3169 unsigned i, s;
3170 struct cost_pair *ret;
3172 if (!cand)
3173 return NULL;
3175 if (data->consider_all_candidates)
3177 ret = use->cost_map + cand->id;
3178 if (!ret->cand)
3179 return NULL;
3181 return ret;
3184 /* n_map_members is a power of two, so this computes modulo. */
3185 s = cand->id & (use->n_map_members - 1);
3186 for (i = s; i < use->n_map_members; i++)
3187 if (use->cost_map[i].cand == cand)
3188 return use->cost_map + i;
3189 else if (use->cost_map[i].cand == NULL)
3190 return NULL;
3191 for (i = 0; i < s; i++)
3192 if (use->cost_map[i].cand == cand)
3193 return use->cost_map + i;
3194 else if (use->cost_map[i].cand == NULL)
3195 return NULL;
3197 return NULL;
3200 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3201 static rtx
3202 produce_memory_decl_rtl (tree obj, int *regno)
3204 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3205 machine_mode address_mode = targetm.addr_space.address_mode (as);
3206 rtx x;
3208 gcc_assert (obj);
3209 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3211 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3212 x = gen_rtx_SYMBOL_REF (address_mode, name);
3213 SET_SYMBOL_REF_DECL (x, obj);
3214 x = gen_rtx_MEM (DECL_MODE (obj), x);
3215 set_mem_addr_space (x, as);
3216 targetm.encode_section_info (obj, x, true);
3218 else
3220 x = gen_raw_REG (address_mode, (*regno)++);
3221 x = gen_rtx_MEM (DECL_MODE (obj), x);
3222 set_mem_addr_space (x, as);
3225 return x;
3228 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3229 walk_tree. DATA points to the next fake register number to use. */
3231 static tree
3232 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3234 tree obj = NULL_TREE;
3235 rtx x = NULL_RTX;
3236 int *regno = (int *) data;
3238 switch (TREE_CODE (*expr_p))
3240 case ADDR_EXPR:
3241 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3242 handled_component_p (*expr_p);
3243 expr_p = &TREE_OPERAND (*expr_p, 0))
3244 continue;
3245 obj = *expr_p;
3246 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3247 x = produce_memory_decl_rtl (obj, regno);
3248 break;
3250 case SSA_NAME:
3251 *ws = 0;
3252 obj = SSA_NAME_VAR (*expr_p);
3253 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3254 if (!obj)
3255 return NULL_TREE;
3256 if (!DECL_RTL_SET_P (obj))
3257 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3258 break;
3260 case VAR_DECL:
3261 case PARM_DECL:
3262 case RESULT_DECL:
3263 *ws = 0;
3264 obj = *expr_p;
3266 if (DECL_RTL_SET_P (obj))
3267 break;
3269 if (DECL_MODE (obj) == BLKmode)
3270 x = produce_memory_decl_rtl (obj, regno);
3271 else
3272 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3274 break;
3276 default:
3277 break;
3280 if (x)
3282 decl_rtl_to_reset.safe_push (obj);
3283 SET_DECL_RTL (obj, x);
3286 return NULL_TREE;
3289 /* Determines cost of the computation of EXPR. */
3291 static unsigned
3292 computation_cost (tree expr, bool speed)
3294 rtx_insn *seq;
3295 rtx rslt;
3296 tree type = TREE_TYPE (expr);
3297 unsigned cost;
3298 /* Avoid using hard regs in ways which may be unsupported. */
3299 int regno = LAST_VIRTUAL_REGISTER + 1;
3300 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3301 enum node_frequency real_frequency = node->frequency;
3303 node->frequency = NODE_FREQUENCY_NORMAL;
3304 crtl->maybe_hot_insn_p = speed;
3305 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3306 start_sequence ();
3307 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3308 seq = get_insns ();
3309 end_sequence ();
3310 default_rtl_profile ();
3311 node->frequency = real_frequency;
3313 cost = seq_cost (seq, speed);
3314 if (MEM_P (rslt))
3315 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3316 TYPE_ADDR_SPACE (type), speed);
3317 else if (!REG_P (rslt))
3318 cost += set_src_cost (rslt, speed);
3320 return cost;
3323 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3325 static tree
3326 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3328 if (stmt_after_increment (loop, cand, stmt))
3329 return cand->var_after;
3330 else
3331 return cand->var_before;
3334 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3335 same precision that is at least as wide as the precision of TYPE, stores
3336 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3337 type of A and B. */
3339 static tree
3340 determine_common_wider_type (tree *a, tree *b)
3342 tree wider_type = NULL;
3343 tree suba, subb;
3344 tree atype = TREE_TYPE (*a);
3346 if (CONVERT_EXPR_P (*a))
3348 suba = TREE_OPERAND (*a, 0);
3349 wider_type = TREE_TYPE (suba);
3350 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3351 return atype;
3353 else
3354 return atype;
3356 if (CONVERT_EXPR_P (*b))
3358 subb = TREE_OPERAND (*b, 0);
3359 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3360 return atype;
3362 else
3363 return atype;
3365 *a = suba;
3366 *b = subb;
3367 return wider_type;
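/* Example (a sketch with hypothetical types): if *A is (unsigned int) la
   and *B is (unsigned int) lb with "long" la and lb on a target where
   long is 64 bits, both conversions are stripped, *A and *B become la
   and lb, and "long" is returned as the wider type in which the
   difference is then folded.  */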
3370 /* Determines the expression by which USE is expressed from induction variable
3371 CAND at statement AT in LOOP. The expression is stored in a decomposed
3372 form into AFF. Returns false if USE cannot be expressed using CAND. */
3374 static bool
3375 get_computation_aff (struct loop *loop,
3376 struct iv_use *use, struct iv_cand *cand, gimple at,
3377 struct aff_tree *aff)
3379 tree ubase = use->iv->base;
3380 tree ustep = use->iv->step;
3381 tree cbase = cand->iv->base;
3382 tree cstep = cand->iv->step, cstep_common;
3383 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3384 tree common_type, var;
3385 tree uutype;
3386 aff_tree cbase_aff, var_aff;
3387 widest_int rat;
3389 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3391 /* We do not have enough precision to express the values of the use. */
3392 return false;
3395 var = var_at_stmt (loop, cand, at);
3396 uutype = unsigned_type_for (utype);
3398 /* If the conversion is not a noop, perform it. */
3399 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3401 cstep = fold_convert (uutype, cstep);
3402 cbase = fold_convert (uutype, cbase);
3403 var = fold_convert (uutype, var);
3406 if (!constant_multiple_of (ustep, cstep, &rat))
3407 return false;
3409 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3410 type, we achieve better folding by computing their difference in this
3411 wider type, and casting the result to UUTYPE. We do not need to worry about
3412 overflows, as all the arithmetic will in the end be performed in UUTYPE
3413 anyway. */
3414 common_type = determine_common_wider_type (&ubase, &cbase);
3416 /* use = ubase - ratio * cbase + ratio * var. */
3417 tree_to_aff_combination (ubase, common_type, aff);
3418 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3419 tree_to_aff_combination (var, uutype, &var_aff);
3421 /* We need to shift the value if we are after the increment. */
3422 if (stmt_after_increment (loop, cand, at))
3424 aff_tree cstep_aff;
3426 if (common_type != uutype)
3427 cstep_common = fold_convert (common_type, cstep);
3428 else
3429 cstep_common = cstep;
3431 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3432 aff_combination_add (&cbase_aff, &cstep_aff);
3435 aff_combination_scale (&cbase_aff, -rat);
3436 aff_combination_add (aff, &cbase_aff);
3437 if (common_type != uutype)
3438 aff_combination_convert (aff, uutype);
3440 aff_combination_scale (&var_aff, rat);
3441 aff_combination_add (aff, &var_aff);
3443 return true;
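/* Worked example (hypothetical ivs): for use iv { base = ub, step = 4 }
   and candidate iv { base = cb, step = 1 }, constant_multiple_of gives
   rat = 4 and the affine form built here is

     use = ub - 4 * cb + 4 * var

   which reproduces the use value at every iteration (shifted by one
   candidate step when AT is after the candidate's increment).  */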
3446 /* Return the type of USE. */
3448 static tree
3449 get_use_type (struct iv_use *use)
3451 tree base_type = TREE_TYPE (use->iv->base);
3452 tree type;
3454 if (use->type == USE_ADDRESS)
3456 /* The base_type may be a void pointer. Create a pointer type based on
3457 the mem_ref instead. */
3458 type = build_pointer_type (TREE_TYPE (*use->op_p));
3459 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3460 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3462 else
3463 type = base_type;
3465 return type;
3468 /* Determines the expression by which USE is expressed from induction variable
3469 CAND at statement AT in LOOP. The computation is unshared. */
3471 static tree
3472 get_computation_at (struct loop *loop,
3473 struct iv_use *use, struct iv_cand *cand, gimple at)
3475 aff_tree aff;
3476 tree type = get_use_type (use);
3478 if (!get_computation_aff (loop, use, cand, at, &aff))
3479 return NULL_TREE;
3480 unshare_aff_combination (&aff);
3481 return fold_convert (type, aff_combination_to_tree (&aff));
3484 /* Determines the expression by which USE is expressed from induction variable
3485 CAND in LOOP. The computation is unshared. */
3487 static tree
3488 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3490 return get_computation_at (loop, use, cand, use->stmt);
3493 /* Adjust the cost COST for being in loop setup rather than loop body.
3494 If we're optimizing for space, the loop setup overhead is constant;
3495 if we're optimizing for speed, amortize it over the per-iteration cost. */
3496 static unsigned
3497 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3499 if (cost == INFTY)
3500 return cost;
3501 else if (optimize_loop_for_speed_p (data->current_loop))
3502 return cost / avg_loop_niter (data->current_loop);
3503 else
3504 return cost;
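/* E.g. (made-up numbers) a setup cost of 10 in a loop with
   avg_loop_niter == 5 is charged as 2 when optimizing for speed, since
   the one-time setup amortizes over the iterations; when optimizing for
   size it stays 10.  */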
3507 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3508 validity for a memory reference accessing memory of mode MODE in
3509 address space AS. */
3512 bool
3513 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3514 addr_space_t as)
3516 #define MAX_RATIO 128
3517 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3518 static vec<sbitmap> valid_mult_list;
3519 sbitmap valid_mult;
3521 if (data_index >= valid_mult_list.length ())
3522 valid_mult_list.safe_grow_cleared (data_index + 1);
3524 valid_mult = valid_mult_list[data_index];
3525 if (!valid_mult)
3527 machine_mode address_mode = targetm.addr_space.address_mode (as);
3528 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3529 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3530 rtx addr, scaled;
3531 HOST_WIDE_INT i;
3533 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3534 bitmap_clear (valid_mult);
3535 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3536 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3537 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3539 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3540 if (memory_address_addr_space_p (mode, addr, as)
3541 || memory_address_addr_space_p (mode, scaled, as))
3542 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3545 if (dump_file && (dump_flags & TDF_DETAILS))
3547 fprintf (dump_file, " allowed multipliers:");
3548 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3549 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3550 fprintf (dump_file, " %d", (int) i);
3551 fprintf (dump_file, "\n");
3552 fprintf (dump_file, "\n");
3555 valid_mult_list[data_index] = valid_mult;
3558 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3559 return false;
3561 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
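/* On an x86-like target, for example, the probe above would typically
   find ratios 1, 2, 4 and 8 valid (the scale field of
   base + index * scale), so a ratio of 8 is accepted while a ratio of 3
   is rejected and must be paid for with an explicit multiply (see
   get_address_cost below).  */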
3564 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3565 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3566 variable is omitted. Compute the cost for a memory reference that accesses
3567 a memory location of mode MEM_MODE in address space AS.
3569 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3570 size of MEM_MODE / RATIO) is available. To make this determination, we
3571 look at the size of the increment to be made, which is given in CSTEP.
3572 CSTEP may be zero if the step is unknown.
3573 STMT_AFTER_INC is true iff the statement we're looking at is after the
3574 increment of the original biv.
3576 TODO -- there must be some better way. This all is quite crude. */
3578 enum ainc_type
3580 AINC_PRE_INC, /* Pre increment. */
3581 AINC_PRE_DEC, /* Pre decrement. */
3582 AINC_POST_INC, /* Post increment. */
3583 AINC_POST_DEC, /* Post decrement. */
3584 AINC_NONE /* Also the number of auto increment types. */
3587 typedef struct address_cost_data_s
3589 HOST_WIDE_INT min_offset, max_offset;
3590 unsigned costs[2][2][2][2];
3591 unsigned ainc_costs[AINC_NONE];
3592 } *address_cost_data;
3595 static comp_cost
3596 get_address_cost (bool symbol_present, bool var_present,
3597 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3598 HOST_WIDE_INT cstep, machine_mode mem_mode,
3599 addr_space_t as, bool speed,
3600 bool stmt_after_inc, bool *may_autoinc)
3602 machine_mode address_mode = targetm.addr_space.address_mode (as);
3603 static vec<address_cost_data> address_cost_data_list;
3604 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3605 address_cost_data data;
3606 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3607 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3608 unsigned cost, acost, complexity;
3609 enum ainc_type autoinc_type;
3610 bool offset_p, ratio_p, autoinc;
3611 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3612 unsigned HOST_WIDE_INT mask;
3613 unsigned bits;
3615 if (data_index >= address_cost_data_list.length ())
3616 address_cost_data_list.safe_grow_cleared (data_index + 1);
3618 data = address_cost_data_list[data_index];
3619 if (!data)
3621 HOST_WIDE_INT i;
3622 HOST_WIDE_INT rat, off = 0;
3623 int old_cse_not_expected, width;
3624 unsigned sym_p, var_p, off_p, rat_p, add_c;
3625 rtx_insn *seq;
3626 rtx addr, base;
3627 rtx reg0, reg1;
3629 data = (address_cost_data) xcalloc (1, sizeof (*data));
3631 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3633 width = GET_MODE_BITSIZE (address_mode) - 1;
3634 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3635 width = HOST_BITS_PER_WIDE_INT - 1;
3636 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3638 for (i = width; i >= 0; i--)
3640 off = -((unsigned HOST_WIDE_INT) 1 << i);
3641 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3642 if (memory_address_addr_space_p (mem_mode, addr, as))
3643 break;
3645 data->min_offset = (i == -1? 0 : off);
3647 for (i = width; i >= 0; i--)
3649 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3650 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3651 if (memory_address_addr_space_p (mem_mode, addr, as))
3652 break;
3653 /* For some strict-alignment targets, the offset must be naturally
3654 aligned. Try an aligned offset if mem_mode is not QImode. */
3655 off = mem_mode != QImode
3656 ? ((unsigned HOST_WIDE_INT) 1 << i)
3657 - GET_MODE_SIZE (mem_mode)
3658 : 0;
3659 if (off > 0)
3661 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3662 if (memory_address_addr_space_p (mem_mode, addr, as))
3663 break;
3666 if (i == -1)
3667 off = 0;
3668 data->max_offset = off;
3670 if (dump_file && (dump_flags & TDF_DETAILS))
3672 fprintf (dump_file, "get_address_cost:\n");
3673 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3674 GET_MODE_NAME (mem_mode),
3675 data->min_offset);
3676 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3677 GET_MODE_NAME (mem_mode),
3678 data->max_offset);
3681 rat = 1;
3682 for (i = 2; i <= MAX_RATIO; i++)
3683 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3685 rat = i;
3686 break;
3689 /* Compute the cost of various addressing modes. */
3690 acost = 0;
3691 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3692 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3694 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3695 || USE_STORE_PRE_DECREMENT (mem_mode))
3697 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3698 has_predec[mem_mode]
3699 = memory_address_addr_space_p (mem_mode, addr, as);
3701 if (has_predec[mem_mode])
3702 data->ainc_costs[AINC_PRE_DEC]
3703 = address_cost (addr, mem_mode, as, speed);
3705 if (USE_LOAD_POST_DECREMENT (mem_mode)
3706 || USE_STORE_POST_DECREMENT (mem_mode))
3708 addr = gen_rtx_POST_DEC (address_mode, reg0);
3709 has_postdec[mem_mode]
3710 = memory_address_addr_space_p (mem_mode, addr, as);
3712 if (has_postdec[mem_mode])
3713 data->ainc_costs[AINC_POST_DEC]
3714 = address_cost (addr, mem_mode, as, speed);
3716 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3717 || USE_STORE_PRE_INCREMENT (mem_mode))
3719 addr = gen_rtx_PRE_INC (address_mode, reg0);
3720 has_preinc[mem_mode]
3721 = memory_address_addr_space_p (mem_mode, addr, as);
3723 if (has_preinc[mem_mode])
3724 data->ainc_costs[AINC_PRE_INC]
3725 = address_cost (addr, mem_mode, as, speed);
3727 if (USE_LOAD_POST_INCREMENT (mem_mode)
3728 || USE_STORE_POST_INCREMENT (mem_mode))
3730 addr = gen_rtx_POST_INC (address_mode, reg0);
3731 has_postinc[mem_mode]
3732 = memory_address_addr_space_p (mem_mode, addr, as);
3734 if (has_postinc[mem_mode])
3735 data->ainc_costs[AINC_POST_INC]
3736 = address_cost (addr, mem_mode, as, speed);
3738 for (i = 0; i < 16; i++)
3740 sym_p = i & 1;
3741 var_p = (i >> 1) & 1;
3742 off_p = (i >> 2) & 1;
3743 rat_p = (i >> 3) & 1;
3745 addr = reg0;
3746 if (rat_p)
3747 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3748 gen_int_mode (rat, address_mode));
3750 if (var_p)
3751 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3753 if (sym_p)
3755 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3756 /* ??? We can run into trouble with some backends by presenting
3757 it with symbols which haven't been properly passed through
3758 targetm.encode_section_info. By setting the local bit, we
3759 enhance the probability of things working. */
3760 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3762 if (off_p)
3763 base = gen_rtx_fmt_e (CONST, address_mode,
3764 gen_rtx_fmt_ee
3765 (PLUS, address_mode, base,
3766 gen_int_mode (off, address_mode)));
3768 else if (off_p)
3769 base = gen_int_mode (off, address_mode);
3770 else
3771 base = NULL_RTX;
3773 if (base)
3774 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3776 start_sequence ();
3777 /* To avoid splitting addressing modes, pretend that no cse will
3778 follow. */
3779 old_cse_not_expected = cse_not_expected;
3780 cse_not_expected = true;
3781 addr = memory_address_addr_space (mem_mode, addr, as);
3782 cse_not_expected = old_cse_not_expected;
3783 seq = get_insns ();
3784 end_sequence ();
3786 acost = seq_cost (seq, speed);
3787 acost += address_cost (addr, mem_mode, as, speed);
3789 if (!acost)
3790 acost = 1;
3791 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3794 /* On some targets, it is quite expensive to load a symbol into a register,
3795 which makes addresses that contain symbols look much more expensive.
3796 However, the symbol will have to be loaded in any case before the
3797 loop (and quite likely we have it in register already), so it does not
3798 make much sense to penalize them too heavily. So make some final
3799 tweaks for the SYMBOL_PRESENT modes:
3801 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3802 var is cheaper, use this mode with small penalty.
3803 If VAR_PRESENT is true, try whether the mode with
3804 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3805 if this is the case, use it. */
3806 add_c = add_cost (speed, address_mode);
3807 for (i = 0; i < 8; i++)
3809 var_p = i & 1;
3810 off_p = (i >> 1) & 1;
3811 rat_p = (i >> 2) & 1;
3813 acost = data->costs[0][1][off_p][rat_p] + 1;
3814 if (var_p)
3815 acost += add_c;
3817 if (acost < data->costs[1][var_p][off_p][rat_p])
3818 data->costs[1][var_p][off_p][rat_p] = acost;
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3823 fprintf (dump_file, "Address costs:\n");
3825 for (i = 0; i < 16; i++)
3827 sym_p = i & 1;
3828 var_p = (i >> 1) & 1;
3829 off_p = (i >> 2) & 1;
3830 rat_p = (i >> 3) & 1;
3832 fprintf (dump_file, " ");
3833 if (sym_p)
3834 fprintf (dump_file, "sym + ");
3835 if (var_p)
3836 fprintf (dump_file, "var + ");
3837 if (off_p)
3838 fprintf (dump_file, "cst + ");
3839 if (rat_p)
3840 fprintf (dump_file, "rat * ");
3842 acost = data->costs[sym_p][var_p][off_p][rat_p];
3843 fprintf (dump_file, "index costs %d\n", acost);
3845 if (has_predec[mem_mode] || has_postdec[mem_mode]
3846 || has_preinc[mem_mode] || has_postinc[mem_mode])
3847 fprintf (dump_file, " May include autoinc/dec\n");
3848 fprintf (dump_file, "\n");
3851 address_cost_data_list[data_index] = data;
3854 bits = GET_MODE_BITSIZE (address_mode);
3855 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3856 offset &= mask;
3857 if ((offset >> (bits - 1) & 1))
3858 offset |= ~mask;
3859 s_offset = offset;
3861 autoinc = false;
3862 autoinc_type = AINC_NONE;
3863 msize = GET_MODE_SIZE (mem_mode);
3864 autoinc_offset = offset;
3865 if (stmt_after_inc)
3866 autoinc_offset += ratio * cstep;
3867 if (symbol_present || var_present || ratio != 1)
3868 autoinc = false;
3869 else
3871 if (has_postinc[mem_mode] && autoinc_offset == 0
3872 && msize == cstep)
3873 autoinc_type = AINC_POST_INC;
3874 else if (has_postdec[mem_mode] && autoinc_offset == 0
3875 && msize == -cstep)
3876 autoinc_type = AINC_POST_DEC;
3877 else if (has_preinc[mem_mode] && autoinc_offset == msize
3878 && msize == cstep)
3879 autoinc_type = AINC_PRE_INC;
3880 else if (has_predec[mem_mode] && autoinc_offset == -msize
3881 && msize == -cstep)
3882 autoinc_type = AINC_PRE_DEC;
3884 if (autoinc_type != AINC_NONE)
3885 autoinc = true;
3888 cost = 0;
3889 offset_p = (s_offset != 0
3890 && data->min_offset <= s_offset
3891 && s_offset <= data->max_offset);
3892 ratio_p = (ratio != 1
3893 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3895 if (ratio != 1 && !ratio_p)
3896 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3898 if (s_offset && !offset_p && !symbol_present)
3899 cost += add_cost (speed, address_mode);
3901 if (may_autoinc)
3902 *may_autoinc = autoinc;
3903 if (autoinc)
3904 acost = data->ainc_costs[autoinc_type];
3905 else
3906 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3907 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3908 return new_cost (cost + acost, complexity);
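/* Decomposition sketch (hypothetical address): &sym + x + 16 + 4 * i is
   costed with symbol_present = true, var_present = true, offset = 16 and
   ratio = 4; assuming 16 lies in [min_offset, max_offset] and 4 is an
   allowed multiplier, the precomputed data->costs[1][1][1][1] entry gives
   the target cost and the complexity is 4.  When the address reduces to
   an autoincrement form, data->ainc_costs is used instead.  */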
3911 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3912 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3913 calculating the operands of EXPR. Returns true if successful, and returns
3914 the cost in COST. */
3916 static bool
3917 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3918 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3920 comp_cost res;
3921 tree op1 = TREE_OPERAND (expr, 1);
3922 tree cst = TREE_OPERAND (mult, 1);
3923 tree multop = TREE_OPERAND (mult, 0);
3924 int m = exact_log2 (int_cst_value (cst));
3925 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3926 int as_cost, sa_cost;
3927 bool mult_in_op1;
3929 if (!(m >= 0 && m < maxm))
3930 return false;
3932 mult_in_op1 = operand_equal_p (op1, mult, 0);
3934 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3936 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3937 use that in preference to a shift insn followed by an add insn. */
3938 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3939 ? shiftadd_cost (speed, mode, m)
3940 : (mult_in_op1
3941 ? shiftsub1_cost (speed, mode, m)
3942 : shiftsub0_cost (speed, mode, m)));
3944 res = new_cost (MIN (as_cost, sa_cost), 0);
3945 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3947 STRIP_NOPS (multop);
3948 if (!is_gimple_val (multop))
3949 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3951 *cost = res;
3952 return true;
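/* Shiftadd sketch (hypothetical expression): for EXPR = a + b * 8, MULT
   is b * 8 and m = exact_log2 (8) = 3, so the choice is between
   add_cost + shift_cost (build b << 3, then add) and shiftadd_cost (a
   single shift-and-add, e.g. an x86 "lea"); the cheaper of the two is
   charged, plus the cost of computing the other operand.  */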
3955 /* Estimates cost of forcing expression EXPR into a variable. */
3957 static comp_cost
3958 force_expr_to_var_cost (tree expr, bool speed)
3960 static bool costs_initialized = false;
3961 static unsigned integer_cost [2];
3962 static unsigned symbol_cost [2];
3963 static unsigned address_cost [2];
3964 tree op0, op1;
3965 comp_cost cost0, cost1, cost;
3966 machine_mode mode;
3968 if (!costs_initialized)
3970 tree type = build_pointer_type (integer_type_node);
3971 tree var, addr;
3972 rtx x;
3973 int i;
3975 var = create_tmp_var_raw (integer_type_node, "test_var");
3976 TREE_STATIC (var) = 1;
3977 x = produce_memory_decl_rtl (var, NULL);
3978 SET_DECL_RTL (var, x);
3980 addr = build1 (ADDR_EXPR, type, var);
3983 for (i = 0; i < 2; i++)
3985 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3986 2000), i);
3988 symbol_cost[i] = computation_cost (addr, i) + 1;
3990 address_cost[i]
3991 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3992 if (dump_file && (dump_flags & TDF_DETAILS))
3994 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3995 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3996 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3997 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3998 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3999 fprintf (dump_file, "\n");
4003 costs_initialized = true;
4006 STRIP_NOPS (expr);
4008 if (SSA_VAR_P (expr))
4009 return no_cost;
4011 if (is_gimple_min_invariant (expr))
4013 if (TREE_CODE (expr) == INTEGER_CST)
4014 return new_cost (integer_cost [speed], 0);
4016 if (TREE_CODE (expr) == ADDR_EXPR)
4018 tree obj = TREE_OPERAND (expr, 0);
4020 if (TREE_CODE (obj) == VAR_DECL
4021 || TREE_CODE (obj) == PARM_DECL
4022 || TREE_CODE (obj) == RESULT_DECL)
4023 return new_cost (symbol_cost [speed], 0);
4026 return new_cost (address_cost [speed], 0);
4029 switch (TREE_CODE (expr))
4031 case POINTER_PLUS_EXPR:
4032 case PLUS_EXPR:
4033 case MINUS_EXPR:
4034 case MULT_EXPR:
4035 op0 = TREE_OPERAND (expr, 0);
4036 op1 = TREE_OPERAND (expr, 1);
4037 STRIP_NOPS (op0);
4038 STRIP_NOPS (op1);
4039 break;
4041 CASE_CONVERT:
4042 case NEGATE_EXPR:
4043 op0 = TREE_OPERAND (expr, 0);
4044 STRIP_NOPS (op0);
4045 op1 = NULL_TREE;
4046 break;
4048 default:
4049 /* Just an arbitrary value, FIXME. */
4050 return new_cost (target_spill_cost[speed], 0);
4053 if (op0 == NULL_TREE
4054 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4055 cost0 = no_cost;
4056 else
4057 cost0 = force_expr_to_var_cost (op0, speed);
4059 if (op1 == NULL_TREE
4060 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4061 cost1 = no_cost;
4062 else
4063 cost1 = force_expr_to_var_cost (op1, speed);
4065 mode = TYPE_MODE (TREE_TYPE (expr));
4066 switch (TREE_CODE (expr))
4068 case POINTER_PLUS_EXPR:
4069 case PLUS_EXPR:
4070 case MINUS_EXPR:
4071 case NEGATE_EXPR:
4072 cost = new_cost (add_cost (speed, mode), 0);
4073 if (TREE_CODE (expr) != NEGATE_EXPR)
4075 tree mult = NULL_TREE;
4076 comp_cost sa_cost;
4077 if (TREE_CODE (op1) == MULT_EXPR)
4078 mult = op1;
4079 else if (TREE_CODE (op0) == MULT_EXPR)
4080 mult = op0;
4082 if (mult != NULL_TREE
4083 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4084 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4085 speed, &sa_cost))
4086 return sa_cost;
4088 break;
4090 CASE_CONVERT:
4092 tree inner_mode, outer_mode;
4093 outer_mode = TREE_TYPE (expr);
4094 inner_mode = TREE_TYPE (op0);
4095 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4096 TYPE_MODE (inner_mode), speed), 0);
4098 break;
4100 case MULT_EXPR:
4101 if (cst_and_fits_in_hwi (op0))
4102 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4103 mode, speed), 0);
4104 else if (cst_and_fits_in_hwi (op1))
4105 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4106 mode, speed), 0);
4107 else
4108 return new_cost (target_spill_cost [speed], 0);
4109 break;
4111 default:
4112 gcc_unreachable ();
4115 cost = add_costs (cost, cost0);
4116 cost = add_costs (cost, cost1);
4118 /* Bound the cost by target_spill_cost. The parts of complicated
4119 computations often are either loop invariant or at least can
4120 be shared between several iv uses, so letting this grow without
4121 limits would not give reasonable results. */
4122 if (cost.cost > (int) target_spill_cost [speed])
4123 cost.cost = target_spill_cost [speed];
4125 return cost;
4128 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4129 invariants the computation depends on. */
4131 static comp_cost
4132 force_var_cost (struct ivopts_data *data,
4133 tree expr, bitmap *depends_on)
4135 if (depends_on)
4137 fd_ivopts_data = data;
4138 walk_tree (&expr, find_depends, depends_on, NULL);
4141 return force_expr_to_var_cost (expr, data->speed);
4144 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4145 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4146 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4147 invariants the computation depends on. */
4149 static comp_cost
4150 split_address_cost (struct ivopts_data *data,
4151 tree addr, bool *symbol_present, bool *var_present,
4152 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4154 tree core;
4155 HOST_WIDE_INT bitsize;
4156 HOST_WIDE_INT bitpos;
4157 tree toffset;
4158 machine_mode mode;
4159 int unsignedp, volatilep;
4161 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4162 &unsignedp, &volatilep, false);
4164 if (toffset != 0
4165 || bitpos % BITS_PER_UNIT != 0
4166 || TREE_CODE (core) != VAR_DECL)
4168 *symbol_present = false;
4169 *var_present = true;
4170 fd_ivopts_data = data;
4171 walk_tree (&addr, find_depends, depends_on, NULL);
4172 return new_cost (target_spill_cost[data->speed], 0);
4175 *offset += bitpos / BITS_PER_UNIT;
4176 if (TREE_STATIC (core)
4177 || DECL_EXTERNAL (core))
4179 *symbol_present = true;
4180 *var_present = false;
4181 return no_cost;
4184 *symbol_present = false;
4185 *var_present = true;
4186 return no_cost;
4189 /* Estimates the cost of expressing the difference of addresses E1 - E2 as
4190 var + symbol + offset. The value of offset is added to OFFSET;
4191 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4192 part is missing. DEPENDS_ON is a set of the invariants the computation
4193 depends on. */
4195 static comp_cost
4196 ptr_difference_cost (struct ivopts_data *data,
4197 tree e1, tree e2, bool *symbol_present, bool *var_present,
4198 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4200 HOST_WIDE_INT diff = 0;
4201 aff_tree aff_e1, aff_e2;
4202 tree type;
4204 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4206 if (ptr_difference_const (e1, e2, &diff))
4208 *offset += diff;
4209 *symbol_present = false;
4210 *var_present = false;
4211 return no_cost;
4214 if (integer_zerop (e2))
4215 return split_address_cost (data, TREE_OPERAND (e1, 0),
4216 symbol_present, var_present, offset, depends_on);
4218 *symbol_present = false;
4219 *var_present = true;
4221 type = signed_type_for (TREE_TYPE (e1));
4222 tree_to_aff_combination (e1, type, &aff_e1);
4223 tree_to_aff_combination (e2, type, &aff_e2);
4224 aff_combination_scale (&aff_e2, -1);
4225 aff_combination_add (&aff_e1, &aff_e2);
4227 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
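/* A flat sketch of the aff_tree manipulation above (hypothetical
   fixed-size model; the real aff_tree lives in tree-affine.h): an affine
   value is a constant offset plus coefficients over a few terms, and
   E1 - E2 is formed by scaling E2 by -1 and adding, exactly the
   aff_combination_scale / aff_combination_add sequence used above.  */

#define AFF_SKETCH_NTERMS 4

struct aff_sketch
{
  long offset;
  long coef[AFF_SKETCH_NTERMS];
};

static void
aff_sketch_scale (struct aff_sketch *a, long scale)
{
  int i;
  a->offset *= scale;
  for (i = 0; i < AFF_SKETCH_NTERMS; i++)
    a->coef[i] *= scale;
}

static void
aff_sketch_add (struct aff_sketch *a, const struct aff_sketch *b)
{
  int i;
  a->offset += b->offset;
  for (i = 0; i < AFF_SKETCH_NTERMS; i++)
    a->coef[i] += b->coef[i];
}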
4230 /* Estimates the cost of expressing the difference E1 - E2 as
4231 var + symbol + offset. The value of offset is added to OFFSET;
4232 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4233 part is missing. DEPENDS_ON is a set of the invariants the computation
4234 depends on. */
4236 static comp_cost
4237 difference_cost (struct ivopts_data *data,
4238 tree e1, tree e2, bool *symbol_present, bool *var_present,
4239 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4241 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4242 unsigned HOST_WIDE_INT off1, off2;
4243 aff_tree aff_e1, aff_e2;
4244 tree type;
4246 e1 = strip_offset (e1, &off1);
4247 e2 = strip_offset (e2, &off2);
4248 *offset += off1 - off2;
4250 STRIP_NOPS (e1);
4251 STRIP_NOPS (e2);
4253 if (TREE_CODE (e1) == ADDR_EXPR)
4254 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4255 offset, depends_on);
4256 *symbol_present = false;
4258 if (operand_equal_p (e1, e2, 0))
4260 *var_present = false;
4261 return no_cost;
4264 *var_present = true;
4266 if (integer_zerop (e2))
4267 return force_var_cost (data, e1, depends_on);
4269 if (integer_zerop (e1))
4271 comp_cost cost = force_var_cost (data, e2, depends_on);
4272 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4273 return cost;
4276 type = signed_type_for (TREE_TYPE (e1));
4277 tree_to_aff_combination (e1, type, &aff_e1);
4278 tree_to_aff_combination (e2, type, &aff_e2);
4279 aff_combination_scale (&aff_e2, -1);
4280 aff_combination_add (&aff_e1, &aff_e2);
4282 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4285 /* Returns true if AFF1 and AFF2 are identical. */
4287 static bool
4288 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4290 unsigned i;
4292 if (aff1->n != aff2->n)
4293 return false;
4295 for (i = 0; i < aff1->n; i++)
4297 if (aff1->elts[i].coef != aff2->elts[i].coef)
4298 return false;
4300 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4301 return false;
4303 return true;
4306 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4308 static int
4309 get_expr_id (struct ivopts_data *data, tree expr)
4311 struct iv_inv_expr_ent ent;
4312 struct iv_inv_expr_ent **slot;
4314 ent.expr = expr;
4315 ent.hash = iterative_hash_expr (expr, 0);
4316 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4317 if (*slot)
4318 return (*slot)->id;
4320 *slot = XNEW (struct iv_inv_expr_ent);
4321 (*slot)->expr = expr;
4322 (*slot)->hash = ent.hash;
4323 (*slot)->id = data->inv_expr_id++;
4324 return (*slot)->id;
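/* The id assignment above follows the usual hash-consing pattern.  A
   minimal sketch with a linear table instead of the hash table
   (hypothetical names; pointer identity stands in for the structural
   hash/equality used by inv_expr_tab, and the fixed capacity is not
   checked):  */

#define EXPR_TAB_SKETCH_SIZE 64

static const void *expr_tab_sketch[EXPR_TAB_SKETCH_SIZE];
static int expr_tab_sketch_n;

static int
get_expr_id_sketch (const void *expr)
{
  int i;
  for (i = 0; i < expr_tab_sketch_n; i++)
    if (expr_tab_sketch[i] == expr)
      return i;			/* known expression: reuse its id */
  expr_tab_sketch[expr_tab_sketch_n] = expr;
  return expr_tab_sketch_n++;	/* new expression: next free id */
}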
4327 /* Returns the pseudo expr id if the expression UBASE - RATIO * CBASE
4328 requires a new compiler-generated temporary. Returns -1 otherwise.
4329 ADDRESS_P is a flag indicating if the expression is for address
4330 computation. */
4332 static int
4333 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4334 tree cbase, HOST_WIDE_INT ratio,
4335 bool address_p)
4337 aff_tree ubase_aff, cbase_aff;
4338 tree expr, ub, cb;
4340 STRIP_NOPS (ubase);
4341 STRIP_NOPS (cbase);
4342 ub = ubase;
4343 cb = cbase;
4345 if ((TREE_CODE (ubase) == INTEGER_CST)
4346 && (TREE_CODE (cbase) == INTEGER_CST))
4347 return -1;
4349 /* Strips the constant part. */
4350 if (TREE_CODE (ubase) == PLUS_EXPR
4351 || TREE_CODE (ubase) == MINUS_EXPR
4352 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4354 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4355 ubase = TREE_OPERAND (ubase, 0);
4358 /* Strips the constant part. */
4359 if (TREE_CODE (cbase) == PLUS_EXPR
4360 || TREE_CODE (cbase) == MINUS_EXPR
4361 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4363 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4364 cbase = TREE_OPERAND (cbase, 0);
4367 if (address_p)
4369 if (((TREE_CODE (ubase) == SSA_NAME)
4370 || (TREE_CODE (ubase) == ADDR_EXPR
4371 && is_gimple_min_invariant (ubase)))
4372 && (TREE_CODE (cbase) == INTEGER_CST))
4373 return -1;
4375 if (((TREE_CODE (cbase) == SSA_NAME)
4376 || (TREE_CODE (cbase) == ADDR_EXPR
4377 && is_gimple_min_invariant (cbase)))
4378 && (TREE_CODE (ubase) == INTEGER_CST))
4379 return -1;
4382 if (ratio == 1)
4384 if (operand_equal_p (ubase, cbase, 0))
4385 return -1;
4387 if (TREE_CODE (ubase) == ADDR_EXPR
4388 && TREE_CODE (cbase) == ADDR_EXPR)
4390 tree usym, csym;
4392 usym = TREE_OPERAND (ubase, 0);
4393 csym = TREE_OPERAND (cbase, 0);
4394 if (TREE_CODE (usym) == ARRAY_REF)
4396 tree ind = TREE_OPERAND (usym, 1);
4397 if (TREE_CODE (ind) == INTEGER_CST
4398 && tree_fits_shwi_p (ind)
4399 && tree_to_shwi (ind) == 0)
4400 usym = TREE_OPERAND (usym, 0);
4402 if (TREE_CODE (csym) == ARRAY_REF)
4404 tree ind = TREE_OPERAND (csym, 1);
4405 if (TREE_CODE (ind) == INTEGER_CST
4406 && tree_fits_shwi_p (ind)
4407 && tree_to_shwi (ind) == 0)
4408 csym = TREE_OPERAND (csym, 0);
4410 if (operand_equal_p (usym, csym, 0))
4411 return -1;
4413 /* Now do a more complex comparison.  */
4414 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4415 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4416 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4417 return -1;
4420 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4421 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4423 aff_combination_scale (&cbase_aff, -1 * ratio);
4424 aff_combination_add (&ubase_aff, &cbase_aff);
4425 expr = aff_combination_to_tree (&ubase_aff);
4426 return get_expr_id (data, expr);
4431 /* Determines the cost of the computation by which USE is expressed
4432 from induction variable CAND. If ADDRESS_P is true, we just need
4433 to create an address from it, otherwise we want to get it into a
4434 register. A set of invariants we depend on is stored in
4435 DEPENDS_ON. AT is the statement at which the value is computed.
4436 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4437 addressing is likely. */
4439 static comp_cost
4440 get_computation_cost_at (struct ivopts_data *data,
4441 struct iv_use *use, struct iv_cand *cand,
4442 bool address_p, bitmap *depends_on, gimple at,
4443 bool *can_autoinc,
4444 int *inv_expr_id)
4446 tree ubase = use->iv->base, ustep = use->iv->step;
4447 tree cbase, cstep;
4448 tree utype = TREE_TYPE (ubase), ctype;
4449 unsigned HOST_WIDE_INT cstepi, offset = 0;
4450 HOST_WIDE_INT ratio, aratio;
4451 bool var_present, symbol_present, stmt_is_after_inc;
4452 comp_cost cost;
4453 widest_int rat;
4454 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4455 machine_mode mem_mode = (address_p
4456 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4457 : VOIDmode);
4459 *depends_on = NULL;
4461 /* Only consider real candidates. */
4462 if (!cand->iv)
4463 return infinite_cost;
4465 cbase = cand->iv->base;
4466 cstep = cand->iv->step;
4467 ctype = TREE_TYPE (cbase);
4469 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4471 /* We do not have a precision to express the values of use. */
4472 return infinite_cost;
4475 if (address_p
4476 || (use->iv->base_object
4477 && cand->iv->base_object
4478 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4479 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4481 /* Do not try to express address of an object with computation based
4482 on address of a different object. This may cause problems in rtl
4483 level alias analysis (that does not expect this to be happening,
4484 as this is illegal in C), and would be unlikely to be useful
4485 anyway. */
4486 if (use->iv->base_object
4487 && cand->iv->base_object
4488 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4489 return infinite_cost;
4492 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4494 /* TODO -- add direct handling of this case. */
4495 goto fallback;
4498 /* CSTEPI is removed from the offset in case the statement is after the
4499 increment. If the step is not constant, we use zero instead.
4500 This is a bit imprecise (there is the extra addition), but
4501 redundancy elimination is likely to transform the code so that
4502 it uses the value of the variable before the increment anyway,
4503 so this is not all that unrealistic. */
4504 if (cst_and_fits_in_hwi (cstep))
4505 cstepi = int_cst_value (cstep);
4506 else
4507 cstepi = 0;
4509 if (!constant_multiple_of (ustep, cstep, &rat))
4510 return infinite_cost;
4512 if (wi::fits_shwi_p (rat))
4513 ratio = rat.to_shwi ();
4514 else
4515 return infinite_cost;
4517 STRIP_NOPS (cbase);
4518 ctype = TREE_TYPE (cbase);
4520 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4522 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4523 or ratio == 1, it is better to handle this like
4525 ubase - ratio * cbase + ratio * var
4527 (this also holds in the case ratio == -1, TODO). */
4529 if (cst_and_fits_in_hwi (cbase))
4531 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4532 cost = difference_cost (data,
4533 ubase, build_int_cst (utype, 0),
4534 &symbol_present, &var_present, &offset,
4535 depends_on);
4536 cost.cost /= avg_loop_niter (data->current_loop);
4538 else if (ratio == 1)
4540 tree real_cbase = cbase;
4542 /* Check to see if any adjustment is needed. */
4543 if (cstepi == 0 && stmt_is_after_inc)
4545 aff_tree real_cbase_aff;
4546 aff_tree cstep_aff;
4548 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4549 &real_cbase_aff);
4550 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4552 aff_combination_add (&real_cbase_aff, &cstep_aff);
4553 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4556 cost = difference_cost (data,
4557 ubase, real_cbase,
4558 &symbol_present, &var_present, &offset,
4559 depends_on);
4560 cost.cost /= avg_loop_niter (data->current_loop);
4562 else if (address_p
4563 && !POINTER_TYPE_P (ctype)
4564 && multiplier_allowed_in_address_p
4565 (ratio, mem_mode,
4566 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4568 cbase
4569 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4570 cost = difference_cost (data,
4571 ubase, cbase,
4572 &symbol_present, &var_present, &offset,
4573 depends_on);
4574 cost.cost /= avg_loop_niter (data->current_loop);
4576 else
4578 cost = force_var_cost (data, cbase, depends_on);
4579 cost = add_costs (cost,
4580 difference_cost (data,
4581 ubase, build_int_cst (utype, 0),
4582 &symbol_present, &var_present,
4583 &offset, depends_on));
4584 cost.cost /= avg_loop_niter (data->current_loop);
4585 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4588 /* The set of invariants depended on by the sub use has already been
4589 computed for the first use in the group. */
4590 if (use->sub_id)
4592 cost.cost = 0;
4593 if (depends_on && *depends_on)
4594 bitmap_clear (*depends_on);
4596 else if (inv_expr_id)
4598 *inv_expr_id =
4599 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4600 /* Clear depends on. */
4601 if (*inv_expr_id != -1 && depends_on && *depends_on)
4602 bitmap_clear (*depends_on);
4605 /* If we are after the increment, the value of the candidate is higher by
4606 one iteration. */
4607 if (stmt_is_after_inc)
4608 offset -= ratio * cstepi;
4610 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4611 (symbol/var1/const parts may be omitted). If we are looking for an
4612 address, find the cost of addressing this. */
4613 if (address_p)
4614 return add_costs (cost,
4615 get_address_cost (symbol_present, var_present,
4616 offset, ratio, cstepi,
4617 mem_mode,
4618 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4619 speed, stmt_is_after_inc,
4620 can_autoinc));
4622 /* Otherwise estimate the costs for computing the expression. */
4623 if (!symbol_present && !var_present && !offset)
4625 if (ratio != 1)
4626 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4627 return cost;
4630 /* Symbol + offset should be compile-time computable so consider that they
4631 are added once to the variable, if present. */
4632 if (var_present && (symbol_present || offset))
4633 cost.cost += adjust_setup_cost (data,
4634 add_cost (speed, TYPE_MODE (ctype)));
4636 /* Having offset does not affect runtime cost in case it is added to
4637 symbol, but it increases complexity. */
4638 if (offset)
4639 cost.complexity++;
4641 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4643 aratio = ratio > 0 ? ratio : -ratio;
4644 if (aratio != 1)
4645 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4646 return cost;
4648 fallback:
4649 if (can_autoinc)
4650 *can_autoinc = false;
4653 /* Just get the expression, expand it and measure the cost. */
4654 tree comp = get_computation_at (data->current_loop, use, cand, at);
4656 if (!comp)
4657 return infinite_cost;
4659 if (address_p)
4660 comp = build_simple_mem_ref (comp);
4662 return new_cost (computation_cost (comp, speed), 0);
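/* The rewrite relied on above -- use = ubase + ratio * (var - cbase)
   == (ubase - ratio * cbase) + ratio * var -- checked on plain unsigned
   arithmetic, where it holds even under wraparound (hypothetical
   helper):  */

static int
computation_identity_sketch (unsigned ubase, unsigned cbase,
			     unsigned ratio, unsigned var)
{
  unsigned direct = ubase + ratio * (var - cbase);
  unsigned split = (ubase - ratio * cbase) + ratio * var;
  return direct == split;	/* always 1: unsigned ops are modular */
}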
4666 /* Determines the cost of the computation by which USE is expressed
4667 from induction variable CAND. If ADDRESS_P is true, we just need
4668 to create an address from it, otherwise we want to get it into a
4669 register. A set of invariants we depend on is stored in
4670 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4671 autoinc addressing is likely. */
4673 static comp_cost
4674 get_computation_cost (struct ivopts_data *data,
4675 struct iv_use *use, struct iv_cand *cand,
4676 bool address_p, bitmap *depends_on,
4677 bool *can_autoinc, int *inv_expr_id)
4679 return get_computation_cost_at (data,
4680 use, cand, address_p, depends_on, use->stmt,
4681 can_autoinc, inv_expr_id);
4684 /* Determines cost of basing replacement of USE on CAND in a generic
4685 expression. */
4687 static bool
4688 determine_use_iv_cost_generic (struct ivopts_data *data,
4689 struct iv_use *use, struct iv_cand *cand)
4691 bitmap depends_on;
4692 comp_cost cost;
4693 int inv_expr_id = -1;
4695 /* The simple case first -- if we need to express value of the preserved
4696 original biv, the cost is 0. This also prevents us from counting the
4697 cost of increment twice -- once at this use and once in the cost of
4698 the candidate. */
4699 if (cand->pos == IP_ORIGINAL
4700 && cand->incremented_at == use->stmt)
4702 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4703 ERROR_MARK, -1);
4704 return true;
4707 cost = get_computation_cost (data, use, cand, false, &depends_on,
4708 NULL, &inv_expr_id);
4710 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4711 inv_expr_id);
4713 return !infinite_cost_p (cost);
4716 /* Determines cost of basing replacement of USE on CAND in an address. */
4718 static bool
4719 determine_use_iv_cost_address (struct ivopts_data *data,
4720 struct iv_use *use, struct iv_cand *cand)
4722 bitmap depends_on;
4723 bool can_autoinc;
4724 int inv_expr_id = -1;
4725 struct iv_use *sub_use;
4726 comp_cost sub_cost;
4727 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4728 &can_autoinc, &inv_expr_id);
4730 if (cand->ainc_use == use)
4732 if (can_autoinc)
4733 cost.cost -= cand->cost_step;
4734 /* If we generated the candidate solely for exploiting autoincrement
4735 opportunities, and it turns out it can't be used, set the cost to
4736 infinity to make sure we ignore it. */
4737 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4738 cost = infinite_cost;
4740 for (sub_use = use->next;
4741 sub_use && !infinite_cost_p (cost);
4742 sub_use = sub_use->next)
4744 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4745 &can_autoinc, &inv_expr_id);
4746 cost = add_costs (cost, sub_cost);
4749 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4750 inv_expr_id);
4752 return !infinite_cost_p (cost);
4755 /* Computes value of candidate CAND at position AT in iteration NITER, and
4756 stores it to VAL. */
4758 static void
4759 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4760 aff_tree *val)
4762 aff_tree step, delta, nit;
4763 struct iv *iv = cand->iv;
4764 tree type = TREE_TYPE (iv->base);
4765 /* unsigned_type_for also maps pointer types to an unsigned integer
4766 type of the same precision, so no separate sizetype handling is
4767 needed here. */
4768 tree steptype = unsigned_type_for (type);
4770 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4771 aff_combination_convert (&step, steptype);
4772 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4773 aff_combination_convert (&nit, steptype);
4774 aff_combination_mult (&nit, &step, &delta);
4775 if (stmt_after_increment (loop, cand, at))
4776 aff_combination_add (&delta, &step);
4778 tree_to_aff_combination (iv->base, type, val);
4779 if (!POINTER_TYPE_P (type))
4780 aff_combination_convert (val, steptype);
4781 aff_combination_add (val, &delta);
4784 /* Returns the period of induction variable IV. */
4786 static tree
4787 iv_period (struct iv *iv)
4789 tree step = iv->step, period, type;
4790 tree pow2div;
4792 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4794 type = unsigned_type_for (TREE_TYPE (step));
4795 /* The period of the iv is lcm (step, type_range)/step - 1,
4796 i.e., N*type_range/step - 1. Since the type range is a power
4797 of two, N == step >> num_of_ending_zeros_binary (step),
4798 so the final result is
4800 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4803 pow2div = num_ending_zeros (step);
4805 period = build_low_bits_mask (type,
4806 (TYPE_PRECISION (type)
4807 - tree_to_uhwi (pow2div)));
4809 return period;
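/* The formula above instantiated for an 8-bit unsigned type (a sketch;
   the pass computes this on arbitrary-precision trees).  For
   step = 12 = 0b1100 there are two trailing zero bits, so the period is
   (256 >> 2) - 1 = 63: the first 64 values of the iv are all distinct.
   STEP must be nonzero, mirroring the assert above.  */

static unsigned char
iv_period_sketch_u8 (unsigned char step)
{
  unsigned shift = 0;
  while ((step & 1) == 0)	/* num_ending_zeros (step) */
    {
      step >>= 1;
      shift++;
    }
  return (unsigned char) ((256u >> shift) - 1);
}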
4812 /* Returns the comparison operator used when eliminating the iv USE. */
4814 static enum tree_code
4815 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4817 struct loop *loop = data->current_loop;
4818 basic_block ex_bb;
4819 edge exit;
4821 ex_bb = gimple_bb (use->stmt);
4822 exit = EDGE_SUCC (ex_bb, 0);
4823 if (flow_bb_inside_loop_p (loop, exit->dest))
4824 exit = EDGE_SUCC (ex_bb, 1);
4826 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4829 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4830 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4831 calculation is performed in a non-wrapping type.
4833 TODO: More generally, we could test for the situation that
4834 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4835 This would require knowing the sign of OFFSET. */
4837 static bool
4838 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4840 enum tree_code code;
4841 tree e1, e2;
4842 aff_tree aff_e1, aff_e2, aff_offset;
4844 if (!nowrap_type_p (TREE_TYPE (base)))
4845 return false;
4847 base = expand_simple_operations (base);
4849 if (TREE_CODE (base) == SSA_NAME)
4851 gimple stmt = SSA_NAME_DEF_STMT (base);
4853 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4854 return false;
4856 code = gimple_assign_rhs_code (stmt);
4857 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4858 return false;
4860 e1 = gimple_assign_rhs1 (stmt);
4861 e2 = gimple_assign_rhs2 (stmt);
4863 else
4865 code = TREE_CODE (base);
4866 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4867 return false;
4868 e1 = TREE_OPERAND (base, 0);
4869 e2 = TREE_OPERAND (base, 1);
4872 /* Use affine expansion as a deeper inspection to prove the equality. */
4873 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4874 &aff_e2, &data->name_expansion_cache);
4875 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4876 &aff_offset, &data->name_expansion_cache);
4877 aff_combination_scale (&aff_offset, -1);
4878 switch (code)
4880 case PLUS_EXPR:
4881 aff_combination_add (&aff_e2, &aff_offset);
4882 if (aff_combination_zero_p (&aff_e2))
4883 return true;
4885 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4886 &aff_e1, &data->name_expansion_cache);
4887 aff_combination_add (&aff_e1, &aff_offset);
4888 return aff_combination_zero_p (&aff_e1);
4890 case POINTER_PLUS_EXPR:
4891 aff_combination_add (&aff_e2, &aff_offset);
4892 return aff_combination_zero_p (&aff_e2);
4894 default:
4895 return false;
4899 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4900 comparison with CAND. NITER describes the number of iterations of
4901 the loops. If successful, the comparison in COMP_P is altered accordingly.
4903 We aim to handle the following situation:
4905 sometype *base, *p;
4906 int a, b, i;
4908 i = a;
4909 p = p_0 = base + a;
4911 do
4912 {
4913 bla (*p);
4914 p++;
4915 i++;
4916 }
4917 while (i < b);
4919 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4920 We aim to optimize this to
4922 p = p_0 = base + a;
4923 do
4924 {
4925 bla (*p);
4926 p++;
4927 }
4928 while (p < p_0 - a + b);
4930 This preserves correctness, since the pointer arithmetic does not
4931 overflow. More precisely:
4933 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4934 overflow in computing it or the values of p.
4935 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4936 overflow. To prove this, we use the fact that p_0 = base + a. */
4938 static bool
4939 iv_elimination_compare_lt (struct ivopts_data *data,
4940 struct iv_cand *cand, enum tree_code *comp_p,
4941 struct tree_niter_desc *niter)
4943 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4944 struct aff_tree nit, tmpa, tmpb;
4945 enum tree_code comp;
4946 HOST_WIDE_INT step;
4948 /* We need to know that the candidate induction variable does not overflow.
4949 While more complex analysis may be used to prove this, for now just
4950 check that the variable appears in the original program and that it
4951 is computed in a type that guarantees no overflows. */
4952 cand_type = TREE_TYPE (cand->iv->base);
4953 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4954 return false;
4956 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4957 the calculation of the BOUND could overflow, making the comparison
4958 invalid. */
4959 if (!data->loop_single_exit_p)
4960 return false;
4962 /* We need to be able to decide whether the candidate is increasing or
4963 decreasing in order to choose the right comparison operator. */
4964 if (!cst_and_fits_in_hwi (cand->iv->step))
4965 return false;
4966 step = int_cst_value (cand->iv->step);
4968 /* Check that the number of iterations matches the expected pattern:
4969 a + 1 > b ? 0 : b - a - 1. */
4970 mbz = niter->may_be_zero;
4971 if (TREE_CODE (mbz) == GT_EXPR)
4973 /* Handle a + 1 > b. */
4974 tree op0 = TREE_OPERAND (mbz, 0);
4975 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4977 a = TREE_OPERAND (op0, 0);
4978 b = TREE_OPERAND (mbz, 1);
4980 else
4981 return false;
4983 else if (TREE_CODE (mbz) == LT_EXPR)
4985 tree op1 = TREE_OPERAND (mbz, 1);
4987 /* Handle b < a + 1. */
4988 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4990 a = TREE_OPERAND (op1, 0);
4991 b = TREE_OPERAND (mbz, 0);
4993 else
4994 return false;
4996 else
4997 return false;
4999 /* Expected number of iterations is B - A - 1. Check that it matches
5000 the actual number, i.e., that B - A - NITER = 1. */
5001 tree_to_aff_combination (niter->niter, nit_type, &nit);
5002 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5003 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5004 aff_combination_scale (&nit, -1);
5005 aff_combination_scale (&tmpa, -1);
5006 aff_combination_add (&tmpb, &tmpa);
5007 aff_combination_add (&tmpb, &nit);
5008 if (tmpb.n != 0 || tmpb.offset != 1)
5009 return false;
5011 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5012 overflow. */
5013 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5014 cand->iv->step,
5015 fold_convert (TREE_TYPE (cand->iv->step), a));
5016 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5017 return false;
5019 /* Determine the new comparison operator. */
5020 comp = step < 0 ? GT_EXPR : LT_EXPR;
5021 if (*comp_p == NE_EXPR)
5022 *comp_p = comp;
5023 else if (*comp_p == EQ_EXPR)
5024 *comp_p = invert_tree_comparison (comp, false);
5025 else
5026 gcc_unreachable ();
5028 return true;
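/* The B - A - NITER == 1 consistency check above, replayed on plain
   integers (hypothetical helper): for the pattern in the comment,
   whenever the loop runs at all we have niter = b - a - 1, so the
   difference is exactly 1.  */

static int
niter_pattern_sketch (int a, int b)
{
  int niter;
  if (a + 1 > b)
    return 1;			/* may_be_zero case: the loop is skipped */
  niter = b - a - 1;
  return b - a - niter == 1;	/* always holds for this pattern */
}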
5031 /* Check whether it is possible to express the condition in USE by comparison
5032 of candidate CAND. If so, store the value compared with to BOUND, and the
5033 comparison operator to COMP. */
5035 static bool
5036 may_eliminate_iv (struct ivopts_data *data,
5037 struct iv_use *use, struct iv_cand *cand, tree *bound,
5038 enum tree_code *comp)
5040 basic_block ex_bb;
5041 edge exit;
5042 tree period;
5043 struct loop *loop = data->current_loop;
5044 aff_tree bnd;
5045 struct tree_niter_desc *desc = NULL;
5047 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5048 return false;
5050 /* For now, this works only for exits that dominate the loop latch.
5051 TODO: extend to other conditions inside the loop body. */
5052 ex_bb = gimple_bb (use->stmt);
5053 if (use->stmt != last_stmt (ex_bb)
5054 || gimple_code (use->stmt) != GIMPLE_COND
5055 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5056 return false;
5058 exit = EDGE_SUCC (ex_bb, 0);
5059 if (flow_bb_inside_loop_p (loop, exit->dest))
5060 exit = EDGE_SUCC (ex_bb, 1);
5061 if (flow_bb_inside_loop_p (loop, exit->dest))
5062 return false;
5064 desc = niter_for_exit (data, exit);
5065 if (!desc)
5066 return false;
5068 /* Determine whether we can use the variable to test the exit condition.
5069 This is the case iff the period of the induction variable is greater
5070 than the number of iterations for which the exit condition is true. */
5071 period = iv_period (cand->iv);
5073 /* If the number of iterations is constant, compare against it directly. */
5074 if (TREE_CODE (desc->niter) == INTEGER_CST)
5076 /* See cand_value_at. */
5077 if (stmt_after_increment (loop, cand, use->stmt))
5079 if (!tree_int_cst_lt (desc->niter, period))
5080 return false;
5082 else
5084 if (tree_int_cst_lt (period, desc->niter))
5085 return false;
5089 /* If not, and if this is the only possible exit of the loop, see whether
5090 we can get a conservative estimate on the number of iterations of the
5091 entire loop and compare against that instead. */
5092 else
5094 widest_int period_value, max_niter;
5096 max_niter = desc->max;
5097 if (stmt_after_increment (loop, cand, use->stmt))
5098 max_niter += 1;
5099 period_value = wi::to_widest (period);
5100 if (wi::gtu_p (max_niter, period_value))
5102 /* See if we can take advantage of inferred loop bound information. */
5103 if (data->loop_single_exit_p)
5105 if (!max_loop_iterations (loop, &max_niter))
5106 return false;
5107 /* The loop bound is already adjusted by adding 1. */
5108 if (wi::gtu_p (max_niter, period_value))
5109 return false;
5111 else
5112 return false;
5116 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5118 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5119 aff_combination_to_tree (&bnd));
5120 *comp = iv_elimination_compare (data, use);
5122 /* It is unlikely that computing the number of iterations using division
5123 would be more profitable than keeping the original induction variable. */
5124 if (expression_expensive_p (*bound))
5125 return false;
5127 /* Sometimes, it is possible to handle the situation that the number of
5128 iterations may be zero unless additional assumptions hold, by using <
5129 instead of != in the exit condition.
5131 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5132 base the exit condition on it. However, that is often too
5133 expensive. */
5134 if (!integer_zerop (desc->may_be_zero))
5135 return iv_elimination_compare_lt (data, cand, comp, desc);
5137 return true;
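/* The period test above in miniature (hypothetical helper, constant
   iteration counts only): a value taken after the increment has advanced
   one extra step, so it requires a strictly smaller iteration count;
   see cand_value_at.  */

static int
period_covers_niter_sketch (unsigned long period, unsigned long niter,
			    int after_increment)
{
  return after_increment ? niter < period : niter <= period;
}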
5140 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5141 be copied if it is used in the loop body and DATA->body_includes_call. */
5143 static int
5144 parm_decl_cost (struct ivopts_data *data, tree bound)
5146 tree sbound = bound;
5147 STRIP_NOPS (sbound);
5149 if (TREE_CODE (sbound) == SSA_NAME
5150 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5151 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5152 && data->body_includes_call)
5153 return COSTS_N_INSNS (1);
5155 return 0;
5158 /* Determines cost of basing replacement of USE on CAND in a condition. */
5160 static bool
5161 determine_use_iv_cost_condition (struct ivopts_data *data,
5162 struct iv_use *use, struct iv_cand *cand)
5164 tree bound = NULL_TREE;
5165 struct iv *cmp_iv;
5166 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5167 comp_cost elim_cost, express_cost, cost, bound_cost;
5168 bool ok;
5169 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5170 tree *control_var, *bound_cst;
5171 enum tree_code comp = ERROR_MARK;
5173 /* Only consider real candidates. */
5174 if (!cand->iv)
5176 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5177 ERROR_MARK, -1);
5178 return false;
5181 /* Try iv elimination. */
5182 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5184 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5185 if (elim_cost.cost == 0)
5186 elim_cost.cost = parm_decl_cost (data, bound);
5187 else if (TREE_CODE (bound) == INTEGER_CST)
5188 elim_cost.cost = 0;
5189 /* If we replace a loop condition 'i < n' with 'p < base + n',
5190 depends_on_elim will have 'base' and 'n' set, which implies
5191 that both 'base' and 'n' will be live during the loop. More likely,
5192 'base + n' will be loop invariant, resulting in only one live value
5193 during the loop. So in that case we clear depends_on_elim and set
5194 elim_inv_expr_id instead. */
5195 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5197 elim_inv_expr_id = get_expr_id (data, bound);
5198 bitmap_clear (depends_on_elim);
5200 /* The bound is a loop invariant, so it will only be computed
5201 once. */
5202 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5204 else
5205 elim_cost = infinite_cost;
5207 /* Try expressing the original giv. If it is compared with an invariant,
5208 note that we cannot get rid of it. */
5209 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5210 NULL, &cmp_iv);
5211 gcc_assert (ok);
5213 /* When the condition is a comparison of the candidate IV against
5214 zero, prefer this IV.
5216 TODO: The constant that we're subtracting from the cost should
5217 be target-dependent. This information should be added to the
5218 target costs for each backend. */
5219 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5220 && integer_zerop (*bound_cst)
5221 && (operand_equal_p (*control_var, cand->var_after, 0)
5222 || operand_equal_p (*control_var, cand->var_before, 0)))
5223 elim_cost.cost -= 1;
5225 express_cost = get_computation_cost (data, use, cand, false,
5226 &depends_on_express, NULL,
5227 &express_inv_expr_id);
5228 fd_ivopts_data = data;
5229 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5231 /* Count the cost of the original bound as well. */
5232 bound_cost = force_var_cost (data, *bound_cst, NULL);
5233 if (bound_cost.cost == 0)
5234 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5235 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5236 bound_cost.cost = 0;
5237 express_cost.cost += bound_cost.cost;
5239 /* Choose the better approach, preferring the eliminated IV. */
5240 if (compare_costs (elim_cost, express_cost) <= 0)
5242 cost = elim_cost;
5243 depends_on = depends_on_elim;
5244 depends_on_elim = NULL;
5245 inv_expr_id = elim_inv_expr_id;
5247 else
5249 cost = express_cost;
5250 depends_on = depends_on_express;
5251 depends_on_express = NULL;
5252 bound = NULL_TREE;
5253 comp = ERROR_MARK;
5254 inv_expr_id = express_inv_expr_id;
5257 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5259 if (depends_on_elim)
5260 BITMAP_FREE (depends_on_elim);
5261 if (depends_on_express)
5262 BITMAP_FREE (depends_on_express);
5264 return !infinite_cost_p (cost);
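/* The final choice above in miniature (hypothetical helper with plain
   integer costs): the cheaper of the two independently computed costs
   wins, with ties going to elimination.  */

static int
prefer_elimination_sketch (int elim_cost, int express_cost)
{
  return elim_cost <= express_cost;
}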
5267 /* Determines cost of basing replacement of USE on CAND. Returns false
5268 if USE cannot be based on CAND. */
5270 static bool
5271 determine_use_iv_cost (struct ivopts_data *data,
5272 struct iv_use *use, struct iv_cand *cand)
5274 switch (use->type)
5276 case USE_NONLINEAR_EXPR:
5277 return determine_use_iv_cost_generic (data, use, cand);
5279 case USE_ADDRESS:
5280 return determine_use_iv_cost_address (data, use, cand);
5282 case USE_COMPARE:
5283 return determine_use_iv_cost_condition (data, use, cand);
5285 default:
5286 gcc_unreachable ();
5290 /* Return true if get_computation_cost indicates that autoincrement is
5291 a possibility for the pair of USE and CAND, false otherwise. */
5293 static bool
5294 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5295 struct iv_cand *cand)
5297 bitmap depends_on;
5298 bool can_autoinc;
5299 comp_cost cost;
5301 if (use->type != USE_ADDRESS)
5302 return false;
5304 cost = get_computation_cost (data, use, cand, true, &depends_on,
5305 &can_autoinc, NULL);
5307 BITMAP_FREE (depends_on);
5309 return !infinite_cost_p (cost) && can_autoinc;
5312 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5313 use that allows autoincrement, and set their AINC_USE if possible. */
5315 static void
5316 set_autoinc_for_original_candidates (struct ivopts_data *data)
5318 unsigned i, j;
5320 for (i = 0; i < n_iv_cands (data); i++)
5322 struct iv_cand *cand = iv_cand (data, i);
5323 struct iv_use *closest_before = NULL;
5324 struct iv_use *closest_after = NULL;
5325 if (cand->pos != IP_ORIGINAL)
5326 continue;
5328 for (j = 0; j < n_iv_uses (data); j++)
5330 struct iv_use *use = iv_use (data, j);
5331 unsigned uid = gimple_uid (use->stmt);
5333 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5334 continue;
5336 if (uid < gimple_uid (cand->incremented_at)
5337 && (closest_before == NULL
5338 || uid > gimple_uid (closest_before->stmt)))
5339 closest_before = use;
5341 if (uid > gimple_uid (cand->incremented_at)
5342 && (closest_after == NULL
5343 || uid < gimple_uid (closest_after->stmt)))
5344 closest_after = use;
5347 if (closest_before != NULL
5348 && autoinc_possible_for_pair (data, closest_before, cand))
5349 cand->ainc_use = closest_before;
5350 else if (closest_after != NULL
5351 && autoinc_possible_for_pair (data, closest_after, cand))
5352 cand->ainc_use = closest_after;
5356 /* Finds the candidates for the induction variables. */
5358 static void
5359 find_iv_candidates (struct ivopts_data *data)
5361 /* Add commonly used ivs. */
5362 add_standard_iv_candidates (data);
5364 /* Add old induction variables. */
5365 add_old_ivs_candidates (data);
5367 /* Add induction variables derived from uses. */
5368 add_derived_ivs_candidates (data);
5370 set_autoinc_for_original_candidates (data);
5372 /* Record the important candidates. */
5373 record_important_candidates (data);
5376 /* Determines costs of basing the use of the iv on an iv candidate. */
5378 static void
5379 determine_use_iv_costs (struct ivopts_data *data)
5381 unsigned i, j;
5382 struct iv_use *use;
5383 struct iv_cand *cand;
5384 bitmap to_clear = BITMAP_ALLOC (NULL);
5386 alloc_use_cost_map (data);
5388 for (i = 0; i < n_iv_uses (data); i++)
5390 use = iv_use (data, i);
5392 if (data->consider_all_candidates)
5394 for (j = 0; j < n_iv_cands (data); j++)
5396 cand = iv_cand (data, j);
5397 determine_use_iv_cost (data, use, cand);
5400 else
5402 bitmap_iterator bi;
5404 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5406 cand = iv_cand (data, j);
5407 if (!determine_use_iv_cost (data, use, cand))
5408 bitmap_set_bit (to_clear, j);
5411 /* Remove the candidates for which the cost is infinite from
5412 the list of related candidates. */
5413 bitmap_and_compl_into (use->related_cands, to_clear);
5414 bitmap_clear (to_clear);
5418 BITMAP_FREE (to_clear);
5420 if (dump_file && (dump_flags & TDF_DETAILS))
5422 fprintf (dump_file, "Use-candidate costs:\n");
5424 for (i = 0; i < n_iv_uses (data); i++)
5426 use = iv_use (data, i);
5428 fprintf (dump_file, "Use %d:\n", i);
5429 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5430 for (j = 0; j < use->n_map_members; j++)
5432 if (!use->cost_map[j].cand
5433 || infinite_cost_p (use->cost_map[j].cost))
5434 continue;
5436 fprintf (dump_file, " %d\t%d\t%d\t",
5437 use->cost_map[j].cand->id,
5438 use->cost_map[j].cost.cost,
5439 use->cost_map[j].cost.complexity);
5440 if (use->cost_map[j].depends_on)
5441 bitmap_print (dump_file,
5442 use->cost_map[j].depends_on, "","");
5443 if (use->cost_map[j].inv_expr_id != -1)
5444 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5445 fprintf (dump_file, "\n");
5448 fprintf (dump_file, "\n");
5450 fprintf (dump_file, "\n");
5454 /* Determines cost of the candidate CAND. */
5456 static void
5457 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5459 comp_cost cost_base;
5460 unsigned cost, cost_step;
5461 tree base;
5463 if (!cand->iv)
5465 cand->cost = 0;
5466 return;
5469 /* There are two costs associated with the candidate -- its increment
5470 and its initialization. The second is almost negligible for any loop
5471 that rolls enough, so we give it only very little weight. */
5473 base = cand->iv->base;
5474 cost_base = force_var_cost (data, base, NULL);
5475 /* It is exceptional for the iv register to happen to be initialized with
5476 the proper value at no cost. In general, there will at least be a regcopy
5477 or a const set. */
5478 if (cost_base.cost == 0)
5479 cost_base.cost = COSTS_N_INSNS (1);
5480 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5482 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5484 /* Prefer the original ivs unless we may gain something by replacing it.
5485 The reason is to make debugging simpler; so this is not relevant for
5486 artificial ivs created by other optimization passes. */
5487 if (cand->pos != IP_ORIGINAL
5488 || !SSA_NAME_VAR (cand->var_before)
5489 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5490 cost++;
5492 /* Prefer not to insert statements into latch unless there are some
5493 already (so that we do not create unnecessary jumps). */
5494 if (cand->pos == IP_END
5495 && empty_block_p (ip_end_pos (data->current_loop)))
5496 cost++;
5498 cand->cost = cost;
5499 cand->cost_step = cost_step;
5502 /* Determines costs of computation of the candidates. */
5504 static void
5505 determine_iv_costs (struct ivopts_data *data)
5507 unsigned i;
5509 if (dump_file && (dump_flags & TDF_DETAILS))
5511 fprintf (dump_file, "Candidate costs:\n");
5512 fprintf (dump_file, " cand\tcost\n");
5515 for (i = 0; i < n_iv_cands (data); i++)
5517 struct iv_cand *cand = iv_cand (data, i);
5519 determine_iv_cost (data, cand);
5521 if (dump_file && (dump_flags & TDF_DETAILS))
5522 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5525 if (dump_file && (dump_flags & TDF_DETAILS))
5526 fprintf (dump_file, "\n");
5529 /* Calculates cost for having SIZE induction variables. */
5531 static unsigned
5532 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5534 /* We add size to the cost, so that we prefer eliminating ivs
5535 if possible. */
5536 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5537 data->body_includes_call);
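/* A toy stand-in for the penalty above (the real
   estimate_reg_pressure_cost lives in cfgloopanal.c; this model and its
   parameters are made up): below the register budget only the size bias
   remains, beyond it every extra live value is charged like a spill.  */

static unsigned
global_cost_for_size_sketch (unsigned size, unsigned regs_used,
			     unsigned avail_regs, unsigned spill_cost)
{
  unsigned pressure = size + regs_used;
  if (pressure <= avail_regs)
    return size;
  return size + (pressure - avail_regs) * spill_cost;
}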
5540 /* For each size of the induction variable set determine the penalty. */
5542 static void
5543 determine_set_costs (struct ivopts_data *data)
5545 unsigned j, n;
5546 gphi *phi;
5547 gphi_iterator psi;
5548 tree op;
5549 struct loop *loop = data->current_loop;
5550 bitmap_iterator bi;
5552 if (dump_file && (dump_flags & TDF_DETAILS))
5554 fprintf (dump_file, "Global costs:\n");
5555 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5556 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5557 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5558 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5561 n = 0;
5562 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5564 phi = psi.phi ();
5565 op = PHI_RESULT (phi);
5567 if (virtual_operand_p (op))
5568 continue;
5570 if (get_iv (data, op))
5571 continue;
5573 n++;
5576 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5578 struct version_info *info = ver_info (data, j);
5580 if (info->inv_id && info->has_nonlin_use)
5581 n++;
5584 data->regs_used = n;
5585 if (dump_file && (dump_flags & TDF_DETAILS))
5586 fprintf (dump_file, " regs_used %d\n", n);
5588 if (dump_file && (dump_flags & TDF_DETAILS))
5590 fprintf (dump_file, " cost for size:\n");
5591 fprintf (dump_file, " ivs\tcost\n");
5592 for (j = 0; j <= 2 * target_avail_regs; j++)
5593 fprintf (dump_file, " %d\t%d\n", j,
5594 ivopts_global_cost_for_size (data, j));
5595 fprintf (dump_file, "\n");
5599 /* Returns true if A is a cheaper cost pair than B. */
5601 static bool
5602 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5604 int cmp;
5606 if (!a)
5607 return false;
5609 if (!b)
5610 return true;
5612 cmp = compare_costs (a->cost, b->cost);
5613 if (cmp < 0)
5614 return true;
5616 if (cmp > 0)
5617 return false;
5619 /* In case the costs are the same, prefer the cheaper candidate. */
5620 if (a->cand->cost < b->cand->cost)
5621 return true;
5623 return false;
5627 /* Returns the candidate by which USE is expressed in IVS. */
5629 static struct cost_pair *
5630 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5632 return ivs->cand_for_use[use->id];
5635 /* Computes the cost field of IVS structure. */
5637 static void
5638 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5640 comp_cost cost = ivs->cand_use_cost;
5642 cost.cost += ivs->cand_cost;
5644 cost.cost += ivopts_global_cost_for_size (data,
5645 ivs->n_regs + ivs->num_used_inv_expr);
5647 ivs->cost = cost;
5650 /* Removes the invariants in set INVS from set IVS. */
5652 static void
5653 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5655 bitmap_iterator bi;
5656 unsigned iid;
5658 if (!invs)
5659 return;
5661 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5663 ivs->n_invariant_uses[iid]--;
5664 if (ivs->n_invariant_uses[iid] == 0)
5665 ivs->n_regs--;
5669 /* Set USE not to be expressed by any candidate in IVS. */
5671 static void
5672 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5673 struct iv_use *use)
5675 unsigned uid = use->id, cid;
5676 struct cost_pair *cp;
5678 cp = ivs->cand_for_use[uid];
5679 if (!cp)
5680 return;
5681 cid = cp->cand->id;
5683 ivs->bad_uses++;
5684 ivs->cand_for_use[uid] = NULL;
5685 ivs->n_cand_uses[cid]--;
5687 if (ivs->n_cand_uses[cid] == 0)
5689 bitmap_clear_bit (ivs->cands, cid);
5690 /* Do not count the pseudocandidates. */
5691 if (cp->cand->iv)
5692 ivs->n_regs--;
5693 ivs->n_cands--;
5694 ivs->cand_cost -= cp->cand->cost;
5696 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5699 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5701 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5703 if (cp->inv_expr_id != -1)
5705 ivs->used_inv_expr[cp->inv_expr_id]--;
5706 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5707 ivs->num_used_inv_expr--;
5709 iv_ca_recount_cost (data, ivs);
5712 /* Add invariants in set INVS to set IVS. */
5714 static void
5715 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5717 bitmap_iterator bi;
5718 unsigned iid;
5720 if (!invs)
5721 return;
5723 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5725 ivs->n_invariant_uses[iid]++;
5726 if (ivs->n_invariant_uses[iid] == 1)
5727 ivs->n_regs++;
5731 /* Set cost pair for USE in set IVS to CP. */
5733 static void
5734 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5735 struct iv_use *use, struct cost_pair *cp)
5737 unsigned uid = use->id, cid;
5739 if (ivs->cand_for_use[uid] == cp)
5740 return;
5742 if (ivs->cand_for_use[uid])
5743 iv_ca_set_no_cp (data, ivs, use);
5745 if (cp)
5747 cid = cp->cand->id;
5749 ivs->bad_uses--;
5750 ivs->cand_for_use[uid] = cp;
5751 ivs->n_cand_uses[cid]++;
5752 if (ivs->n_cand_uses[cid] == 1)
5754 bitmap_set_bit (ivs->cands, cid);
5755 /* Do not count the pseudocandidates. */
5756 if (cp->cand->iv)
5757 ivs->n_regs++;
5758 ivs->n_cands++;
5759 ivs->cand_cost += cp->cand->cost;
5761 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5764 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5765 iv_ca_set_add_invariants (ivs, cp->depends_on);
5767 if (cp->inv_expr_id != -1)
5769 ivs->used_inv_expr[cp->inv_expr_id]++;
5770 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5771 ivs->num_used_inv_expr++;
5773 iv_ca_recount_cost (data, ivs);
5777 /* Extend set IVS by expressing USE by some of the candidates in it
5778 if possible. Consider all important candidates if candidates in
5779 set IVS don't give any result. */
5781 static void
5782 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5783 struct iv_use *use)
5785 struct cost_pair *best_cp = NULL, *cp;
5786 bitmap_iterator bi;
5787 unsigned i;
5788 struct iv_cand *cand;
5790 gcc_assert (ivs->upto >= use->id);
5791 ivs->upto++;
5792 ivs->bad_uses++;
5794 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5796 cand = iv_cand (data, i);
5797 cp = get_use_iv_cost (data, use, cand);
5798 if (cheaper_cost_pair (cp, best_cp))
5799 best_cp = cp;
5802 if (best_cp == NULL)
5804 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5806 cand = iv_cand (data, i);
5807 cp = get_use_iv_cost (data, use, cand);
5808 if (cheaper_cost_pair (cp, best_cp))
5809 best_cp = cp;
5813 iv_ca_set_cp (data, ivs, use, best_cp);
5816 /* Get cost for assignment IVS. */
5818 static comp_cost
5819 iv_ca_cost (struct iv_ca *ivs)
5821 /* This was a conditional expression but it triggered a bug in
5822 Sun C 5.5. */
5823 if (ivs->bad_uses)
5824 return infinite_cost;
5825 else
5826 return ivs->cost;
5829 /* Returns true if all dependences of CP are among invariants in IVS. */
5831 static bool
5832 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5834 unsigned i;
5835 bitmap_iterator bi;
5837 if (!cp->depends_on)
5838 return true;
5840 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5842 if (ivs->n_invariant_uses[i] == 0)
5843 return false;
5846 return true;
5849 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5850 it before NEXT_CHANGE. */
5852 static struct iv_ca_delta *
5853 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5854 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5856 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5858 change->use = use;
5859 change->old_cp = old_cp;
5860 change->new_cp = new_cp;
5861 change->next_change = next_change;
5863 return change;
5866 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5867 are rewritten. */
5869 static struct iv_ca_delta *
5870 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5872 struct iv_ca_delta *last;
5874 if (!l2)
5875 return l1;
5877 if (!l1)
5878 return l2;
5880 for (last = l1; last->next_change; last = last->next_change)
5881 continue;
5882 last->next_change = l2;
5884 return l1;
5887 /* Reverse the list of changes DELTA, forming the inverse to it. */
5889 static struct iv_ca_delta *
5890 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5892 struct iv_ca_delta *act, *next, *prev = NULL;
5894 for (act = delta; act; act = next)
5896 next = act->next_change;
5897 act->next_change = prev;
5898 prev = act;
5900 std::swap (act->old_cp, act->new_cp);
5903 return prev;
5906 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5907 reverted instead. */
5909 static void
5910 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5911 struct iv_ca_delta *delta, bool forward)
5913 struct cost_pair *from, *to;
5914 struct iv_ca_delta *act;
5916 if (!forward)
5917 delta = iv_ca_delta_reverse (delta);
5919 for (act = delta; act; act = act->next_change)
5921 from = act->old_cp;
5922 to = act->new_cp;
5923 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5924 iv_ca_set_cp (data, ivs, act->use, to);
5927 if (!forward)
5928 iv_ca_delta_reverse (delta);
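/* The usage pattern the delta machinery exists for, as followed by
   iv_ca_extend and iv_ca_narrow below (a hypothetical wrapper, not part
   of the pass): apply the change tentatively, measure the resulting
   cost, then roll the change back.  */

static comp_cost
iv_ca_measure_delta_sketch (struct ivopts_data *data, struct iv_ca *ivs,
			    struct iv_ca_delta *delta)
{
  comp_cost cost;
  iv_ca_delta_commit (data, ivs, delta, true);	/* apply */
  cost = iv_ca_cost (ivs);			/* measure */
  iv_ca_delta_commit (data, ivs, delta, false);	/* revert */
  return cost;
}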
5931 /* Returns true if CAND is used in IVS. */
5933 static bool
5934 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5936 return ivs->n_cand_uses[cand->id] > 0;
5939 /* Returns number of induction variable candidates in the set IVS. */
5941 static unsigned
5942 iv_ca_n_cands (struct iv_ca *ivs)
5944 return ivs->n_cands;
5947 /* Free the list of changes DELTA. */
5949 static void
5950 iv_ca_delta_free (struct iv_ca_delta **delta)
5952 struct iv_ca_delta *act, *next;
5954 for (act = *delta; act; act = next)
5956 next = act->next_change;
5957 free (act);
5960 *delta = NULL;
5963 /* Allocates a new iv candidate assignment. */
5965 static struct iv_ca *
5966 iv_ca_new (struct ivopts_data *data)
5968 struct iv_ca *nw = XNEW (struct iv_ca);
5970 nw->upto = 0;
5971 nw->bad_uses = 0;
5972 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5973 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5974 nw->cands = BITMAP_ALLOC (NULL);
5975 nw->n_cands = 0;
5976 nw->n_regs = 0;
5977 nw->cand_use_cost = no_cost;
5978 nw->cand_cost = 0;
5979 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5980 nw->cost = no_cost;
5981 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5982 nw->num_used_inv_expr = 0;
5984 return nw;
5987 /* Free memory occupied by the set IVS. */
5989 static void
5990 iv_ca_free (struct iv_ca **ivs)
5992 free ((*ivs)->cand_for_use);
5993 free ((*ivs)->n_cand_uses);
5994 BITMAP_FREE ((*ivs)->cands);
5995 free ((*ivs)->n_invariant_uses);
5996 free ((*ivs)->used_inv_expr);
5997 free (*ivs);
5998 *ivs = NULL;
6001 /* Dumps IVS to FILE. */
6003 static void
6004 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6006 const char *pref = " invariants ";
6007 unsigned i;
6008 comp_cost cost = iv_ca_cost (ivs);
6010 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6011 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6012 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6013 bitmap_print (file, ivs->cands, " candidates: ","\n");
6015 for (i = 0; i < ivs->upto; i++)
6017 struct iv_use *use = iv_use (data, i);
6018 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6019 if (cp)
6020 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6021 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6022 else
6023 fprintf (file, " use:%d --> ??\n", use->id);
6026 for (i = 1; i <= data->max_inv_id; i++)
6027 if (ivs->n_invariant_uses[i])
6029 fprintf (file, "%s%d", pref, i);
6030 pref = ", ";
6032 fprintf (file, "\n\n");
6035 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6036 new set, and store differences in DELTA. Number of induction variables
6037 in the new set is stored to N_IVS. MIN_NCAND is a flag: when it is true,
6038 the function tries to find a solution with a minimal number of iv candidates. */
6040 static comp_cost
6041 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6042 struct iv_cand *cand, struct iv_ca_delta **delta,
6043 unsigned *n_ivs, bool min_ncand)
6045 unsigned i;
6046 comp_cost cost;
6047 struct iv_use *use;
6048 struct cost_pair *old_cp, *new_cp;
6050 *delta = NULL;
6051 for (i = 0; i < ivs->upto; i++)
6053 use = iv_use (data, i);
6054 old_cp = iv_ca_cand_for_use (ivs, use);
6056 if (old_cp
6057 && old_cp->cand == cand)
6058 continue;
6060 new_cp = get_use_iv_cost (data, use, cand);
6061 if (!new_cp)
6062 continue;
6064 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6065 continue;
6067 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6068 continue;
6070 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6073 iv_ca_delta_commit (data, ivs, *delta, true);
6074 cost = iv_ca_cost (ivs);
6075 if (n_ivs)
6076 *n_ivs = iv_ca_n_cands (ivs);
6077 iv_ca_delta_commit (data, ivs, *delta, false);
6079 return cost;
6082 /* Try narrowing set IVS by removing CAND. Return the cost of
6083 the new set and store the differences in DELTA. START is
6084 the candidate with which we start narrowing. */
6086 static comp_cost
6087 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6088 struct iv_cand *cand, struct iv_cand *start,
6089 struct iv_ca_delta **delta)
6091 unsigned i, ci;
6092 struct iv_use *use;
6093 struct cost_pair *old_cp, *new_cp, *cp;
6094 bitmap_iterator bi;
6095 struct iv_cand *cnd;
6096 comp_cost cost, best_cost, acost;
6098 *delta = NULL;
6099 for (i = 0; i < n_iv_uses (data); i++)
6101 use = iv_use (data, i);
6103 old_cp = iv_ca_cand_for_use (ivs, use);
6104 if (old_cp->cand != cand)
6105 continue;
6107 best_cost = iv_ca_cost (ivs);
6108 /* Start narrowing with START. */
6109 new_cp = get_use_iv_cost (data, use, start);
6111 if (data->consider_all_candidates)
6113 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6115 if (ci == cand->id || (start && ci == start->id))
6116 continue;
6118 cnd = iv_cand (data, ci);
6120 cp = get_use_iv_cost (data, use, cnd);
6121 if (!cp)
6122 continue;
6124 iv_ca_set_cp (data, ivs, use, cp);
6125 acost = iv_ca_cost (ivs);
6127 if (compare_costs (acost, best_cost) < 0)
6129 best_cost = acost;
6130 new_cp = cp;
6134 else
6136 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6138 if (ci == cand->id || (start && ci == start->id))
6139 continue;
6141 cnd = iv_cand (data, ci);
6143 cp = get_use_iv_cost (data, use, cnd);
6144 if (!cp)
6145 continue;
6147 iv_ca_set_cp (data, ivs, use, cp);
6148 acost = iv_ca_cost (ivs);
6150 if (compare_costs (acost, best_cost) < 0)
6152 best_cost = acost;
6153 new_cp = cp;
6157 /* Restore to old cp for use. */
6158 iv_ca_set_cp (data, ivs, use, old_cp);
6160 if (!new_cp)
6162 iv_ca_delta_free (delta);
6163 return infinite_cost;
6166 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6169 iv_ca_delta_commit (data, ivs, *delta, true);
6170 cost = iv_ca_cost (ivs);
6171 iv_ca_delta_commit (data, ivs, *delta, false);
6173 return cost;
6176 /* Try optimizing the set of candidates IVS by removing candidates other
6177 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6178 differences in DELTA. */
6180 static comp_cost
6181 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6182 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6184 bitmap_iterator bi;
6185 struct iv_ca_delta *act_delta, *best_delta;
6186 unsigned i;
6187 comp_cost best_cost, acost;
6188 struct iv_cand *cand;
6190 best_delta = NULL;
6191 best_cost = iv_ca_cost (ivs);
6193 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6195 cand = iv_cand (data, i);
6197 if (cand == except_cand)
6198 continue;
6200 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6202 if (compare_costs (acost, best_cost) < 0)
6204 best_cost = acost;
6205 iv_ca_delta_free (&best_delta);
6206 best_delta = act_delta;
6208 else
6209 iv_ca_delta_free (&act_delta);
6212 if (!best_delta)
6214 *delta = NULL;
6215 return best_cost;
6218 /* Recurse to possibly remove other unnecessary ivs. */
6219 iv_ca_delta_commit (data, ivs, best_delta, true);
6220 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6221 iv_ca_delta_commit (data, ivs, best_delta, false);
6222 *delta = iv_ca_delta_join (best_delta, *delta);
6223 return best_cost;
6226 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6227 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6228 the corresponding cost_pair; otherwise just return BEST_CP. */
6230 static struct cost_pair*
6231 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6232 unsigned int cand_idx, struct iv_cand *old_cand,
6233 struct cost_pair *best_cp)
6235 struct iv_cand *cand;
6236 struct cost_pair *cp;
6238 gcc_assert (old_cand != NULL && best_cp != NULL);
6239 if (cand_idx == old_cand->id)
6240 return best_cp;
6242 cand = iv_cand (data, cand_idx);
6243 cp = get_use_iv_cost (data, use, cand);
6244 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6245 return cp;
6247 return best_cp;
6250 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6251 which are used by more than one iv uses. For each of those candidates,
6252 this function tries to represent iv uses under that candidate using
6253 other ones with lower local cost, then tries to prune the new set.
6254 If the new set has lower cost, It returns the new cost after recording
6255 candidate replacement in list DELTA. */
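/* An illustrative (hypothetical) instance: let candidate c1 serve uses
   u1 and u2 with local costs 4 + 4, while c2 expresses u1 for 2 and c3
   expresses u2 for 3.  Extending or narrowing never moves both uses
   off c1 at once, so {c1} is a local optimum; re-expressing c1's uses
   by c2 and c3 and pruning afterwards reaches the cheaper {c2, c3}.  */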
static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_use *use;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = iv_cand (data, i);

      act_delta = NULL;
      /* Represent the uses under the current candidate using other ones
	 with lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  use = iv_use (data, j);
	  old_cp = iv_ca_cand_for_use (ivs, use);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < n_iv_cands (data); k++)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
	}
      /* No need for further pruning.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (compare_costs (acost, orig_cost) < 0)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added many ivs specific to the uses here, the optimization algorithm
     later would be likely to get stuck in a local minimum, thus causing us
     to create too many ivs.  The approach from few ivs to more seems more
     likely to be successful -- starting from few ivs, replacing an expensive
     use by a specific iv should always be a win.  */
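  /* E.g. (purely illustrative), in

       for (i = 0; i < n; i++)
	 a[i] = b[i] + c[i];

     one generic biv for i can serve all three address uses, whereas
     seeding the set with a separate address iv per array would start
     the search from a solution that is hard to improve later.  */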
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function also tries to
   break a local optimal fixed-point by replacing candidates in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far the candidate selection algorithm tends to choose fewer
	     IVs so that it can handle cases in which loops have many
	     variables but the best choice is often to use only one general
	     biv.  One weakness is that it can't handle the opposite case,
	     in which different candidates should be chosen with respect to
	     each use.  To solve the problem, we replace candidates in the
	     manner described in the comment of iv_ca_replace, thus giving
	     the general algorithm a chance to break the local optimal
	     fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}
    }

  if (!best_delta)
    return false;

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic: we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */
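/* A hypothetical run for illustration (the numbers are invented):
   starting from {i0} with cost 14, one call of try_improve_iv_set may
   extend the set to {i0, c5} (cost 11), the next may prune it back to
   {c5} (cost 9), and the loop below stops as soon as neither extending,
   pruning nor replacing lowers the cost.  */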
static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and then try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates the new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of an iv used in a nonlinear expression)
   using candidate CAND.  */
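/* For example (illustrative GIMPLE, not taken from a real dump): with a
   selected candidate ivtmp that starts at 0 and steps by 4, the use

     x_1 = i_2 * 4;

   can be rewritten as

     x_1 = ivtmp_3;

   folding the multiplication into the candidate's step.  */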
static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to an ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) will introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms
   it into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle a conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (an address that is an iv) using candidate CAND.  */

static void
rewrite_use_address_1 (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
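  /* E.g. (illustrative): for a candidate with base &a[0] and step 4,
     AFF is expressed in an unsigned type; passing the candidate's
     variable as BASE_HINT below tells create_mem_ref which part of AFF
     is the object base rather than a mere index.  */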
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (an address that is an iv) using candidate CAND.  If it is
   the first use of a group, rewrites the sub uses in the group too.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  struct iv_use *next;

  gcc_assert (use->sub_id == 0);
  rewrite_use_address_1 (data, use, cand);
  update_stmt (use->stmt);

  for (next = use->next; next != NULL; next = next->next)
    {
      rewrite_use_address_1 (data, next, cand);
      update_stmt (next->stmt);
    }
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
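/* For example (illustrative): when elimination found a bound for the
   candidate, the exit test

     if (i_1 < n_2)

   may be replaced by

     if (ivtmp_3 != bound_4)

   with bound_4 computed once in the loop preheader.  */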
static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
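		  /* E.g. (illustrative): "# DEBUG x => i_5" binds the
		     DEF by itself and counts once, while "# DEBUG y =>
		     i_5 + 1" embeds it in a larger value and counts
		     twice, forcing the debug temp path below.  */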
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees the memory occupied by the struct tree_niter_desc in *VALUE.
   Callback for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees the data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct iv_use *pre = use, *sub = use->next;

      while (sub)
	{
	  gcc_assert (sub->related_cands == NULL);
	  gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);

	  free (sub->iv);
	  pre = sub;
	  sub = sub->next;
	  free (pre);
	}

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Find the interesting uses (item 1).  */
  find_interesting_uses (data);
  group_address_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Find the candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculate the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);