gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "alias.h"
  69 #include "symtab.h"
  70 #include "tree.h"
  71 #include "fold-const.h"
  72 #include "stor-layout.h"
  73 #include "tm_p.h"
  74 #include "predict.h"
  75 #include "hard-reg-set.h"
  76 #include "function.h"
  77 #include "dominance.h"
  78 #include "cfg.h"
  79 #include "basic-block.h"
  80 #include "gimple-pretty-print.h"
  81 #include "tree-ssa-alias.h"
  82 #include "internal-fn.h"
  83 #include "tree-eh.h"
  84 #include "gimple-expr.h"
  85 #include "gimple.h"
  86 #include "gimplify.h"
  87 #include "gimple-iterator.h"
  88 #include "gimplify-me.h"
  89 #include "gimple-ssa.h"
  90 #include "plugin-api.h"
  91 #include "ipa-ref.h"
  92 #include "cgraph.h"
  93 #include "tree-cfg.h"
  94 #include "tree-phinodes.h"
  95 #include "ssa-iterators.h"
  96 #include "stringpool.h"
  97 #include "tree-ssanames.h"
  98 #include "tree-ssa-loop-ivopts.h"
  99 #include "tree-ssa-loop-manip.h"
 100 #include "tree-ssa-loop-niter.h"
 101 #include "tree-ssa-loop.h"
 102 #include "rtl.h"
 103 #include "flags.h"
 104 #include "insn-config.h"
 105 #include "expmed.h"
 106 #include "dojump.h"
 107 #include "explow.h"
 108 #include "calls.h"
 109 #include "emit-rtl.h"
 110 #include "varasm.h"
 111 #include "stmt.h"
 112 #include "expr.h"
 113 #include "tree-dfa.h"
 114 #include "tree-ssa.h"
 115 #include "cfgloop.h"
 116 #include "tree-pass.h"
 117 #include "tree-chrec.h"
 118 #include "tree-scalar-evolution.h"
 119 #include "params.h"
 120 #include "langhooks.h"
 121 #include "tree-affine.h"
 122 #include "target.h"
 123 #include "tree-inline.h"
 124 #include "tree-ssa-propagate.h"
 125 #include "tree-ssa-address.h"
 126 #include "builtins.h"
 127 #include "tree-vectorizer.h"
 128
 129 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 130    cost of different addressing modes.  This should be moved to a TBD
 131    interface between the GIMPLE and RTL worlds.  */
 132 #include "recog.h"
 133
 134 /* The infinite cost.  */
 135 #define INFTY 10000000
 136
 137 #define AVG_LOOP_NITER(LOOP) 5
 138
 139 /* Returns the expected number of loop iterations for LOOP.
 140    The average trip count is computed from profile data if it
 141    exists. */
 142
 143 static inline HOST_WIDE_INT
 144 avg_loop_niter (struct loop *loop)
 145 {
 146   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 147   if (niter == -1)
 148     return AVG_LOOP_NITER (loop);
 149
 150   return niter;
 151 }
 152
 153 /* Representation of the induction variable.  */
 154 struct iv
 155 {
 156   tree base;            /* Initial value of the iv.  */
 157   tree base_object;     /* A memory object to that the induction variable points.  */
 158   tree step;            /* Step of the iv (constant only).  */
 159   tree ssa_name;        /* The ssa name with the value.  */
 160   unsigned use_id;      /* The identifier in the use if it is the case.  */
 161   bool biv_p;           /* Is it a biv?  */
 162   bool have_use_for;    /* Do we already have a use for it?  */
 163   bool no_overflow;     /* True if the iv doesn't overflow.  */
 164 };
 165
 166 /* Per-ssa version information (induction variable descriptions, etc.).  */
 167 struct version_info
 168 {
 169   tree name;            /* The ssa name.  */
 170   struct iv *iv;        /* Induction variable description.  */
 171   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 172                            an expression that is not an induction variable.  */
 173   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 174   unsigned inv_id;      /* Id of an invariant.  */
 175 };
 176
 177 /* Types of uses.  */
 178 enum use_type
 179 {
 180   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 181   USE_ADDRESS,          /* Use in an address.  */
 182   USE_COMPARE           /* Use is a compare.  */
 183 };
 184
 185 /* Cost of a computation.  */
 186 typedef struct
 187 {
 188   int cost;             /* The runtime cost.  */
 189   unsigned complexity;  /* The estimate of the complexity of the code for
 190                            the computation (in no concrete units --
 191                            complexity field should be larger for more
 192                            complex expressions and addressing modes).  */
 193 } comp_cost;
 194
 195 static const comp_cost no_cost = {0, 0};
 196 static const comp_cost infinite_cost = {INFTY, INFTY};
 197
 198 /* The candidate - cost pair.  */
 199 struct cost_pair
 200 {
 201   struct iv_cand *cand; /* The candidate.  */
 202   comp_cost cost;       /* The cost.  */
 203   bitmap depends_on;    /* The list of invariants that have to be
 204                            preserved.  */
 205   tree value;           /* For final value elimination, the expression for
 206                            the final value of the iv.  For iv elimination,
 207                            the new bound to compare with.  */
 208   enum tree_code comp;  /* For iv elimination, the comparison.  */
 209   int inv_expr_id;      /* Loop invariant expression id.  */
 210 };
 211
 212 /* Use.  */
 213 struct iv_use
 214 {
 215   unsigned id;          /* The id of the use.  */
 216   unsigned sub_id;      /* The id of the sub use.  */
 217   enum use_type type;   /* Type of the use.  */
 218   struct iv *iv;        /* The induction variable it is based on.  */
 219   gimple stmt;          /* Statement in that it occurs.  */
 220   tree *op_p;           /* The place where it occurs.  */
 221   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 222                            important ones.  */
 223
 224   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 225   struct cost_pair *cost_map;
 226                         /* The costs wrto the iv candidates.  */
 227
 228   struct iv_cand *selected;
 229                         /* The selected candidate.  */
 230
 231   struct iv_use *next;  /* The next sub use.  */
 232   tree addr_base;       /* Base address with const offset stripped.  */
 233   unsigned HOST_WIDE_INT addr_offset;
 234                         /* Const offset stripped from base address.  */
 235 };
 236
 237 /* The position where the iv is computed.  */
 238 enum iv_position
 239 {
 240   IP_NORMAL,            /* At the end, just before the exit condition.  */
 241   IP_END,               /* At the end of the latch block.  */
 242   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 243   IP_AFTER_USE,         /* Immediately after a specific use.  */
 244   IP_ORIGINAL           /* The original biv.  */
 245 };
 246
 247 /* The induction variable candidate.  */
 248 struct iv_cand
 249 {
 250   unsigned id;          /* The number of the candidate.  */
 251   bool important;       /* Whether this is an "important" candidate, i.e. such
 252                            that it should be considered by all uses.  */
 253   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 254   gimple incremented_at;/* For original biv, the statement where it is
 255                            incremented.  */
 256   tree var_before;      /* The variable used for it before increment.  */
 257   tree var_after;       /* The variable used for it after increment.  */
 258   struct iv *iv;        /* The value of the candidate.  NULL for
 259                            "pseudocandidate" used to indicate the possibility
 260                            to replace the final value of an iv by direct
 261                            computation of the value.  */
 262   unsigned cost;        /* Cost of the candidate.  */
 263   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 264   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 265                               where it is incremented.  */
 266   bitmap depends_on;    /* The list of invariants that are used in step of the
 267                            biv.  */
 268 };
 269
 270 /* Loop invariant expression hashtable entry.  */
 271 struct iv_inv_expr_ent
 272 {
 273   tree expr;
 274   int id;
 275   hashval_t hash;
 276 };
 277
 278 /* The data used by the induction variable optimizations.  */
 279
 280 typedef struct iv_use *iv_use_p;
 281
 282 typedef struct iv_cand *iv_cand_p;
 283
 284 /* Hashtable helpers.  */
 285
 286 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 287 {
 288   typedef iv_inv_expr_ent *value_type;
 289   typedef iv_inv_expr_ent *compare_type;
 290   static inline hashval_t hash (const iv_inv_expr_ent *);
 291   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 292 };
 293
 294 /* Hash function for loop invariant expressions.  */
 295
 296 inline hashval_t
 297 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 298 {
 299   return expr->hash;
 300 }
 301
 302 /* Hash table equality function for expressions.  */
 303
 304 inline bool
 305 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 306                            const iv_inv_expr_ent *expr2)
 307 {
 308   return expr1->hash == expr2->hash
 309          && operand_equal_p (expr1->expr, expr2->expr, 0);
 310 }
 311
 312 struct ivopts_data
 313 {
 314   /* The currently optimized loop.  */
 315   struct loop *current_loop;
 316   source_location loop_loc;
 317
 318   /* Numbers of iterations for all exits of the current loop.  */
 319   hash_map<edge, tree_niter_desc *> *niters;
 320
 321   /* Number of registers used in it.  */
 322   unsigned regs_used;
 323
 324   /* The size of version_info array allocated.  */
 325   unsigned version_info_size;
 326
 327   /* The array of information for the ssa names.  */
 328   struct version_info *version_info;
 329
 330   /* The hashtable of loop invariant expressions created
 331      by ivopt.  */
 332   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 333
 334   /* Loop invariant expression id.  */
 335   int inv_expr_id;
 336
 337   /* The bitmap of indices in version_info whose value was changed.  */
 338   bitmap relevant;
 339
 340   /* The uses of induction variables.  */
 341   vec<iv_use_p> iv_uses;
 342
 343   /* The candidates.  */
 344   vec<iv_cand_p> iv_candidates;
 345
 346   /* A bitmap of important candidates.  */
 347   bitmap important_candidates;
 348
 349   /* Cache used by tree_to_aff_combination_expand.  */
 350   hash_map<tree, name_expansion *> *name_expansion_cache;
 351
 352   /* The maximum invariant id.  */
 353   unsigned max_inv_id;
 354
 355   /* Whether to consider just related and important candidates when replacing a
 356      use.  */
 357   bool consider_all_candidates;
 358
 359   /* Are we optimizing for speed?  */
 360   bool speed;
 361
 362   /* Whether the loop body includes any function calls.  */
 363   bool body_includes_call;
 364
 365   /* Whether the loop body can only be exited via single exit.  */
 366   bool loop_single_exit_p;
 367 };
 368
 369 /* An assignment of iv candidates to uses.  */
 370
 371 struct iv_ca
 372 {
 373   /* The number of uses covered by the assignment.  */
 374   unsigned upto;
 375
 376   /* Number of uses that cannot be expressed by the candidates in the set.  */
 377   unsigned bad_uses;
 378
 379   /* Candidate assigned to a use, together with the related costs.  */
 380   struct cost_pair **cand_for_use;
 381
 382   /* Number of times each candidate is used.  */
 383   unsigned *n_cand_uses;
 384
 385   /* The candidates used.  */
 386   bitmap cands;
 387
 388   /* The number of candidates in the set.  */
 389   unsigned n_cands;
 390
 391   /* Total number of registers needed.  */
 392   unsigned n_regs;
 393
 394   /* Total cost of expressing uses.  */
 395   comp_cost cand_use_cost;
 396
 397   /* Total cost of candidates.  */
 398   unsigned cand_cost;
 399
 400   /* Number of times each invariant is used.  */
 401   unsigned *n_invariant_uses;
 402
 403   /* The array holding the number of uses of each loop
 404      invariant expressions created by ivopt.  */
 405   unsigned *used_inv_expr;
 406
 407   /* The number of created loop invariants.  */
 408   unsigned num_used_inv_expr;
 409
 410   /* Total cost of the assignment.  */
 411   comp_cost cost;
 412 };
 413
 414 /* Difference of two iv candidate assignments.  */
 415
 416 struct iv_ca_delta
 417 {
 418   /* Changed use.  */
 419   struct iv_use *use;
 420
 421   /* An old assignment (for rollback purposes).  */
 422   struct cost_pair *old_cp;
 423
 424   /* A new assignment.  */
 425   struct cost_pair *new_cp;
 426
 427   /* Next change in the list.  */
 428   struct iv_ca_delta *next_change;
 429 };
 430
 431 /* Bound on number of candidates below that all candidates are considered.  */
 432
 433 #define CONSIDER_ALL_CANDIDATES_BOUND \
 434   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 435
 436 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 437    optimizing such a loop would help, and it would take ages).  */
 438
 439 #define MAX_CONSIDERED_USES \
 440   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 441
 442 /* If there are at most this number of ivs in the set, try removing unnecessary
 443    ivs from the set always.  */
 444
 445 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 446   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 447
 448 /* The list of trees for that the decl_rtl field must be reset is stored
 449    here.  */
 450
 451 static vec<tree> decl_rtl_to_reset;
 452
 453 static comp_cost force_expr_to_var_cost (tree, bool);
 454
 455 /* Number of uses recorded in DATA.  */
 456
 457 static inline unsigned
 458 n_iv_uses (struct ivopts_data *data)
 459 {
 460   return data->iv_uses.length ();
 461 }
 462
 463 /* Ith use recorded in DATA.  */
 464
 465 static inline struct iv_use *
 466 iv_use (struct ivopts_data *data, unsigned i)
 467 {
 468   return data->iv_uses[i];
 469 }
 470
 471 /* Number of candidates recorded in DATA.  */
 472
 473 static inline unsigned
 474 n_iv_cands (struct ivopts_data *data)
 475 {
 476   return data->iv_candidates.length ();
 477 }
 478
 479 /* Ith candidate recorded in DATA.  */
 480
 481 static inline struct iv_cand *
 482 iv_cand (struct ivopts_data *data, unsigned i)
 483 {
 484   return data->iv_candidates[i];
 485 }
 486
 487 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 488
 489 edge
 490 single_dom_exit (struct loop *loop)
 491 {
 492   edge exit = single_exit (loop);
 493
 494   if (!exit)
 495     return NULL;
 496
 497   if (!just_once_each_iteration_p (loop, exit->src))
 498     return NULL;
 499
 500   return exit;
 501 }
 502
 503 /* Dumps information about the induction variable IV to FILE.  */
 504
 505 void
 506 dump_iv (FILE *file, struct iv *iv, bool dump_name)
 507 {
 508   if (iv->ssa_name && dump_name)
 509     {
 510       fprintf (file, "ssa name ");
 511       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 512       fprintf (file, "\n");
 513     }
 514
 515   fprintf (file, "  type ");
 516   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 517   fprintf (file, "\n");
 518
 519   if (iv->step)
 520     {
 521       fprintf (file, "  base ");
 522       print_generic_expr (file, iv->base, TDF_SLIM);
 523       fprintf (file, "\n");
 524
 525       fprintf (file, "  step ");
 526       print_generic_expr (file, iv->step, TDF_SLIM);
 527       fprintf (file, "\n");
 528     }
 529   else
 530     {
 531       fprintf (file, "  invariant ");
 532       print_generic_expr (file, iv->base, TDF_SLIM);
 533       fprintf (file, "\n");
 534     }
 535
 536   if (iv->base_object)
 537     {
 538       fprintf (file, "  base object ");
 539       print_generic_expr (file, iv->base_object, TDF_SLIM);
 540       fprintf (file, "\n");
 541     }
 542
 543   if (iv->biv_p)
 544     fprintf (file, "  is a biv\n");
 545 }
 546
 547 /* Dumps information about the USE to FILE.  */
 548
 549 void
 550 dump_use (FILE *file, struct iv_use *use)
 551 {
 552   fprintf (file, "use %d", use->id);
 553   if (use->sub_id)
 554     fprintf (file, ".%d", use->sub_id);
 555
 556   fprintf (file, "\n");
 557
 558   switch (use->type)
 559     {
 560     case USE_NONLINEAR_EXPR:
 561       fprintf (file, "  generic\n");
 562       break;
 563
 564     case USE_ADDRESS:
 565       fprintf (file, "  address\n");
 566       break;
 567
 568     case USE_COMPARE:
 569       fprintf (file, "  compare\n");
 570       break;
 571
 572     default:
 573       gcc_unreachable ();
 574     }
 575
 576   fprintf (file, "  in statement ");
 577   print_gimple_stmt (file, use->stmt, 0, 0);
 578   fprintf (file, "\n");
 579
 580   fprintf (file, "  at position ");
 581   if (use->op_p)
 582     print_generic_expr (file, *use->op_p, TDF_SLIM);
 583   fprintf (file, "\n");
 584
 585   dump_iv (file, use->iv, false);
 586
 587   if (use->related_cands)
 588     {
 589       fprintf (file, "  related candidates ");
 590       dump_bitmap (file, use->related_cands);
 591     }
 592 }
 593
 594 /* Dumps information about the uses to FILE.  */
 595
 596 void
 597 dump_uses (FILE *file, struct ivopts_data *data)
 598 {
 599   unsigned i;
 600   struct iv_use *use;
 601
 602   for (i = 0; i < n_iv_uses (data); i++)
 603     {
 604       use = iv_use (data, i);
 605       do
 606         {
 607           dump_use (file, use);
 608           use = use->next;
 609         }
 610       while (use);
 611       fprintf (file, "\n");
 612     }
 613 }
 614
 615 /* Dumps information about induction variable candidate CAND to FILE.  */
 616
 617 void
 618 dump_cand (FILE *file, struct iv_cand *cand)
 619 {
 620   struct iv *iv = cand->iv;
 621
 622   fprintf (file, "candidate %d%s\n",
 623            cand->id, cand->important ? " (important)" : "");
 624
 625   if (cand->depends_on)
 626     {
 627       fprintf (file, "  depends on ");
 628       dump_bitmap (file, cand->depends_on);
 629     }
 630
 631   if (!iv)
 632     {
 633       fprintf (file, "  final value replacement\n");
 634       return;
 635     }
 636
 637   if (cand->var_before)
 638     {
 639       fprintf (file, "  var_before ");
 640       print_generic_expr (file, cand->var_before, TDF_SLIM);
 641       fprintf (file, "\n");
 642     }
 643   if (cand->var_after)
 644     {
 645       fprintf (file, "  var_after ");
 646       print_generic_expr (file, cand->var_after, TDF_SLIM);
 647       fprintf (file, "\n");
 648     }
 649
 650   switch (cand->pos)
 651     {
 652     case IP_NORMAL:
 653       fprintf (file, "  incremented before exit test\n");
 654       break;
 655
 656     case IP_BEFORE_USE:
 657       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 658       break;
 659
 660     case IP_AFTER_USE:
 661       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 662       break;
 663
 664     case IP_END:
 665       fprintf (file, "  incremented at end\n");
 666       break;
 667
 668     case IP_ORIGINAL:
 669       fprintf (file, "  original biv\n");
 670       break;
 671     }
 672
 673   dump_iv (file, iv, false);
 674 }
 675
 676 /* Returns the info for ssa version VER.  */
 677
 678 static inline struct version_info *
 679 ver_info (struct ivopts_data *data, unsigned ver)
 680 {
 681   return data->version_info + ver;
 682 }
 683
 684 /* Returns the info for ssa name NAME.  */
 685
 686 static inline struct version_info *
 687 name_info (struct ivopts_data *data, tree name)
 688 {
 689   return ver_info (data, SSA_NAME_VERSION (name));
 690 }
 691
 692 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 693    emitted in LOOP.  */
 694
 695 static bool
 696 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 697 {
 698   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 699
 700   gcc_assert (bb);
 701
 702   if (sbb == loop->latch)
 703     return true;
 704
 705   if (sbb != bb)
 706     return false;
 707
 708   return stmt == last_stmt (bb);
 709 }
 710
 711 /* Returns true if STMT if after the place where the original induction
 712    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 713    if the positions are identical.  */
 714
 715 static bool
 716 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 717 {
 718   basic_block cand_bb = gimple_bb (cand->incremented_at);
 719   basic_block stmt_bb = gimple_bb (stmt);
 720
 721   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 722     return false;
 723
 724   if (stmt_bb != cand_bb)
 725     return true;
 726
 727   if (true_if_equal
 728       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 729     return true;
 730   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 731 }
 732
 733 /* Returns true if STMT if after the place where the induction variable
 734    CAND is incremented in LOOP.  */
 735
 736 static bool
 737 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 738 {
 739   switch (cand->pos)
 740     {
 741     case IP_END:
 742       return false;
 743
 744     case IP_NORMAL:
 745       return stmt_after_ip_normal_pos (loop, stmt);
 746
 747     case IP_ORIGINAL:
 748     case IP_AFTER_USE:
 749       return stmt_after_inc_pos (cand, stmt, false);
 750
 751     case IP_BEFORE_USE:
 752       return stmt_after_inc_pos (cand, stmt, true);
 753
 754     default:
 755       gcc_unreachable ();
 756     }
 757 }
 758
 759 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 760
 761 static bool
 762 abnormal_ssa_name_p (tree exp)
 763 {
 764   if (!exp)
 765     return false;
 766
 767   if (TREE_CODE (exp) != SSA_NAME)
 768     return false;
 769
 770   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 771 }
 772
 773 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 774    abnormal phi node.  Callback for for_each_index.  */
 775
 776 static bool
 777 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 778                                   void *data ATTRIBUTE_UNUSED)
 779 {
 780   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 781     {
 782       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 783         return false;
 784       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 785         return false;
 786     }
 787
 788   return !abnormal_ssa_name_p (*index);
 789 }
 790
 791 /* Returns true if EXPR contains a ssa name that occurs in an
 792    abnormal phi node.  */
 793
 794 bool
 795 contains_abnormal_ssa_name_p (tree expr)
 796 {
 797   enum tree_code code;
 798   enum tree_code_class codeclass;
 799
 800   if (!expr)
 801     return false;
 802
 803   code = TREE_CODE (expr);
 804   codeclass = TREE_CODE_CLASS (code);
 805
 806   if (code == SSA_NAME)
 807     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 808
 809   if (code == INTEGER_CST
 810       || is_gimple_min_invariant (expr))
 811     return false;
 812
 813   if (code == ADDR_EXPR)
 814     return !for_each_index (&TREE_OPERAND (expr, 0),
 815                             idx_contains_abnormal_ssa_name_p,
 816                             NULL);
 817
 818   if (code == COND_EXPR)
 819     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 820       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 821       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 822
 823   switch (codeclass)
 824     {
 825     case tcc_binary:
 826     case tcc_comparison:
 827       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 828         return true;
 829
 830       /* Fallthru.  */
 831     case tcc_unary:
 832       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 833         return true;
 834
 835       break;
 836
 837     default:
 838       gcc_unreachable ();
 839     }
 840
 841   return false;
 842 }
 843
 844 /*  Returns the structure describing number of iterations determined from
 845     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 846
 847 static struct tree_niter_desc *
 848 niter_for_exit (struct ivopts_data *data, edge exit)
 849 {
 850   struct tree_niter_desc *desc;
 851   tree_niter_desc **slot;
 852
 853   if (!data->niters)
 854     {
 855       data->niters = new hash_map<edge, tree_niter_desc *>;
 856       slot = NULL;
 857     }
 858   else
 859     slot = data->niters->get (exit);
 860
 861   if (!slot)
 862     {
 863       /* Try to determine number of iterations.  We cannot safely work with ssa
 864          names that appear in phi nodes on abnormal edges, so that we do not
 865          create overlapping life ranges for them (PR 27283).  */
 866       desc = XNEW (struct tree_niter_desc);
 867       if (!number_of_iterations_exit (data->current_loop,
 868                                       exit, desc, true)
 869           || contains_abnormal_ssa_name_p (desc->niter))
 870         {
 871           XDELETE (desc);
 872           desc = NULL;
 873         }
 874       data->niters->put (exit, desc);
 875     }
 876   else
 877     desc = *slot;
 878
 879   return desc;
 880 }
 881
 882 /* Returns the structure describing number of iterations determined from
 883    single dominating exit of DATA->current_loop, or NULL if something
 884    goes wrong.  */
 885
 886 static struct tree_niter_desc *
 887 niter_for_single_dom_exit (struct ivopts_data *data)
 888 {
 889   edge exit = single_dom_exit (data->current_loop);
 890
 891   if (!exit)
 892     return NULL;
 893
 894   return niter_for_exit (data, exit);
 895 }
 896
 897 /* Initializes data structures used by the iv optimization pass, stored
 898    in DATA.  */
 899
 900 static void
 901 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 902 {
 903   data->version_info_size = 2 * num_ssa_names;
 904   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 905   data->relevant = BITMAP_ALLOC (NULL);
 906   data->important_candidates = BITMAP_ALLOC (NULL);
 907   data->max_inv_id = 0;
 908   data->niters = NULL;
 909   data->iv_uses.create (20);
 910   data->iv_candidates.create (20);
 911   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 912   data->inv_expr_id = 0;
 913   data->name_expansion_cache = NULL;
 914   decl_rtl_to_reset.create (20);
 915 }
 916
 917 /* Returns a memory object to that EXPR points.  In case we are able to
 918    determine that it does not point to any such object, NULL is returned.  */
 919
 920 static tree
 921 determine_base_object (tree expr)
 922 {
 923   enum tree_code code = TREE_CODE (expr);
 924   tree base, obj;
 925
 926   /* If this is a pointer casted to any type, we need to determine
 927      the base object for the pointer; so handle conversions before
 928      throwing away non-pointer expressions.  */
 929   if (CONVERT_EXPR_P (expr))
 930     return determine_base_object (TREE_OPERAND (expr, 0));
 931
 932   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 933     return NULL_TREE;
 934
 935   switch (code)
 936     {
 937     case INTEGER_CST:
 938       return NULL_TREE;
 939
 940     case ADDR_EXPR:
 941       obj = TREE_OPERAND (expr, 0);
 942       base = get_base_address (obj);
 943
 944       if (!base)
 945         return expr;
 946
 947       if (TREE_CODE (base) == MEM_REF)
 948         return determine_base_object (TREE_OPERAND (base, 0));
 949
 950       return fold_convert (ptr_type_node,
 951                            build_fold_addr_expr (base));
 952
 953     case POINTER_PLUS_EXPR:
 954       return determine_base_object (TREE_OPERAND (expr, 0));
 955
 956     case PLUS_EXPR:
 957     case MINUS_EXPR:
 958       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 959       gcc_unreachable ();
 960
 961     default:
 962       return fold_convert (ptr_type_node, expr);
 963     }
 964 }
 965
 966 /* Return true if address expression with non-DECL_P operand appears
 967    in EXPR.  */
 968
 969 static bool
 970 contain_complex_addr_expr (tree expr)
 971 {
 972   bool res = false;
 973
 974   STRIP_NOPS (expr);
 975   switch (TREE_CODE (expr))
 976     {
 977     case POINTER_PLUS_EXPR:
 978     case PLUS_EXPR:
 979     case MINUS_EXPR:
 980       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 981       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 982       break;
 983
 984     case ADDR_EXPR:
 985       return (!DECL_P (TREE_OPERAND (expr, 0)));
 986
 987     default:
 988       return false;
 989     }
 990
 991   return res;
 992 }
 993
 994 /* Allocates an induction variable with given initial value BASE and step STEP
 995    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
 996
 997 static struct iv *
 998 alloc_iv (tree base, tree step, bool no_overflow = false)
 999 {
1000   tree expr = base;
1001   struct iv *iv = XCNEW (struct iv);
1002   gcc_assert (step != NULL_TREE);
1003
1004   /* Lower address expression in base except ones with DECL_P as operand.
1005      By doing this:
1006        1) More accurate cost can be computed for address expressions;
1007        2) Duplicate candidates won't be created for bases in different
1008           forms, like &a[0] and &a.  */
1009   STRIP_NOPS (expr);
1010   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1011       || contain_complex_addr_expr (expr))
1012     {
1013       aff_tree comb;
1014       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1015       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1016     }
1017
1018   iv->base = base;
1019   iv->base_object = determine_base_object (base);
1020   iv->step = step;
1021   iv->biv_p = false;
1022   iv->have_use_for = false;
1023   iv->use_id = 0;
1024   iv->ssa_name = NULL_TREE;
1025   iv->no_overflow = no_overflow;
1026
1027   return iv;
1028 }
1029
1030 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1031    doesn't overflow.  */
1032
1033 static void
1034 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1035         bool no_overflow)
1036 {
1037   struct version_info *info = name_info (data, iv);
1038
1039   gcc_assert (!info->iv);
1040
1041   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1042   info->iv = alloc_iv (base, step, no_overflow);
1043   info->iv->ssa_name = iv;
1044 }
1045
1046 /* Finds induction variable declaration for VAR.  */
1047
1048 static struct iv *
1049 get_iv (struct ivopts_data *data, tree var)
1050 {
1051   basic_block bb;
1052   tree type = TREE_TYPE (var);
1053
1054   if (!POINTER_TYPE_P (type)
1055       && !INTEGRAL_TYPE_P (type))
1056     return NULL;
1057
1058   if (!name_info (data, var)->iv)
1059     {
1060       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1061
1062       if (!bb
1063           || !flow_bb_inside_loop_p (data->current_loop, bb))
1064         set_iv (data, var, var, build_int_cst (type, 0), true);
1065     }
1066
1067   return name_info (data, var)->iv;
1068 }
1069
1070 /* Return the first non-invariant ssa var found in EXPR.  */
1071
1072 static tree
1073 extract_single_var_from_expr (tree expr)
1074 {
1075   int i, n;
1076   tree tmp;
1077   enum tree_code code;
1078
1079   if (!expr || is_gimple_min_invariant (expr))
1080     return NULL;
1081
1082   code = TREE_CODE (expr);
1083   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1084     {
1085       n = TREE_OPERAND_LENGTH (expr);
1086       for (i = 0; i < n; i++)
1087         {
1088           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1089
1090           if (tmp)
1091             return tmp;
1092         }
1093     }
1094   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1095 }
1096
1097 /* Finds basic ivs.  */
1098
1099 static bool
1100 find_bivs (struct ivopts_data *data)
1101 {
1102   gphi *phi;
1103   affine_iv iv;
1104   tree step, type, base, stop;
1105   bool found = false;
1106   struct loop *loop = data->current_loop;
1107   gphi_iterator psi;
1108
1109   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1110     {
1111       phi = psi.phi ();
1112
1113       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1114         continue;
1115
1116       if (virtual_operand_p (PHI_RESULT (phi)))
1117         continue;
1118
1119       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1120         continue;
1121
1122       if (integer_zerop (iv.step))
1123         continue;
1124
1125       step = iv.step;
1126       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1127       /* Stop expanding iv base at the first ssa var referred by iv step.
1128          Ideally we should stop at any ssa var, because that's expensive
1129          and unusual to happen, we just do it on the first one.
1130
1131          See PR64705 for the rationale.  */
1132       stop = extract_single_var_from_expr (step);
1133       base = expand_simple_operations (base, stop);
1134       if (contains_abnormal_ssa_name_p (base)
1135           || contains_abnormal_ssa_name_p (step))
1136         continue;
1137
1138       type = TREE_TYPE (PHI_RESULT (phi));
1139       base = fold_convert (type, base);
1140       if (step)
1141         {
1142           if (POINTER_TYPE_P (type))
1143             step = convert_to_ptrofftype (step);
1144           else
1145             step = fold_convert (type, step);
1146         }
1147
1148       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1149       found = true;
1150     }
1151
1152   return found;
1153 }
1154
1155 /* Marks basic ivs.  */
1156
1157 static void
1158 mark_bivs (struct ivopts_data *data)
1159 {
1160   gphi *phi;
1161   gimple def;
1162   tree var;
1163   struct iv *iv, *incr_iv;
1164   struct loop *loop = data->current_loop;
1165   basic_block incr_bb;
1166   gphi_iterator psi;
1167
1168   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1169     {
1170       phi = psi.phi ();
1171
1172       iv = get_iv (data, PHI_RESULT (phi));
1173       if (!iv)
1174         continue;
1175
1176       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1177       def = SSA_NAME_DEF_STMT (var);
1178       /* Don't mark iv peeled from other one as biv.  */
1179       if (def
1180           && gimple_code (def) == GIMPLE_PHI
1181           && gimple_bb (def) == loop->header)
1182         continue;
1183
1184       incr_iv = get_iv (data, var);
1185       if (!incr_iv)
1186         continue;
1187
1188       /* If the increment is in the subloop, ignore it.  */
1189       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1190       if (incr_bb->loop_father != data->current_loop
1191           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1192         continue;
1193
1194       iv->biv_p = true;
1195       incr_iv->biv_p = true;
1196     }
1197 }
1198
1199 /* Checks whether STMT defines a linear induction variable and stores its
1200    parameters to IV.  */
1201
1202 static bool
1203 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1204 {
1205   tree lhs, stop;
1206   struct loop *loop = data->current_loop;
1207
1208   iv->base = NULL_TREE;
1209   iv->step = NULL_TREE;
1210
1211   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1212     return false;
1213
1214   lhs = gimple_assign_lhs (stmt);
1215   if (TREE_CODE (lhs) != SSA_NAME)
1216     return false;
1217
1218   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1219     return false;
1220
1221   /* Stop expanding iv base at the first ssa var referred by iv step.
1222      Ideally we should stop at any ssa var, because that's expensive
1223      and unusual to happen, we just do it on the first one.
1224
1225      See PR64705 for the rationale.  */
1226   stop = extract_single_var_from_expr (iv->step);
1227   iv->base = expand_simple_operations (iv->base, stop);
1228   if (contains_abnormal_ssa_name_p (iv->base)
1229       || contains_abnormal_ssa_name_p (iv->step))
1230     return false;
1231
1232   /* If STMT could throw, then do not consider STMT as defining a GIV.
1233      While this will suppress optimizations, we can not safely delete this
1234      GIV and associated statements, even if it appears it is not used.  */
1235   if (stmt_could_throw_p (stmt))
1236     return false;
1237
1238   return true;
1239 }
1240
1241 /* Finds general ivs in statement STMT.  */
1242
1243 static void
1244 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1245 {
1246   affine_iv iv;
1247
1248   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1249     return;
1250
1251   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1252 }
1253
1254 /* Finds general ivs in basic block BB.  */
1255
1256 static void
1257 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1258 {
1259   gimple_stmt_iterator bsi;
1260
1261   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1262     find_givs_in_stmt (data, gsi_stmt (bsi));
1263 }
1264
1265 /* Finds general ivs.  */
1266
1267 static void
1268 find_givs (struct ivopts_data *data)
1269 {
1270   struct loop *loop = data->current_loop;
1271   basic_block *body = get_loop_body_in_dom_order (loop);
1272   unsigned i;
1273
1274   for (i = 0; i < loop->num_nodes; i++)
1275     find_givs_in_bb (data, body[i]);
1276   free (body);
1277 }
1278
1279 /* For each ssa name defined in LOOP determines whether it is an induction
1280    variable and if so, its initial value and step.  */
1281
1282 static bool
1283 find_induction_variables (struct ivopts_data *data)
1284 {
1285   unsigned i;
1286   bitmap_iterator bi;
1287
1288   if (!find_bivs (data))
1289     return false;
1290
1291   find_givs (data);
1292   mark_bivs (data);
1293
1294   if (dump_file && (dump_flags & TDF_DETAILS))
1295     {
1296       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1297
1298       if (niter)
1299         {
1300           fprintf (dump_file, "  number of iterations ");
1301           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1302           if (!integer_zerop (niter->may_be_zero))
1303             {
1304               fprintf (dump_file, "; zero if ");
1305               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1306             }
1307           fprintf (dump_file, "\n\n");
1308         };
1309
1310       fprintf (dump_file, "Induction variables:\n\n");
1311
1312       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1313         {
1314           if (ver_info (data, i)->iv)
1315             dump_iv (dump_file, ver_info (data, i)->iv, true);
1316         }
1317     }
1318
1319   return true;
1320 }
1321
1322 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1323    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1324    is the const offset stripped from IV base.  For uses of other types,
1325    ADDR_BASE and ADDR_OFFSET are zero by default.  */
1326
1327 static struct iv_use *
1328 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1329             gimple stmt, enum use_type use_type, tree addr_base = NULL,
1330             unsigned HOST_WIDE_INT addr_offset = 0)
1331 {
1332   struct iv_use *use = XCNEW (struct iv_use);
1333
1334   use->id = n_iv_uses (data);
1335   use->sub_id = 0;
1336   use->type = use_type;
1337   use->iv = iv;
1338   use->stmt = stmt;
1339   use->op_p = use_p;
1340   use->related_cands = BITMAP_ALLOC (NULL);
1341   use->next = NULL;
1342   use->addr_base = addr_base;
1343   use->addr_offset = addr_offset;
1344
1345   data->iv_uses.safe_push (use);
1346
1347   return use;
1348 }
1349
1350 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1351    The sub use is recorded under the one whose use id is ID_GROUP.  */
1352
1353 static struct iv_use *
1354 record_sub_use (struct ivopts_data *data, tree *use_p,
1355                     struct iv *iv, gimple stmt, enum use_type use_type,
1356                     tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1357                     unsigned int id_group)
1358 {
1359   struct iv_use *use = XCNEW (struct iv_use);
1360   struct iv_use *group = iv_use (data, id_group);
1361
1362   use->id = group->id;
1363   use->sub_id = 0;
1364   use->type = use_type;
1365   use->iv = iv;
1366   use->stmt = stmt;
1367   use->op_p = use_p;
1368   use->related_cands = NULL;
1369   use->addr_base = addr_base;
1370   use->addr_offset = addr_offset;
1371
1372   /* Sub use list is maintained in offset ascending order.  */
1373   if (addr_offset <= group->addr_offset)
1374     {
1375       use->related_cands = group->related_cands;
1376       group->related_cands = NULL;
1377       use->next = group;
1378       data->iv_uses[id_group] = use;
1379     }
1380   else
1381     {
1382       struct iv_use *pre;
1383       do
1384         {
1385           pre = group;
1386           group = group->next;
1387         }
1388       while (group && addr_offset > group->addr_offset);
1389       use->next = pre->next;
1390       pre->next = use;
1391     }
1392
1393   /* To avoid showing ssa name in the dumps, if it was not reset by the
1394      caller.  */
1395   iv->ssa_name = NULL_TREE;
1396
1397   return use;
1398 }
1399
1400 /* Checks whether OP is a loop-level invariant and if so, records it.
1401    NONLINEAR_USE is true if the invariant is used in a way we do not
1402    handle specially.  */
1403
1404 static void
1405 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1406 {
1407   basic_block bb;
1408   struct version_info *info;
1409
1410   if (TREE_CODE (op) != SSA_NAME
1411       || virtual_operand_p (op))
1412     return;
1413
1414   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1415   if (bb
1416       && flow_bb_inside_loop_p (data->current_loop, bb))
1417     return;
1418
1419   info = name_info (data, op);
1420   info->name = op;
1421   info->has_nonlin_use |= nonlinear_use;
1422   if (!info->inv_id)
1423     info->inv_id = ++data->max_inv_id;
1424   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1425 }
1426
1427 /* Checks whether the use OP is interesting and if so, records it.  */
1428
1429 static struct iv_use *
1430 find_interesting_uses_op (struct ivopts_data *data, tree op)
1431 {
1432   struct iv *iv;
1433   struct iv *civ;
1434   gimple stmt;
1435   struct iv_use *use;
1436
1437   if (TREE_CODE (op) != SSA_NAME)
1438     return NULL;
1439
1440   iv = get_iv (data, op);
1441   if (!iv)
1442     return NULL;
1443
1444   if (iv->have_use_for)
1445     {
1446       use = iv_use (data, iv->use_id);
1447
1448       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1449       return use;
1450     }
1451
1452   if (integer_zerop (iv->step))
1453     {
1454       record_invariant (data, op, true);
1455       return NULL;
1456     }
1457   iv->have_use_for = true;
1458
1459   civ = XNEW (struct iv);
1460   *civ = *iv;
1461
1462   stmt = SSA_NAME_DEF_STMT (op);
1463   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1464               || is_gimple_assign (stmt));
1465
1466   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1467   iv->use_id = use->id;
1468
1469   return use;
1470 }
1471
1472 /* Given a condition in statement STMT, checks whether it is a compare
1473    of an induction variable and an invariant.  If this is the case,
1474    CONTROL_VAR is set to location of the iv, BOUND to the location of
1475    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1476    induction variable descriptions, and true is returned.  If this is not
1477    the case, CONTROL_VAR and BOUND are set to the arguments of the
1478    condition and false is returned.  */
1479
1480 static bool
1481 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1482                        tree **control_var, tree **bound,
1483                        struct iv **iv_var, struct iv **iv_bound)
1484 {
1485   /* The objects returned when COND has constant operands.  */
1486   static struct iv const_iv;
1487   static tree zero;
1488   tree *op0 = &zero, *op1 = &zero;
1489   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1490   bool ret = false;
1491
1492   if (gimple_code (stmt) == GIMPLE_COND)
1493     {
1494       gcond *cond_stmt = as_a <gcond *> (stmt);
1495       op0 = gimple_cond_lhs_ptr (cond_stmt);
1496       op1 = gimple_cond_rhs_ptr (cond_stmt);
1497     }
1498   else
1499     {
1500       op0 = gimple_assign_rhs1_ptr (stmt);
1501       op1 = gimple_assign_rhs2_ptr (stmt);
1502     }
1503
1504   zero = integer_zero_node;
1505   const_iv.step = integer_zero_node;
1506
1507   if (TREE_CODE (*op0) == SSA_NAME)
1508     iv0 = get_iv (data, *op0);
1509   if (TREE_CODE (*op1) == SSA_NAME)
1510     iv1 = get_iv (data, *op1);
1511
1512   /* Exactly one of the compared values must be an iv, and the other one must
1513      be an invariant.  */
1514   if (!iv0 || !iv1)
1515     goto end;
1516
1517   if (integer_zerop (iv0->step))
1518     {
1519       /* Control variable may be on the other side.  */
1520       std::swap (op0, op1);
1521       std::swap (iv0, iv1);
1522     }
1523   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1524
1525 end:
1526   if (control_var)
1527     *control_var = op0;
1528   if (iv_var)
1529     *iv_var = iv0;
1530   if (bound)
1531     *bound = op1;
1532   if (iv_bound)
1533     *iv_bound = iv1;
1534
1535   return ret;
1536 }
1537
1538 /* Checks whether the condition in STMT is interesting and if so,
1539    records it.  */
1540
1541 static void
1542 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1543 {
1544   tree *var_p, *bound_p;
1545   struct iv *var_iv, *civ;
1546
1547   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1548     {
1549       find_interesting_uses_op (data, *var_p);
1550       find_interesting_uses_op (data, *bound_p);
1551       return;
1552     }
1553
1554   civ = XNEW (struct iv);
1555   *civ = *var_iv;
1556   record_use (data, NULL, civ, stmt, USE_COMPARE);
1557 }
1558
1559 /* Returns the outermost loop EXPR is obviously invariant in
1560    relative to the loop LOOP, i.e. if all its operands are defined
1561    outside of the returned loop.  Returns NULL if EXPR is not
1562    even obviously invariant in LOOP.  */
1563
1564 struct loop *
1565 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1566 {
1567   basic_block def_bb;
1568   unsigned i, len;
1569
1570   if (is_gimple_min_invariant (expr))
1571     return current_loops->tree_root;
1572
1573   if (TREE_CODE (expr) == SSA_NAME)
1574     {
1575       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1576       if (def_bb)
1577         {
1578           if (flow_bb_inside_loop_p (loop, def_bb))
1579             return NULL;
1580           return superloop_at_depth (loop,
1581                                      loop_depth (def_bb->loop_father) + 1);
1582         }
1583
1584       return current_loops->tree_root;
1585     }
1586
1587   if (!EXPR_P (expr))
1588     return NULL;
1589
1590   unsigned maxdepth = 0;
1591   len = TREE_OPERAND_LENGTH (expr);
1592   for (i = 0; i < len; i++)
1593     {
1594       struct loop *ivloop;
1595       if (!TREE_OPERAND (expr, i))
1596         continue;
1597
1598       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1599       if (!ivloop)
1600         return NULL;
1601       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1602     }
1603
1604   return superloop_at_depth (loop, maxdepth);
1605 }
1606
1607 /* Returns true if expression EXPR is obviously invariant in LOOP,
1608    i.e. if all its operands are defined outside of the LOOP.  LOOP
1609    should not be the function body.  */
1610
1611 bool
1612 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1613 {
1614   basic_block def_bb;
1615   unsigned i, len;
1616
1617   gcc_assert (loop_depth (loop) > 0);
1618
1619   if (is_gimple_min_invariant (expr))
1620     return true;
1621
1622   if (TREE_CODE (expr) == SSA_NAME)
1623     {
1624       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1625       if (def_bb
1626           && flow_bb_inside_loop_p (loop, def_bb))
1627         return false;
1628
1629       return true;
1630     }
1631
1632   if (!EXPR_P (expr))
1633     return false;
1634
1635   len = TREE_OPERAND_LENGTH (expr);
1636   for (i = 0; i < len; i++)
1637     if (TREE_OPERAND (expr, i)
1638         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1639       return false;
1640
1641   return true;
1642 }
1643
1644 /* Cumulates the steps of indices into DATA and replaces their values with the
1645    initial ones.  Returns false when the value of the index cannot be determined.
1646    Callback for for_each_index.  */
1647
1648 struct ifs_ivopts_data
1649 {
1650   struct ivopts_data *ivopts_data;
1651   gimple stmt;
1652   tree step;
1653 };
1654
1655 static bool
1656 idx_find_step (tree base, tree *idx, void *data)
1657 {
1658   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1659   struct iv *iv;
1660   bool use_overflow_semantics = false;
1661   tree step, iv_base, iv_step, lbound, off;
1662   struct loop *loop = dta->ivopts_data->current_loop;
1663
1664   /* If base is a component ref, require that the offset of the reference
1665      be invariant.  */
1666   if (TREE_CODE (base) == COMPONENT_REF)
1667     {
1668       off = component_ref_field_offset (base);
1669       return expr_invariant_in_loop_p (loop, off);
1670     }
1671
1672   /* If base is array, first check whether we will be able to move the
1673      reference out of the loop (in order to take its address in strength
1674      reduction).  In order for this to work we need both lower bound
1675      and step to be loop invariants.  */
1676   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1677     {
1678       /* Moreover, for a range, the size needs to be invariant as well.  */
1679       if (TREE_CODE (base) == ARRAY_RANGE_REF
1680           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1681         return false;
1682
1683       step = array_ref_element_size (base);
1684       lbound = array_ref_low_bound (base);
1685
1686       if (!expr_invariant_in_loop_p (loop, step)
1687           || !expr_invariant_in_loop_p (loop, lbound))
1688         return false;
1689     }
1690
1691   if (TREE_CODE (*idx) != SSA_NAME)
1692     return true;
1693
1694   iv = get_iv (dta->ivopts_data, *idx);
1695   if (!iv)
1696     return false;
1697
1698   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1699           *&x[0], which is not folded and does not trigger the
1700           ARRAY_REF path below.  */
1701   *idx = iv->base;
1702
1703   if (integer_zerop (iv->step))
1704     return true;
1705
1706   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1707     {
1708       step = array_ref_element_size (base);
1709
1710       /* We only handle addresses whose step is an integer constant.  */
1711       if (TREE_CODE (step) != INTEGER_CST)
1712         return false;
1713     }
1714   else
1715     /* The step for pointer arithmetics already is 1 byte.  */
1716     step = size_one_node;
1717
1718   iv_base = iv->base;
1719   iv_step = iv->step;
1720   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1721     use_overflow_semantics = true;
1722
1723   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1724                             sizetype, &iv_base, &iv_step, dta->stmt,
1725                             use_overflow_semantics))
1726     {
1727       /* The index might wrap.  */
1728       return false;
1729     }
1730
1731   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1732   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1733
1734   return true;
1735 }
1736
1737 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1738    object is passed to it in DATA.  */
1739
1740 static bool
1741 idx_record_use (tree base, tree *idx,
1742                 void *vdata)
1743 {
1744   struct ivopts_data *data = (struct ivopts_data *) vdata;
1745   find_interesting_uses_op (data, *idx);
1746   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1747     {
1748       find_interesting_uses_op (data, array_ref_element_size (base));
1749       find_interesting_uses_op (data, array_ref_low_bound (base));
1750     }
1751   return true;
1752 }
1753
1754 /* If we can prove that TOP = cst * BOT for some constant cst,
1755    store cst to MUL and return true.  Otherwise return false.
1756    The returned value is always sign-extended, regardless of the
1757    signedness of TOP and BOT.  */
1758
1759 static bool
1760 constant_multiple_of (tree top, tree bot, widest_int *mul)
1761 {
1762   tree mby;
1763   enum tree_code code;
1764   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1765   widest_int res, p0, p1;
1766
1767   STRIP_NOPS (top);
1768   STRIP_NOPS (bot);
1769
1770   if (operand_equal_p (top, bot, 0))
1771     {
1772       *mul = 1;
1773       return true;
1774     }
1775
1776   code = TREE_CODE (top);
1777   switch (code)
1778     {
1779     case MULT_EXPR:
1780       mby = TREE_OPERAND (top, 1);
1781       if (TREE_CODE (mby) != INTEGER_CST)
1782         return false;
1783
1784       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1785         return false;
1786
1787       *mul = wi::sext (res * wi::to_widest (mby), precision);
1788       return true;
1789
1790     case PLUS_EXPR:
1791     case MINUS_EXPR:
1792       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1793           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1794         return false;
1795
1796       if (code == MINUS_EXPR)
1797         p1 = -p1;
1798       *mul = wi::sext (p0 + p1, precision);
1799       return true;
1800
1801     case INTEGER_CST:
1802       if (TREE_CODE (bot) != INTEGER_CST)
1803         return false;
1804
1805       p0 = widest_int::from (top, SIGNED);
1806       p1 = widest_int::from (bot, SIGNED);
1807       if (p1 == 0)
1808         return false;
1809       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1810       return res == 0;
1811
1812     default:
1813       return false;
1814     }
1815 }
1816
1817 /* Return true if memory reference REF with step STEP may be unaligned.  */
1818
1819 static bool
1820 may_be_unaligned_p (tree ref, tree step)
1821 {
1822   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1823      thus they are not misaligned.  */
1824   if (TREE_CODE (ref) == TARGET_MEM_REF)
1825     return false;
1826
1827   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1828   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1829     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1830
1831   unsigned HOST_WIDE_INT bitpos;
1832   unsigned int ref_align;
1833   get_object_alignment_1 (ref, &ref_align, &bitpos);
1834   if (ref_align < align
1835       || (bitpos % align) != 0
1836       || (bitpos % BITS_PER_UNIT) != 0)
1837     return true;
1838
1839   unsigned int trailing_zeros = tree_ctz (step);
1840   if (trailing_zeros < HOST_BITS_PER_INT
1841       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1842     return true;
1843
1844   return false;
1845 }
1846
1847 /* Return true if EXPR may be non-addressable.   */
1848
1849 bool
1850 may_be_nonaddressable_p (tree expr)
1851 {
1852   switch (TREE_CODE (expr))
1853     {
1854     case TARGET_MEM_REF:
1855       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1856          target, thus they are always addressable.  */
1857       return false;
1858
1859     case COMPONENT_REF:
1860       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1861              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1862
1863     case VIEW_CONVERT_EXPR:
1864       /* This kind of view-conversions may wrap non-addressable objects
1865          and make them look addressable.  After some processing the
1866          non-addressability may be uncovered again, causing ADDR_EXPRs
1867          of inappropriate objects to be built.  */
1868       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1869           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1870         return true;
1871
1872       /* ... fall through ... */
1873
1874     case ARRAY_REF:
1875     case ARRAY_RANGE_REF:
1876       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1877
1878     CASE_CONVERT:
1879       return true;
1880
1881     default:
1882       break;
1883     }
1884
1885   return false;
1886 }
1887
1888 static tree
1889 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1890
1891 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1892    If there is an existing use which has same stripped iv base and step,
1893    this function records this one as a sub use to that; otherwise records
1894    it as a normal one.  */
1895
1896 static struct iv_use *
1897 record_group_use (struct ivopts_data *data, tree *use_p,
1898                   struct iv *iv, gimple stmt, enum use_type use_type)
1899 {
1900   unsigned int i;
1901   struct iv_use *use;
1902   tree addr_base;
1903   unsigned HOST_WIDE_INT addr_offset;
1904
1905   /* Only support sub use for address type uses, that is, with base
1906      object.  */
1907   if (!iv->base_object)
1908     return record_use (data, use_p, iv, stmt, use_type);
1909
1910   addr_base = strip_offset (iv->base, &addr_offset);
1911   for (i = 0; i < n_iv_uses (data); i++)
1912     {
1913       use = iv_use (data, i);
1914       if (use->type != USE_ADDRESS || !use->iv->base_object)
1915         continue;
1916
1917       /* Check if it has the same stripped base and step.  */
1918       if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1919           && operand_equal_p (iv->step, use->iv->step, 0)
1920           && operand_equal_p (addr_base, use->addr_base, 0))
1921         break;
1922     }
1923
1924   if (i == n_iv_uses (data))
1925     return record_use (data, use_p, iv, stmt,
1926                        use_type, addr_base, addr_offset);
1927   else
1928     return record_sub_use (data, use_p, iv, stmt,
1929                            use_type, addr_base, addr_offset, i);
1930 }
1931
1932 /* Finds addresses in *OP_P inside STMT.  */
1933
1934 static void
1935 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1936 {
1937   tree base = *op_p, step = size_zero_node;
1938   struct iv *civ;
1939   struct ifs_ivopts_data ifs_ivopts_data;
1940
1941   /* Do not play with volatile memory references.  A bit too conservative,
1942      perhaps, but safe.  */
1943   if (gimple_has_volatile_ops (stmt))
1944     goto fail;
1945
1946   /* Ignore bitfields for now.  Not really something terribly complicated
1947      to handle.  TODO.  */
1948   if (TREE_CODE (base) == BIT_FIELD_REF)
1949     goto fail;
1950
1951   base = unshare_expr (base);
1952
1953   if (TREE_CODE (base) == TARGET_MEM_REF)
1954     {
1955       tree type = build_pointer_type (TREE_TYPE (base));
1956       tree astep;
1957
1958       if (TMR_BASE (base)
1959           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1960         {
1961           civ = get_iv (data, TMR_BASE (base));
1962           if (!civ)
1963             goto fail;
1964
1965           TMR_BASE (base) = civ->base;
1966           step = civ->step;
1967         }
1968       if (TMR_INDEX2 (base)
1969           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1970         {
1971           civ = get_iv (data, TMR_INDEX2 (base));
1972           if (!civ)
1973             goto fail;
1974
1975           TMR_INDEX2 (base) = civ->base;
1976           step = civ->step;
1977         }
1978       if (TMR_INDEX (base)
1979           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1980         {
1981           civ = get_iv (data, TMR_INDEX (base));
1982           if (!civ)
1983             goto fail;
1984
1985           TMR_INDEX (base) = civ->base;
1986           astep = civ->step;
1987
1988           if (astep)
1989             {
1990               if (TMR_STEP (base))
1991                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1992
1993               step = fold_build2 (PLUS_EXPR, type, step, astep);
1994             }
1995         }
1996
1997       if (integer_zerop (step))
1998         goto fail;
1999       base = tree_mem_ref_addr (type, base);
2000     }
2001   else
2002     {
2003       ifs_ivopts_data.ivopts_data = data;
2004       ifs_ivopts_data.stmt = stmt;
2005       ifs_ivopts_data.step = size_zero_node;
2006       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2007           || integer_zerop (ifs_ivopts_data.step))
2008         goto fail;
2009       step = ifs_ivopts_data.step;
2010
2011       /* Check that the base expression is addressable.  This needs
2012          to be done after substituting bases of IVs into it.  */
2013       if (may_be_nonaddressable_p (base))
2014         goto fail;
2015
2016       /* Moreover, on strict alignment platforms, check that it is
2017          sufficiently aligned.  */
2018       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2019         goto fail;
2020
2021       base = build_fold_addr_expr (base);
2022
2023       /* Substituting bases of IVs into the base expression might
2024          have caused folding opportunities.  */
2025       if (TREE_CODE (base) == ADDR_EXPR)
2026         {
2027           tree *ref = &TREE_OPERAND (base, 0);
2028           while (handled_component_p (*ref))
2029             ref = &TREE_OPERAND (*ref, 0);
2030           if (TREE_CODE (*ref) == MEM_REF)
2031             {
2032               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2033                                       TREE_OPERAND (*ref, 0),
2034                                       TREE_OPERAND (*ref, 1));
2035               if (tem)
2036                 *ref = tem;
2037             }
2038         }
2039     }
2040
2041   civ = alloc_iv (base, step);
2042   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2043   return;
2044
2045 fail:
2046   for_each_index (op_p, idx_record_use, data);
2047 }
2048
2049 /* Finds and records invariants used in STMT.  */
2050
2051 static void
2052 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2053 {
2054   ssa_op_iter iter;
2055   use_operand_p use_p;
2056   tree op;
2057
2058   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2059     {
2060       op = USE_FROM_PTR (use_p);
2061       record_invariant (data, op, false);
2062     }
2063 }
2064
2065 /* Finds interesting uses of induction variables in the statement STMT.  */
2066
2067 static void
2068 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2069 {
2070   struct iv *iv;
2071   tree op, *lhs, *rhs;
2072   ssa_op_iter iter;
2073   use_operand_p use_p;
2074   enum tree_code code;
2075
2076   find_invariants_stmt (data, stmt);
2077
2078   if (gimple_code (stmt) == GIMPLE_COND)
2079     {
2080       find_interesting_uses_cond (data, stmt);
2081       return;
2082     }
2083
2084   if (is_gimple_assign (stmt))
2085     {
2086       lhs = gimple_assign_lhs_ptr (stmt);
2087       rhs = gimple_assign_rhs1_ptr (stmt);
2088
2089       if (TREE_CODE (*lhs) == SSA_NAME)
2090         {
2091           /* If the statement defines an induction variable, the uses are not
2092              interesting by themselves.  */
2093
2094           iv = get_iv (data, *lhs);
2095
2096           if (iv && !integer_zerop (iv->step))
2097             return;
2098         }
2099
2100       code = gimple_assign_rhs_code (stmt);
2101       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2102           && (REFERENCE_CLASS_P (*rhs)
2103               || is_gimple_val (*rhs)))
2104         {
2105           if (REFERENCE_CLASS_P (*rhs))
2106             find_interesting_uses_address (data, stmt, rhs);
2107           else
2108             find_interesting_uses_op (data, *rhs);
2109
2110           if (REFERENCE_CLASS_P (*lhs))
2111             find_interesting_uses_address (data, stmt, lhs);
2112           return;
2113         }
2114       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2115         {
2116           find_interesting_uses_cond (data, stmt);
2117           return;
2118         }
2119
2120       /* TODO -- we should also handle address uses of type
2121
2122          memory = call (whatever);
2123
2124          and
2125
2126          call (memory).  */
2127     }
2128
2129   if (gimple_code (stmt) == GIMPLE_PHI
2130       && gimple_bb (stmt) == data->current_loop->header)
2131     {
2132       iv = get_iv (data, PHI_RESULT (stmt));
2133
2134       if (iv && !integer_zerop (iv->step))
2135         return;
2136     }
2137
2138   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2139     {
2140       op = USE_FROM_PTR (use_p);
2141
2142       if (TREE_CODE (op) != SSA_NAME)
2143         continue;
2144
2145       iv = get_iv (data, op);
2146       if (!iv)
2147         continue;
2148
2149       find_interesting_uses_op (data, op);
2150     }
2151 }
2152
2153 /* Finds interesting uses of induction variables outside of loops
2154    on loop exit edge EXIT.  */
2155
2156 static void
2157 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2158 {
2159   gphi *phi;
2160   gphi_iterator psi;
2161   tree def;
2162
2163   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2164     {
2165       phi = psi.phi ();
2166       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2167       if (!virtual_operand_p (def))
2168         find_interesting_uses_op (data, def);
2169     }
2170 }
2171
2172 /* Finds uses of the induction variables that are interesting.  */
2173
2174 static void
2175 find_interesting_uses (struct ivopts_data *data)
2176 {
2177   basic_block bb;
2178   gimple_stmt_iterator bsi;
2179   basic_block *body = get_loop_body (data->current_loop);
2180   unsigned i;
2181   struct version_info *info;
2182   edge e;
2183
2184   if (dump_file && (dump_flags & TDF_DETAILS))
2185     fprintf (dump_file, "Uses:\n\n");
2186
2187   for (i = 0; i < data->current_loop->num_nodes; i++)
2188     {
2189       edge_iterator ei;
2190       bb = body[i];
2191
2192       FOR_EACH_EDGE (e, ei, bb->succs)
2193         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2194             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2195           find_interesting_uses_outside (data, e);
2196
2197       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2198         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2199       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2200         if (!is_gimple_debug (gsi_stmt (bsi)))
2201           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2202     }
2203
2204   if (dump_file && (dump_flags & TDF_DETAILS))
2205     {
2206       bitmap_iterator bi;
2207
2208       fprintf (dump_file, "\n");
2209
2210       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2211         {
2212           info = ver_info (data, i);
2213           if (info->inv_id)
2214             {
2215               fprintf (dump_file, "  ");
2216               print_generic_expr (dump_file, info->name, TDF_SLIM);
2217               fprintf (dump_file, " is invariant (%d)%s\n",
2218                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2219             }
2220         }
2221
2222       fprintf (dump_file, "\n");
2223     }
2224
2225   free (body);
2226 }
2227
2228 /* Compute maximum offset of [base + offset] addressing mode
2229    for memory reference represented by USE.  */
2230
2231 static HOST_WIDE_INT
2232 compute_max_addr_offset (struct iv_use *use)
2233 {
2234   int width;
2235   rtx reg, addr;
2236   HOST_WIDE_INT i, off;
2237   unsigned list_index, num;
2238   addr_space_t as;
2239   machine_mode mem_mode, addr_mode;
2240   static vec<HOST_WIDE_INT> max_offset_list;
2241
2242   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2243   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2244
2245   num = max_offset_list.length ();
2246   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2247   if (list_index >= num)
2248     {
2249       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2250       for (; num < max_offset_list.length (); num++)
2251         max_offset_list[num] = -1;
2252     }
2253
2254   off = max_offset_list[list_index];
2255   if (off != -1)
2256     return off;
2257
2258   addr_mode = targetm.addr_space.address_mode (as);
2259   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2260   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2261
2262   width = GET_MODE_BITSIZE (addr_mode) - 1;
2263   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2264     width = HOST_BITS_PER_WIDE_INT - 1;
2265
2266   for (i = width; i > 0; i--)
2267     {
2268       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2269       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2270       if (memory_address_addr_space_p (mem_mode, addr, as))
2271         break;
2272
2273       /* For some strict-alignment targets, the offset must be naturally
2274          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2275       off = ((unsigned HOST_WIDE_INT) 1 << i);
2276       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2277         {
2278           off -= GET_MODE_SIZE (mem_mode);
2279           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2280           if (memory_address_addr_space_p (mem_mode, addr, as))
2281             break;
2282         }
2283     }
2284   if (i == 0)
2285     off = 0;
2286
2287   max_offset_list[list_index] = off;
2288   return off;
2289 }
2290
2291 /* Check if all small groups should be split.  Return true if and
2292    only if:
2293
2294      1) At least one groups contain two uses with different offsets.
2295      2) No group contains more than two uses with different offsets.
2296
2297    Return false otherwise.  We want to split such groups because:
2298
2299      1) Small groups don't have much benefit and may interfer with
2300         general candidate selection.
2301      2) Size for problem with only small groups is usually small and
2302         general algorithm can handle it well.
2303
2304    TODO -- Above claim may not hold when auto increment is supported.  */
2305
2306 static bool
2307 split_all_small_groups (struct ivopts_data *data)
2308 {
2309   bool split_p = false;
2310   unsigned int i, n, distinct;
2311   struct iv_use *pre, *use;
2312
2313   n = n_iv_uses (data);
2314   for (i = 0; i < n; i++)
2315     {
2316       use = iv_use (data, i);
2317       if (!use->next)
2318         continue;
2319
2320       distinct = 1;
2321       gcc_assert (use->type == USE_ADDRESS);
2322       for (pre = use, use = use->next; use; pre = use, use = use->next)
2323         {
2324           if (pre->addr_offset != use->addr_offset)
2325             distinct++;
2326
2327           if (distinct > 2)
2328             return false;
2329         }
2330       if (distinct == 2)
2331         split_p = true;
2332     }
2333
2334   return split_p;
2335 }
2336
2337 /* For each group of address type uses, this function further groups
2338    these uses according to the maximum offset supported by target's
2339    [base + offset] addressing mode.  */
2340
2341 static void
2342 group_address_uses (struct ivopts_data *data)
2343 {
2344   HOST_WIDE_INT max_offset = -1;
2345   unsigned int i, n, sub_id;
2346   struct iv_use *pre, *use;
2347   unsigned HOST_WIDE_INT addr_offset_first;
2348
2349   /* Reset max offset to split all small groups.  */
2350   if (split_all_small_groups (data))
2351     max_offset = 0;
2352
2353   n = n_iv_uses (data);
2354   for (i = 0; i < n; i++)
2355     {
2356       use = iv_use (data, i);
2357       if (!use->next)
2358         continue;
2359
2360       gcc_assert (use->type == USE_ADDRESS);
2361       if (max_offset != 0)
2362         max_offset = compute_max_addr_offset (use);
2363
2364       while (use)
2365         {
2366           sub_id = 0;
2367           addr_offset_first = use->addr_offset;
2368           /* Only uses with offset that can fit in offset part against
2369              the first use can be grouped together.  */
2370           for (pre = use, use = use->next;
2371                use && (use->addr_offset - addr_offset_first
2372                        <= (unsigned HOST_WIDE_INT) max_offset);
2373                pre = use, use = use->next)
2374             {
2375               use->id = pre->id;
2376               use->sub_id = ++sub_id;
2377             }
2378
2379           /* Break the list and create new group.  */
2380           if (use)
2381             {
2382               pre->next = NULL;
2383               use->id = n_iv_uses (data);
2384               use->related_cands = BITMAP_ALLOC (NULL);
2385               data->iv_uses.safe_push (use);
2386             }
2387         }
2388     }
2389
2390   if (dump_file && (dump_flags & TDF_DETAILS))
2391     dump_uses (dump_file, data);
2392 }
2393
2394 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2395    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2396    we are at the top-level of the processed address.  */
2397
2398 static tree
2399 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2400                 HOST_WIDE_INT *offset)
2401 {
2402   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2403   enum tree_code code;
2404   tree type, orig_type = TREE_TYPE (expr);
2405   HOST_WIDE_INT off0, off1, st;
2406   tree orig_expr = expr;
2407
2408   STRIP_NOPS (expr);
2409
2410   type = TREE_TYPE (expr);
2411   code = TREE_CODE (expr);
2412   *offset = 0;
2413
2414   switch (code)
2415     {
2416     case INTEGER_CST:
2417       if (!cst_and_fits_in_hwi (expr)
2418           || integer_zerop (expr))
2419         return orig_expr;
2420
2421       *offset = int_cst_value (expr);
2422       return build_int_cst (orig_type, 0);
2423
2424     case POINTER_PLUS_EXPR:
2425     case PLUS_EXPR:
2426     case MINUS_EXPR:
2427       op0 = TREE_OPERAND (expr, 0);
2428       op1 = TREE_OPERAND (expr, 1);
2429
2430       op0 = strip_offset_1 (op0, false, false, &off0);
2431       op1 = strip_offset_1 (op1, false, false, &off1);
2432
2433       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2434       if (op0 == TREE_OPERAND (expr, 0)
2435           && op1 == TREE_OPERAND (expr, 1))
2436         return orig_expr;
2437
2438       if (integer_zerop (op1))
2439         expr = op0;
2440       else if (integer_zerop (op0))
2441         {
2442           if (code == MINUS_EXPR)
2443             expr = fold_build1 (NEGATE_EXPR, type, op1);
2444           else
2445             expr = op1;
2446         }
2447       else
2448         expr = fold_build2 (code, type, op0, op1);
2449
2450       return fold_convert (orig_type, expr);
2451
2452     case MULT_EXPR:
2453       op1 = TREE_OPERAND (expr, 1);
2454       if (!cst_and_fits_in_hwi (op1))
2455         return orig_expr;
2456
2457       op0 = TREE_OPERAND (expr, 0);
2458       op0 = strip_offset_1 (op0, false, false, &off0);
2459       if (op0 == TREE_OPERAND (expr, 0))
2460         return orig_expr;
2461
2462       *offset = off0 * int_cst_value (op1);
2463       if (integer_zerop (op0))
2464         expr = op0;
2465       else
2466         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2467
2468       return fold_convert (orig_type, expr);
2469
2470     case ARRAY_REF:
2471     case ARRAY_RANGE_REF:
2472       if (!inside_addr)
2473         return orig_expr;
2474
2475       step = array_ref_element_size (expr);
2476       if (!cst_and_fits_in_hwi (step))
2477         break;
2478
2479       st = int_cst_value (step);
2480       op1 = TREE_OPERAND (expr, 1);
2481       op1 = strip_offset_1 (op1, false, false, &off1);
2482       *offset = off1 * st;
2483
2484       if (top_compref
2485           && integer_zerop (op1))
2486         {
2487           /* Strip the component reference completely.  */
2488           op0 = TREE_OPERAND (expr, 0);
2489           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2490           *offset += off0;
2491           return op0;
2492         }
2493       break;
2494
2495     case COMPONENT_REF:
2496       {
2497         tree field;
2498
2499         if (!inside_addr)
2500           return orig_expr;
2501
2502         tmp = component_ref_field_offset (expr);
2503         field = TREE_OPERAND (expr, 1);
2504         if (top_compref
2505             && cst_and_fits_in_hwi (tmp)
2506             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2507           {
2508             HOST_WIDE_INT boffset, abs_off;
2509
2510             /* Strip the component reference completely.  */
2511             op0 = TREE_OPERAND (expr, 0);
2512             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2513             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2514             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2515             if (boffset < 0)
2516               abs_off = -abs_off;
2517
2518             *offset = off0 + int_cst_value (tmp) + abs_off;
2519             return op0;
2520           }
2521       }
2522       break;
2523
2524     case ADDR_EXPR:
2525       op0 = TREE_OPERAND (expr, 0);
2526       op0 = strip_offset_1 (op0, true, true, &off0);
2527       *offset += off0;
2528
2529       if (op0 == TREE_OPERAND (expr, 0))
2530         return orig_expr;
2531
2532       expr = build_fold_addr_expr (op0);
2533       return fold_convert (orig_type, expr);
2534
2535     case MEM_REF:
2536       /* ???  Offset operand?  */
2537       inside_addr = false;
2538       break;
2539
2540     default:
2541       return orig_expr;
2542     }
2543
2544   /* Default handling of expressions for that we want to recurse into
2545      the first operand.  */
2546   op0 = TREE_OPERAND (expr, 0);
2547   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2548   *offset += off0;
2549
2550   if (op0 == TREE_OPERAND (expr, 0)
2551       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2552     return orig_expr;
2553
2554   expr = copy_node (expr);
2555   TREE_OPERAND (expr, 0) = op0;
2556   if (op1)
2557     TREE_OPERAND (expr, 1) = op1;
2558
2559   /* Inside address, we might strip the top level component references,
2560      thus changing type of the expression.  Handling of ADDR_EXPR
2561      will fix that.  */
2562   expr = fold_convert (orig_type, expr);
2563
2564   return expr;
2565 }
2566
2567 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2568
2569 static tree
2570 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2571 {
2572   HOST_WIDE_INT off;
2573   tree core = strip_offset_1 (expr, false, false, &off);
2574   *offset = off;
2575   return core;
2576 }
2577
2578 /* Returns variant of TYPE that can be used as base for different uses.
2579    We return unsigned type with the same precision, which avoids problems
2580    with overflows.  */
2581
2582 static tree
2583 generic_type_for (tree type)
2584 {
2585   if (POINTER_TYPE_P (type))
2586     return unsigned_type_for (type);
2587
2588   if (TYPE_UNSIGNED (type))
2589     return type;
2590
2591   return unsigned_type_for (type);
2592 }
2593
2594 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2595    the bitmap to that we should store it.  */
2596
2597 static struct ivopts_data *fd_ivopts_data;
2598 static tree
2599 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2600 {
2601   bitmap *depends_on = (bitmap *) data;
2602   struct version_info *info;
2603
2604   if (TREE_CODE (*expr_p) != SSA_NAME)
2605     return NULL_TREE;
2606   info = name_info (fd_ivopts_data, *expr_p);
2607
2608   if (!info->inv_id || info->has_nonlin_use)
2609     return NULL_TREE;
2610
2611   if (!*depends_on)
2612     *depends_on = BITMAP_ALLOC (NULL);
2613   bitmap_set_bit (*depends_on, info->inv_id);
2614
2615   return NULL_TREE;
2616 }
2617
2618 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2619    position to POS.  If USE is not NULL, the candidate is set as related to
2620    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2621    replacement of the final value of the iv by a direct computation.  */
2622
2623 static struct iv_cand *
2624 add_candidate_1 (struct ivopts_data *data,
2625                  tree base, tree step, bool important, enum iv_position pos,
2626                  struct iv_use *use, gimple incremented_at)
2627 {
2628   unsigned i;
2629   struct iv_cand *cand = NULL;
2630   tree type, orig_type;
2631
2632   /* For non-original variables, make sure their values are computed in a type
2633      that does not invoke undefined behavior on overflows (since in general,
2634      we cannot prove that these induction variables are non-wrapping).  */
2635   if (pos != IP_ORIGINAL)
2636     {
2637       orig_type = TREE_TYPE (base);
2638       type = generic_type_for (orig_type);
2639       if (type != orig_type)
2640         {
2641           base = fold_convert (type, base);
2642           step = fold_convert (type, step);
2643         }
2644     }
2645
2646   for (i = 0; i < n_iv_cands (data); i++)
2647     {
2648       cand = iv_cand (data, i);
2649
2650       if (cand->pos != pos)
2651         continue;
2652
2653       if (cand->incremented_at != incremented_at
2654           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2655               && cand->ainc_use != use))
2656         continue;
2657
2658       if (!cand->iv)
2659         {
2660           if (!base && !step)
2661             break;
2662
2663           continue;
2664         }
2665
2666       if (!base && !step)
2667         continue;
2668
2669       if (operand_equal_p (base, cand->iv->base, 0)
2670           && operand_equal_p (step, cand->iv->step, 0)
2671           && (TYPE_PRECISION (TREE_TYPE (base))
2672               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2673         break;
2674     }
2675
2676   if (i == n_iv_cands (data))
2677     {
2678       cand = XCNEW (struct iv_cand);
2679       cand->id = i;
2680
2681       if (!base && !step)
2682         cand->iv = NULL;
2683       else
2684         cand->iv = alloc_iv (base, step);
2685
2686       cand->pos = pos;
2687       if (pos != IP_ORIGINAL && cand->iv)
2688         {
2689           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2690           cand->var_after = cand->var_before;
2691         }
2692       cand->important = important;
2693       cand->incremented_at = incremented_at;
2694       data->iv_candidates.safe_push (cand);
2695
2696       if (step
2697           && TREE_CODE (step) != INTEGER_CST)
2698         {
2699           fd_ivopts_data = data;
2700           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2701         }
2702
2703       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2704         cand->ainc_use = use;
2705       else
2706         cand->ainc_use = NULL;
2707
2708       if (dump_file && (dump_flags & TDF_DETAILS))
2709         dump_cand (dump_file, cand);
2710     }
2711
2712   if (important && !cand->important)
2713     {
2714       cand->important = true;
2715       if (dump_file && (dump_flags & TDF_DETAILS))
2716         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2717     }
2718
2719   if (use)
2720     {
2721       bitmap_set_bit (use->related_cands, i);
2722       if (dump_file && (dump_flags & TDF_DETAILS))
2723         fprintf (dump_file, "Candidate %d is related to use %d\n",
2724                  cand->id, use->id);
2725     }
2726
2727   return cand;
2728 }
2729
2730 /* Returns true if incrementing the induction variable at the end of the LOOP
2731    is allowed.
2732
2733    The purpose is to avoid splitting latch edge with a biv increment, thus
2734    creating a jump, possibly confusing other optimization passes and leaving
2735    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2736    is not available (so we do not have a better alternative), or if the latch
2737    edge is already nonempty.  */
2738
2739 static bool
2740 allow_ip_end_pos_p (struct loop *loop)
2741 {
2742   if (!ip_normal_pos (loop))
2743     return true;
2744
2745   if (!empty_block_p (ip_end_pos (loop)))
2746     return true;
2747
2748   return false;
2749 }
2750
2751 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2752    Important field is set to IMPORTANT.  */
2753
2754 static void
2755 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2756                         bool important, struct iv_use *use)
2757 {
2758   basic_block use_bb = gimple_bb (use->stmt);
2759   machine_mode mem_mode;
2760   unsigned HOST_WIDE_INT cstepi;
2761
2762   /* If we insert the increment in any position other than the standard
2763      ones, we must ensure that it is incremented once per iteration.
2764      It must not be in an inner nested loop, or one side of an if
2765      statement.  */
2766   if (use_bb->loop_father != data->current_loop
2767       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2768       || stmt_could_throw_p (use->stmt)
2769       || !cst_and_fits_in_hwi (step))
2770     return;
2771
2772   cstepi = int_cst_value (step);
2773
2774   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2775   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2776         || USE_STORE_PRE_INCREMENT (mem_mode))
2777        && GET_MODE_SIZE (mem_mode) == cstepi)
2778       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2779            || USE_STORE_PRE_DECREMENT (mem_mode))
2780           && GET_MODE_SIZE (mem_mode) == -cstepi))
2781     {
2782       enum tree_code code = MINUS_EXPR;
2783       tree new_base;
2784       tree new_step = step;
2785
2786       if (POINTER_TYPE_P (TREE_TYPE (base)))
2787         {
2788           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2789           code = POINTER_PLUS_EXPR;
2790         }
2791       else
2792         new_step = fold_convert (TREE_TYPE (base), new_step);
2793       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2794       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2795                        use->stmt);
2796     }
2797   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2798         || USE_STORE_POST_INCREMENT (mem_mode))
2799        && GET_MODE_SIZE (mem_mode) == cstepi)
2800       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2801            || USE_STORE_POST_DECREMENT (mem_mode))
2802           && GET_MODE_SIZE (mem_mode) == -cstepi))
2803     {
2804       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2805                        use->stmt);
2806     }
2807 }
2808
2809 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2810    position to POS.  If USE is not NULL, the candidate is set as related to
2811    it.  The candidate computation is scheduled on all available positions.  */
2812
2813 static void
2814 add_candidate (struct ivopts_data *data,
2815                tree base, tree step, bool important, struct iv_use *use)
2816 {
2817   gcc_assert (use == NULL || use->sub_id == 0);
2818
2819   if (ip_normal_pos (data->current_loop))
2820     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2821   if (ip_end_pos (data->current_loop)
2822       && allow_ip_end_pos_p (data->current_loop))
2823     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2824
2825   if (use != NULL && use->type == USE_ADDRESS)
2826     add_autoinc_candidates (data, base, step, important, use);
2827 }
2828
2829 /* Adds standard iv candidates.  */
2830
2831 static void
2832 add_standard_iv_candidates (struct ivopts_data *data)
2833 {
2834   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2835
2836   /* The same for a double-integer type if it is still fast enough.  */
2837   if (TYPE_PRECISION
2838         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2839       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2840     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2841                    build_int_cst (long_integer_type_node, 1), true, NULL);
2842
2843   /* The same for a double-integer type if it is still fast enough.  */
2844   if (TYPE_PRECISION
2845         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2846       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2847     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2848                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2849 }
2850
2851
2852 /* Adds candidates bases on the old induction variable IV.  */
2853
2854 static void
2855 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2856 {
2857   gimple phi;
2858   tree def;
2859   struct iv_cand *cand;
2860
2861   add_candidate (data, iv->base, iv->step, true, NULL);
2862
2863   /* The same, but with initial value zero.  */
2864   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2865     add_candidate (data, size_int (0), iv->step, true, NULL);
2866   else
2867     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2868                    iv->step, true, NULL);
2869
2870   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2871   if (gimple_code (phi) == GIMPLE_PHI)
2872     {
2873       /* Additionally record the possibility of leaving the original iv
2874          untouched.  */
2875       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2876       /* Don't add candidate if it's from another PHI node because
2877          it's an affine iv appearing in the form of PEELED_CHREC.  */
2878       phi = SSA_NAME_DEF_STMT (def);
2879       if (gimple_code (phi) != GIMPLE_PHI)
2880         {
2881           cand = add_candidate_1 (data,
2882                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2883                                   SSA_NAME_DEF_STMT (def));
2884           cand->var_before = iv->ssa_name;
2885           cand->var_after = def;
2886         }
2887       else
2888         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2889     }
2890 }
2891
2892 /* Adds candidates based on the old induction variables.  */
2893
2894 static void
2895 add_old_ivs_candidates (struct ivopts_data *data)
2896 {
2897   unsigned i;
2898   struct iv *iv;
2899   bitmap_iterator bi;
2900
2901   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2902     {
2903       iv = ver_info (data, i)->iv;
2904       if (iv && iv->biv_p && !integer_zerop (iv->step))
2905         add_old_iv_candidates (data, iv);
2906     }
2907 }
2908
2909 /* Adds candidates based on the value of the induction variable IV and USE.  */
2910
2911 static void
2912 add_iv_value_candidates (struct ivopts_data *data,
2913                          struct iv *iv, struct iv_use *use)
2914 {
2915   unsigned HOST_WIDE_INT offset;
2916   tree base;
2917   tree basetype;
2918
2919   add_candidate (data, iv->base, iv->step, false, use);
2920
2921   /* The same, but with initial value zero.  Make such variable important,
2922      since it is generic enough so that possibly many uses may be based
2923      on it.  */
2924   basetype = TREE_TYPE (iv->base);
2925   if (POINTER_TYPE_P (basetype))
2926     basetype = sizetype;
2927   add_candidate (data, build_int_cst (basetype, 0),
2928                  iv->step, true, use);
2929
2930   /* Third, try removing the constant offset.  Make sure to even
2931      add a candidate for &a[0] vs. (T *)&a.  */
2932   base = strip_offset (iv->base, &offset);
2933   if (offset
2934       || base != iv->base)
2935     add_candidate (data, base, iv->step, false, use);
2936 }
2937
2938 /* Adds candidates based on the uses.  */
2939
2940 static void
2941 add_derived_ivs_candidates (struct ivopts_data *data)
2942 {
2943   unsigned i;
2944
2945   for (i = 0; i < n_iv_uses (data); i++)
2946     {
2947       struct iv_use *use = iv_use (data, i);
2948
2949       if (!use)
2950         continue;
2951
2952       switch (use->type)
2953         {
2954         case USE_NONLINEAR_EXPR:
2955         case USE_COMPARE:
2956         case USE_ADDRESS:
2957           /* Just add the ivs based on the value of the iv used here.  */
2958           add_iv_value_candidates (data, use->iv, use);
2959           break;
2960
2961         default:
2962           gcc_unreachable ();
2963         }
2964     }
2965 }
2966
2967 /* Record important candidates and add them to related_cands bitmaps
2968    if needed.  */
2969
2970 static void
2971 record_important_candidates (struct ivopts_data *data)
2972 {
2973   unsigned i;
2974   struct iv_use *use;
2975
2976   for (i = 0; i < n_iv_cands (data); i++)
2977     {
2978       struct iv_cand *cand = iv_cand (data, i);
2979
2980       if (cand->important)
2981         bitmap_set_bit (data->important_candidates, i);
2982     }
2983
2984   data->consider_all_candidates = (n_iv_cands (data)
2985                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2986
2987   if (data->consider_all_candidates)
2988     {
2989       /* We will not need "related_cands" bitmaps in this case,
2990          so release them to decrease peak memory consumption.  */
2991       for (i = 0; i < n_iv_uses (data); i++)
2992         {
2993           use = iv_use (data, i);
2994           BITMAP_FREE (use->related_cands);
2995         }
2996     }
2997   else
2998     {
2999       /* Add important candidates to the related_cands bitmaps.  */
3000       for (i = 0; i < n_iv_uses (data); i++)
3001         bitmap_ior_into (iv_use (data, i)->related_cands,
3002                          data->important_candidates);
3003     }
3004 }
3005
3006 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3007    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3008    we allocate a simple list to every use.  */
3009
3010 static void
3011 alloc_use_cost_map (struct ivopts_data *data)
3012 {
3013   unsigned i, size, s;
3014
3015   for (i = 0; i < n_iv_uses (data); i++)
3016     {
3017       struct iv_use *use = iv_use (data, i);
3018
3019       if (data->consider_all_candidates)
3020         size = n_iv_cands (data);
3021       else
3022         {
3023           s = bitmap_count_bits (use->related_cands);
3024
3025           /* Round up to the power of two, so that moduling by it is fast.  */
3026           size = s ? (1 << ceil_log2 (s)) : 1;
3027         }
3028
3029       use->n_map_members = size;
3030       use->cost_map = XCNEWVEC (struct cost_pair, size);
3031     }
3032 }
3033
3034 /* Returns description of computation cost of expression whose runtime
3035    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
3036
3037 static comp_cost
3038 new_cost (unsigned runtime, unsigned complexity)
3039 {
3040   comp_cost cost;
3041
3042   cost.cost = runtime;
3043   cost.complexity = complexity;
3044
3045   return cost;
3046 }
3047
3048 /* Returns true if COST is infinite.  */
3049
3050 static bool
3051 infinite_cost_p (comp_cost cost)
3052 {
3053   return cost.cost == INFTY;
3054 }
3055
3056 /* Adds costs COST1 and COST2.  */
3057
3058 static comp_cost
3059 add_costs (comp_cost cost1, comp_cost cost2)
3060 {
3061   if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3062     return infinite_cost;
3063
3064   cost1.cost += cost2.cost;
3065   cost1.complexity += cost2.complexity;
3066
3067   return cost1;
3068 }
3069 /* Subtracts costs COST1 and COST2.  */
3070
3071 static comp_cost
3072 sub_costs (comp_cost cost1, comp_cost cost2)
3073 {
3074   cost1.cost -= cost2.cost;
3075   cost1.complexity -= cost2.complexity;
3076
3077   return cost1;
3078 }
3079
3080 /* Returns a negative number if COST1 < COST2, a positive number if
3081    COST1 > COST2, and 0 if COST1 = COST2.  */
3082
3083 static int
3084 compare_costs (comp_cost cost1, comp_cost cost2)
3085 {
3086   if (cost1.cost == cost2.cost)
3087     return cost1.complexity - cost2.complexity;
3088
3089   return cost1.cost - cost2.cost;
3090 }
3091
3092 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3093    on invariants DEPENDS_ON and that the value used in expressing it
3094    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3095
3096 static void
3097 set_use_iv_cost (struct ivopts_data *data,
3098                  struct iv_use *use, struct iv_cand *cand,
3099                  comp_cost cost, bitmap depends_on, tree value,
3100                  enum tree_code comp, int inv_expr_id)
3101 {
3102   unsigned i, s;
3103
3104   if (infinite_cost_p (cost))
3105     {
3106       BITMAP_FREE (depends_on);
3107       return;
3108     }
3109
3110   if (data->consider_all_candidates)
3111     {
3112       use->cost_map[cand->id].cand = cand;
3113       use->cost_map[cand->id].cost = cost;
3114       use->cost_map[cand->id].depends_on = depends_on;
3115       use->cost_map[cand->id].value = value;
3116       use->cost_map[cand->id].comp = comp;
3117       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3118       return;
3119     }
3120
3121   /* n_map_members is a power of two, so this computes modulo.  */
3122   s = cand->id & (use->n_map_members - 1);
3123   for (i = s; i < use->n_map_members; i++)
3124     if (!use->cost_map[i].cand)
3125       goto found;
3126   for (i = 0; i < s; i++)
3127     if (!use->cost_map[i].cand)
3128       goto found;
3129
3130   gcc_unreachable ();
3131
3132 found:
3133   use->cost_map[i].cand = cand;
3134   use->cost_map[i].cost = cost;
3135   use->cost_map[i].depends_on = depends_on;
3136   use->cost_map[i].value = value;
3137   use->cost_map[i].comp = comp;
3138   use->cost_map[i].inv_expr_id = inv_expr_id;
3139 }
3140
3141 /* Gets cost of (USE, CANDIDATE) pair.  */
3142
3143 static struct cost_pair *
3144 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3145                  struct iv_cand *cand)
3146 {
3147   unsigned i, s;
3148   struct cost_pair *ret;
3149
3150   if (!cand)
3151     return NULL;
3152
3153   if (data->consider_all_candidates)
3154     {
3155       ret = use->cost_map + cand->id;
3156       if (!ret->cand)
3157         return NULL;
3158
3159       return ret;
3160     }
3161
3162   /* n_map_members is a power of two, so this computes modulo.  */
3163   s = cand->id & (use->n_map_members - 1);
3164   for (i = s; i < use->n_map_members; i++)
3165     if (use->cost_map[i].cand == cand)
3166       return use->cost_map + i;
3167     else if (use->cost_map[i].cand == NULL)
3168       return NULL;
3169   for (i = 0; i < s; i++)
3170     if (use->cost_map[i].cand == cand)
3171       return use->cost_map + i;
3172     else if (use->cost_map[i].cand == NULL)
3173       return NULL;
3174
3175   return NULL;
3176 }
3177
3178 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3179 static rtx
3180 produce_memory_decl_rtl (tree obj, int *regno)
3181 {
3182   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3183   machine_mode address_mode = targetm.addr_space.address_mode (as);
3184   rtx x;
3185
3186   gcc_assert (obj);
3187   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3188     {
3189       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3190       x = gen_rtx_SYMBOL_REF (address_mode, name);
3191       SET_SYMBOL_REF_DECL (x, obj);
3192       x = gen_rtx_MEM (DECL_MODE (obj), x);
3193       set_mem_addr_space (x, as);
3194       targetm.encode_section_info (obj, x, true);
3195     }
3196   else
3197     {
3198       x = gen_raw_REG (address_mode, (*regno)++);
3199       x = gen_rtx_MEM (DECL_MODE (obj), x);
3200       set_mem_addr_space (x, as);
3201     }
3202
3203   return x;
3204 }
3205
3206 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3207    walk_tree.  DATA contains the actual fake register number.  */
3208
3209 static tree
3210 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3211 {
3212   tree obj = NULL_TREE;
3213   rtx x = NULL_RTX;
3214   int *regno = (int *) data;
3215
3216   switch (TREE_CODE (*expr_p))
3217     {
3218     case ADDR_EXPR:
3219       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3220            handled_component_p (*expr_p);
3221            expr_p = &TREE_OPERAND (*expr_p, 0))
3222         continue;
3223       obj = *expr_p;
3224       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3225         x = produce_memory_decl_rtl (obj, regno);
3226       break;
3227
3228     case SSA_NAME:
3229       *ws = 0;
3230       obj = SSA_NAME_VAR (*expr_p);
3231       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3232       if (!obj)
3233         return NULL_TREE;
3234       if (!DECL_RTL_SET_P (obj))
3235         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3236       break;
3237
3238     case VAR_DECL:
3239     case PARM_DECL:
3240     case RESULT_DECL:
3241       *ws = 0;
3242       obj = *expr_p;
3243
3244       if (DECL_RTL_SET_P (obj))
3245         break;
3246
3247       if (DECL_MODE (obj) == BLKmode)
3248         x = produce_memory_decl_rtl (obj, regno);
3249       else
3250         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3251
3252       break;
3253
3254     default:
3255       break;
3256     }
3257
3258   if (x)
3259     {
3260       decl_rtl_to_reset.safe_push (obj);
3261       SET_DECL_RTL (obj, x);
3262     }
3263
3264   return NULL_TREE;
3265 }
3266
3267 /* Determines cost of the computation of EXPR.  */
3268
3269 static unsigned
3270 computation_cost (tree expr, bool speed)
3271 {
3272   rtx_insn *seq;
3273   rtx rslt;
3274   tree type = TREE_TYPE (expr);
3275   unsigned cost;
3276   /* Avoid using hard regs in ways which may be unsupported.  */
3277   int regno = LAST_VIRTUAL_REGISTER + 1;
3278   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3279   enum node_frequency real_frequency = node->frequency;
3280
3281   node->frequency = NODE_FREQUENCY_NORMAL;
3282   crtl->maybe_hot_insn_p = speed;
3283   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3284   start_sequence ();
3285   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3286   seq = get_insns ();
3287   end_sequence ();
3288   default_rtl_profile ();
3289   node->frequency = real_frequency;
3290
3291   cost = seq_cost (seq, speed);
3292   if (MEM_P (rslt))
3293     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3294                           TYPE_ADDR_SPACE (type), speed);
3295   else if (!REG_P (rslt))
3296     cost += set_src_cost (rslt, speed);
3297
3298   return cost;
3299 }
3300
3301 /* Returns variable containing the value of candidate CAND at statement AT.  */
3302
3303 static tree
3304 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3305 {
3306   if (stmt_after_increment (loop, cand, stmt))
3307     return cand->var_after;
3308   else
3309     return cand->var_before;
3310 }
3311
3312 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3313    same precision that is at least as wide as the precision of TYPE, stores
3314    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3315    type of A and B.  */
3316
3317 static tree
3318 determine_common_wider_type (tree *a, tree *b)
3319 {
3320   tree wider_type = NULL;
3321   tree suba, subb;
3322   tree atype = TREE_TYPE (*a);
3323
3324   if (CONVERT_EXPR_P (*a))
3325     {
3326       suba = TREE_OPERAND (*a, 0);
3327       wider_type = TREE_TYPE (suba);
3328       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3329         return atype;
3330     }
3331   else
3332     return atype;
3333
3334   if (CONVERT_EXPR_P (*b))
3335     {
3336       subb = TREE_OPERAND (*b, 0);
3337       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3338         return atype;
3339     }
3340   else
3341     return atype;
3342
3343   *a = suba;
3344   *b = subb;
3345   return wider_type;
3346 }
3347
3348 /* Determines the expression by that USE is expressed from induction variable
3349    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3350    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3351
3352 static bool
3353 get_computation_aff (struct loop *loop,
3354                      struct iv_use *use, struct iv_cand *cand, gimple at,
3355                      struct aff_tree *aff)
3356 {
3357   tree ubase = use->iv->base;
3358   tree ustep = use->iv->step;
3359   tree cbase = cand->iv->base;
3360   tree cstep = cand->iv->step, cstep_common;
3361   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3362   tree common_type, var;
3363   tree uutype;
3364   aff_tree cbase_aff, var_aff;
3365   widest_int rat;
3366
3367   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3368     {
3369       /* We do not have a precision to express the values of use.  */
3370       return false;
3371     }
3372
3373   var = var_at_stmt (loop, cand, at);
3374   uutype = unsigned_type_for (utype);
3375
3376   /* If the conversion is not noop, perform it.  */
3377   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3378     {
3379       cstep = fold_convert (uutype, cstep);
3380       cbase = fold_convert (uutype, cbase);
3381       var = fold_convert (uutype, var);
3382     }
3383
3384   if (!constant_multiple_of (ustep, cstep, &rat))
3385     return false;
3386
3387   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3388      type, we achieve better folding by computing their difference in this
3389      wider type, and cast the result to UUTYPE.  We do not need to worry about
3390      overflows, as all the arithmetics will in the end be performed in UUTYPE
3391      anyway.  */
3392   common_type = determine_common_wider_type (&ubase, &cbase);
3393
3394   /* use = ubase - ratio * cbase + ratio * var.  */
3395   tree_to_aff_combination (ubase, common_type, aff);
3396   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3397   tree_to_aff_combination (var, uutype, &var_aff);
3398
3399   /* We need to shift the value if we are after the increment.  */
3400   if (stmt_after_increment (loop, cand, at))
3401     {
3402       aff_tree cstep_aff;
3403
3404       if (common_type != uutype)
3405         cstep_common = fold_convert (common_type, cstep);
3406       else
3407         cstep_common = cstep;
3408
3409       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3410       aff_combination_add (&cbase_aff, &cstep_aff);
3411     }
3412
3413   aff_combination_scale (&cbase_aff, -rat);
3414   aff_combination_add (aff, &cbase_aff);
3415   if (common_type != uutype)
3416     aff_combination_convert (aff, uutype);
3417
3418   aff_combination_scale (&var_aff, rat);
3419   aff_combination_add (aff, &var_aff);
3420
3421   return true;
3422 }
3423
3424 /* Return the type of USE.  */
3425
3426 static tree
3427 get_use_type (struct iv_use *use)
3428 {
3429   tree base_type = TREE_TYPE (use->iv->base);
3430   tree type;
3431
3432   if (use->type == USE_ADDRESS)
3433     {
3434       /* The base_type may be a void pointer.  Create a pointer type based on
3435          the mem_ref instead.  */
3436       type = build_pointer_type (TREE_TYPE (*use->op_p));
3437       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3438                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3439     }
3440   else
3441     type = base_type;
3442
3443   return type;
3444 }
3445
3446 /* Determines the expression by that USE is expressed from induction variable
3447    CAND at statement AT in LOOP.  The computation is unshared.  */
3448
3449 static tree
3450 get_computation_at (struct loop *loop,
3451                     struct iv_use *use, struct iv_cand *cand, gimple at)
3452 {
3453   aff_tree aff;
3454   tree type = get_use_type (use);
3455
3456   if (!get_computation_aff (loop, use, cand, at, &aff))
3457     return NULL_TREE;
3458   unshare_aff_combination (&aff);
3459   return fold_convert (type, aff_combination_to_tree (&aff));
3460 }
3461
3462 /* Determines the expression by that USE is expressed from induction variable
3463    CAND in LOOP.  The computation is unshared.  */
3464
3465 static tree
3466 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3467 {
3468   return get_computation_at (loop, use, cand, use->stmt);
3469 }
3470
3471 /* Adjust the cost COST for being in loop setup rather than loop body.
3472    If we're optimizing for space, the loop setup overhead is constant;
3473    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3474 static unsigned
3475 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3476 {
3477   if (cost == INFTY)
3478     return cost;
3479   else if (optimize_loop_for_speed_p (data->current_loop))
3480     return cost / avg_loop_niter (data->current_loop);
3481   else
3482     return cost;
3483 }
3484
3485 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3486    validity for a memory reference accessing memory of mode MODE in
3487    address space AS.  */
3488
3489
3490 bool
3491 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3492                                  addr_space_t as)
3493 {
3494 #define MAX_RATIO 128
3495   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3496   static vec<sbitmap> valid_mult_list;
3497   sbitmap valid_mult;
3498
3499   if (data_index >= valid_mult_list.length ())
3500     valid_mult_list.safe_grow_cleared (data_index + 1);
3501
3502   valid_mult = valid_mult_list[data_index];
3503   if (!valid_mult)
3504     {
3505       machine_mode address_mode = targetm.addr_space.address_mode (as);
3506       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3507       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3508       rtx addr, scaled;
3509       HOST_WIDE_INT i;
3510
3511       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3512       bitmap_clear (valid_mult);
3513       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3514       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3515       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3516         {
3517           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3518           if (memory_address_addr_space_p (mode, addr, as)
3519               || memory_address_addr_space_p (mode, scaled, as))
3520             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3521         }
3522
3523       if (dump_file && (dump_flags & TDF_DETAILS))
3524         {
3525           fprintf (dump_file, "  allowed multipliers:");
3526           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3527             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3528               fprintf (dump_file, " %d", (int) i);
3529           fprintf (dump_file, "\n");
3530           fprintf (dump_file, "\n");
3531         }
3532
3533       valid_mult_list[data_index] = valid_mult;
3534     }
3535
3536   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3537     return false;
3538
3539   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3540 }
3541
3542 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3543    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3544    variable is omitted.  Compute the cost for a memory reference that accesses
3545    a memory location of mode MEM_MODE in address space AS.
3546
3547    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3548    size of MEM_MODE / RATIO) is available.  To make this determination, we
3549    look at the size of the increment to be made, which is given in CSTEP.
3550    CSTEP may be zero if the step is unknown.
3551    STMT_AFTER_INC is true iff the statement we're looking at is after the
3552    increment of the original biv.
3553
3554    TODO -- there must be some better way.  This all is quite crude.  */
3555
3556 enum ainc_type
3557 {
3558   AINC_PRE_INC,         /* Pre increment.  */
3559   AINC_PRE_DEC,         /* Pre decrement.  */
3560   AINC_POST_INC,        /* Post increment.  */
3561   AINC_POST_DEC,        /* Post decrement.  */
3562   AINC_NONE             /* Also the number of auto increment types.  */
3563 };
3564
3565 typedef struct address_cost_data_s
3566 {
3567   HOST_WIDE_INT min_offset, max_offset;
3568   unsigned costs[2][2][2][2];
3569   unsigned ainc_costs[AINC_NONE];
3570 } *address_cost_data;
3571
3572
3573 static comp_cost
3574 get_address_cost (bool symbol_present, bool var_present,
3575                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3576                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3577                   addr_space_t as, bool speed,
3578                   bool stmt_after_inc, bool *may_autoinc)
3579 {
3580   machine_mode address_mode = targetm.addr_space.address_mode (as);
3581   static vec<address_cost_data> address_cost_data_list;
3582   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3583   address_cost_data data;
3584   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3585   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3586   unsigned cost, acost, complexity;
3587   enum ainc_type autoinc_type;
3588   bool offset_p, ratio_p, autoinc;
3589   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3590   unsigned HOST_WIDE_INT mask;
3591   unsigned bits;
3592
3593   if (data_index >= address_cost_data_list.length ())
3594     address_cost_data_list.safe_grow_cleared (data_index + 1);
3595
3596   data = address_cost_data_list[data_index];
3597   if (!data)
3598     {
3599       HOST_WIDE_INT i;
3600       HOST_WIDE_INT rat, off = 0;
3601       int old_cse_not_expected, width;
3602       unsigned sym_p, var_p, off_p, rat_p, add_c;
3603       rtx_insn *seq;
3604       rtx addr, base;
3605       rtx reg0, reg1;
3606
3607       data = (address_cost_data) xcalloc (1, sizeof (*data));
3608
3609       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3610
3611       width = GET_MODE_BITSIZE (address_mode) - 1;
3612       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3613         width = HOST_BITS_PER_WIDE_INT - 1;
3614       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3615
3616       for (i = width; i >= 0; i--)
3617         {
3618           off = -((unsigned HOST_WIDE_INT) 1 << i);
3619           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3620           if (memory_address_addr_space_p (mem_mode, addr, as))
3621             break;
3622         }
3623       data->min_offset = (i == -1? 0 : off);
3624
3625       for (i = width; i >= 0; i--)
3626         {
3627           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3628           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3629           if (memory_address_addr_space_p (mem_mode, addr, as))
3630             break;
3631           /* For some strict-alignment targets, the offset must be naturally
3632              aligned.  Try an aligned offset if mem_mode is not QImode.  */
3633           off = mem_mode != QImode
3634                 ? ((unsigned HOST_WIDE_INT) 1 << i)
3635                     - GET_MODE_SIZE (mem_mode)
3636                 : 0;
3637           if (off > 0)
3638             {
3639               XEXP (addr, 1) = gen_int_mode (off, address_mode);
3640               if (memory_address_addr_space_p (mem_mode, addr, as))
3641                 break;
3642             }
3643         }
3644       if (i == -1)
3645         off = 0;
3646       data->max_offset = off;
3647
3648       if (dump_file && (dump_flags & TDF_DETAILS))
3649         {
3650           fprintf (dump_file, "get_address_cost:\n");
3651           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3652                    GET_MODE_NAME (mem_mode),
3653                    data->min_offset);
3654           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3655                    GET_MODE_NAME (mem_mode),
3656                    data->max_offset);
3657         }
3658
3659       rat = 1;
3660       for (i = 2; i <= MAX_RATIO; i++)
3661         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3662           {
3663             rat = i;
3664             break;
3665           }
3666
3667       /* Compute the cost of various addressing modes.  */
3668       acost = 0;
3669       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3670       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3671
3672       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3673           || USE_STORE_PRE_DECREMENT (mem_mode))
3674         {
3675           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3676           has_predec[mem_mode]
3677             = memory_address_addr_space_p (mem_mode, addr, as);
3678
3679           if (has_predec[mem_mode])
3680             data->ainc_costs[AINC_PRE_DEC]
3681               = address_cost (addr, mem_mode, as, speed);
3682         }
3683       if (USE_LOAD_POST_DECREMENT (mem_mode)
3684           || USE_STORE_POST_DECREMENT (mem_mode))
3685         {
3686           addr = gen_rtx_POST_DEC (address_mode, reg0);
3687           has_postdec[mem_mode]
3688             = memory_address_addr_space_p (mem_mode, addr, as);
3689
3690           if (has_postdec[mem_mode])
3691             data->ainc_costs[AINC_POST_DEC]
3692               = address_cost (addr, mem_mode, as, speed);
3693         }
3694       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3695           || USE_STORE_PRE_DECREMENT (mem_mode))
3696         {
3697           addr = gen_rtx_PRE_INC (address_mode, reg0);
3698           has_preinc[mem_mode]
3699             = memory_address_addr_space_p (mem_mode, addr, as);
3700
3701           if (has_preinc[mem_mode])
3702             data->ainc_costs[AINC_PRE_INC]
3703               = address_cost (addr, mem_mode, as, speed);
3704         }
3705       if (USE_LOAD_POST_INCREMENT (mem_mode)
3706           || USE_STORE_POST_INCREMENT (mem_mode))
3707         {
3708           addr = gen_rtx_POST_INC (address_mode, reg0);
3709           has_postinc[mem_mode]
3710             = memory_address_addr_space_p (mem_mode, addr, as);
3711
3712           if (has_postinc[mem_mode])
3713             data->ainc_costs[AINC_POST_INC]
3714               = address_cost (addr, mem_mode, as, speed);
3715         }
3716       for (i = 0; i < 16; i++)
3717         {
3718           sym_p = i & 1;
3719           var_p = (i >> 1) & 1;
3720           off_p = (i >> 2) & 1;
3721           rat_p = (i >> 3) & 1;
3722
3723           addr = reg0;
3724           if (rat_p)
3725             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3726                                    gen_int_mode (rat, address_mode));
3727
3728           if (var_p)
3729             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3730
3731           if (sym_p)
3732             {
3733               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3734               /* ??? We can run into trouble with some backends by presenting
3735                  it with symbols which haven't been properly passed through
3736                  targetm.encode_section_info.  By setting the local bit, we
3737                  enhance the probability of things working.  */
3738               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3739
3740               if (off_p)
3741                 base = gen_rtx_fmt_e (CONST, address_mode,
3742                                       gen_rtx_fmt_ee
3743                                         (PLUS, address_mode, base,
3744                                          gen_int_mode (off, address_mode)));
3745             }
3746           else if (off_p)
3747             base = gen_int_mode (off, address_mode);
3748           else
3749             base = NULL_RTX;
3750
3751           if (base)
3752             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3753
3754           start_sequence ();
3755           /* To avoid splitting addressing modes, pretend that no cse will
3756              follow.  */
3757           old_cse_not_expected = cse_not_expected;
3758           cse_not_expected = true;
3759           addr = memory_address_addr_space (mem_mode, addr, as);
3760           cse_not_expected = old_cse_not_expected;
3761           seq = get_insns ();
3762           end_sequence ();
3763
3764           acost = seq_cost (seq, speed);
3765           acost += address_cost (addr, mem_mode, as, speed);
3766
3767           if (!acost)
3768             acost = 1;
3769           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3770         }
3771
3772       /* On some targets, it is quite expensive to load symbol to a register,
3773          which makes addresses that contain symbols look much more expensive.
3774          However, the symbol will have to be loaded in any case before the
3775          loop (and quite likely we have it in register already), so it does not
3776          make much sense to penalize them too heavily.  So make some final
3777          tweaks for the SYMBOL_PRESENT modes:
3778
3779          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3780          var is cheaper, use this mode with small penalty.
3781          If VAR_PRESENT is true, try whether the mode with
3782          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3783          if this is the case, use it.  */
3784       add_c = add_cost (speed, address_mode);
3785       for (i = 0; i < 8; i++)
3786         {
3787           var_p = i & 1;
3788           off_p = (i >> 1) & 1;
3789           rat_p = (i >> 2) & 1;
3790
3791           acost = data->costs[0][1][off_p][rat_p] + 1;
3792           if (var_p)
3793             acost += add_c;
3794
3795           if (acost < data->costs[1][var_p][off_p][rat_p])
3796             data->costs[1][var_p][off_p][rat_p] = acost;
3797         }
3798
3799       if (dump_file && (dump_flags & TDF_DETAILS))
3800         {
3801           fprintf (dump_file, "Address costs:\n");
3802
3803           for (i = 0; i < 16; i++)
3804             {
3805               sym_p = i & 1;
3806               var_p = (i >> 1) & 1;
3807               off_p = (i >> 2) & 1;
3808               rat_p = (i >> 3) & 1;
3809
3810               fprintf (dump_file, "  ");
3811               if (sym_p)
3812                 fprintf (dump_file, "sym + ");
3813               if (var_p)
3814                 fprintf (dump_file, "var + ");
3815               if (off_p)
3816                 fprintf (dump_file, "cst + ");
3817               if (rat_p)
3818                 fprintf (dump_file, "rat * ");
3819
3820               acost = data->costs[sym_p][var_p][off_p][rat_p];
3821               fprintf (dump_file, "index costs %d\n", acost);
3822             }
3823           if (has_predec[mem_mode] || has_postdec[mem_mode]
3824               || has_preinc[mem_mode] || has_postinc[mem_mode])
3825             fprintf (dump_file, "  May include autoinc/dec\n");
3826           fprintf (dump_file, "\n");
3827         }
3828
3829       address_cost_data_list[data_index] = data;
3830     }
3831
3832   bits = GET_MODE_BITSIZE (address_mode);
3833   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3834   offset &= mask;
3835   if ((offset >> (bits - 1) & 1))
3836     offset |= ~mask;
3837   s_offset = offset;
3838
3839   autoinc = false;
3840   autoinc_type = AINC_NONE;
3841   msize = GET_MODE_SIZE (mem_mode);
3842   autoinc_offset = offset;
3843   if (stmt_after_inc)
3844     autoinc_offset += ratio * cstep;
3845   if (symbol_present || var_present || ratio != 1)
3846     autoinc = false;
3847   else
3848     {
3849       if (has_postinc[mem_mode] && autoinc_offset == 0
3850           && msize == cstep)
3851         autoinc_type = AINC_POST_INC;
3852       else if (has_postdec[mem_mode] && autoinc_offset == 0
3853                && msize == -cstep)
3854         autoinc_type = AINC_POST_DEC;
3855       else if (has_preinc[mem_mode] && autoinc_offset == msize
3856                && msize == cstep)
3857         autoinc_type = AINC_PRE_INC;
3858       else if (has_predec[mem_mode] && autoinc_offset == -msize
3859                && msize == -cstep)
3860         autoinc_type = AINC_PRE_DEC;
3861
3862       if (autoinc_type != AINC_NONE)
3863         autoinc = true;
3864     }
3865
3866   cost = 0;
3867   offset_p = (s_offset != 0
3868               && data->min_offset <= s_offset
3869               && s_offset <= data->max_offset);
3870   ratio_p = (ratio != 1
3871              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3872
3873   if (ratio != 1 && !ratio_p)
3874     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3875
3876   if (s_offset && !offset_p && !symbol_present)
3877     cost += add_cost (speed, address_mode);
3878
3879   if (may_autoinc)
3880     *may_autoinc = autoinc;
3881   if (autoinc)
3882     acost = data->ainc_costs[autoinc_type];
3883   else
3884     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3885   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3886   return new_cost (cost + acost, complexity);
3887 }
3888
3889  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3890     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3891     calculating the operands of EXPR.  Returns true if successful, and returns
3892     the cost in COST.  */
3893
3894 static bool
3895 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3896                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3897 {
3898   comp_cost res;
3899   tree op1 = TREE_OPERAND (expr, 1);
3900   tree cst = TREE_OPERAND (mult, 1);
3901   tree multop = TREE_OPERAND (mult, 0);
3902   int m = exact_log2 (int_cst_value (cst));
3903   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3904   int as_cost, sa_cost;
3905   bool mult_in_op1;
3906
3907   if (!(m >= 0 && m < maxm))
3908     return false;
3909
3910   mult_in_op1 = operand_equal_p (op1, mult, 0);
3911
3912   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3913
3914   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3915      use that in preference to a shift insn followed by an add insn.  */
3916   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3917              ? shiftadd_cost (speed, mode, m)
3918              : (mult_in_op1
3919                 ? shiftsub1_cost (speed, mode, m)
3920                 : shiftsub0_cost (speed, mode, m)));
3921
3922   res = new_cost (MIN (as_cost, sa_cost), 0);
3923   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3924
3925   STRIP_NOPS (multop);
3926   if (!is_gimple_val (multop))
3927     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3928
3929   *cost = res;
3930   return true;
3931 }
3932
3933 /* Estimates cost of forcing expression EXPR into a variable.  */
3934
3935 static comp_cost
3936 force_expr_to_var_cost (tree expr, bool speed)
3937 {
3938   static bool costs_initialized = false;
3939   static unsigned integer_cost [2];
3940   static unsigned symbol_cost [2];
3941   static unsigned address_cost [2];
3942   tree op0, op1;
3943   comp_cost cost0, cost1, cost;
3944   machine_mode mode;
3945
3946   if (!costs_initialized)
3947     {
3948       tree type = build_pointer_type (integer_type_node);
3949       tree var, addr;
3950       rtx x;
3951       int i;
3952
3953       var = create_tmp_var_raw (integer_type_node, "test_var");
3954       TREE_STATIC (var) = 1;
3955       x = produce_memory_decl_rtl (var, NULL);
3956       SET_DECL_RTL (var, x);
3957
3958       addr = build1 (ADDR_EXPR, type, var);
3959
3960
3961       for (i = 0; i < 2; i++)
3962         {
3963           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3964                                                              2000), i);
3965
3966           symbol_cost[i] = computation_cost (addr, i) + 1;
3967
3968           address_cost[i]
3969             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3970           if (dump_file && (dump_flags & TDF_DETAILS))
3971             {
3972               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3973               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3974               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3975               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3976               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3977               fprintf (dump_file, "\n");
3978             }
3979         }
3980
3981       costs_initialized = true;
3982     }
3983
3984   STRIP_NOPS (expr);
3985
3986   if (SSA_VAR_P (expr))
3987     return no_cost;
3988
3989   if (is_gimple_min_invariant (expr))
3990     {
3991       if (TREE_CODE (expr) == INTEGER_CST)
3992         return new_cost (integer_cost [speed], 0);
3993
3994       if (TREE_CODE (expr) == ADDR_EXPR)
3995         {
3996           tree obj = TREE_OPERAND (expr, 0);
3997
3998           if (TREE_CODE (obj) == VAR_DECL
3999               || TREE_CODE (obj) == PARM_DECL
4000               || TREE_CODE (obj) == RESULT_DECL)
4001             return new_cost (symbol_cost [speed], 0);
4002         }
4003
4004       return new_cost (address_cost [speed], 0);
4005     }
4006
4007   switch (TREE_CODE (expr))
4008     {
4009     case POINTER_PLUS_EXPR:
4010     case PLUS_EXPR:
4011     case MINUS_EXPR:
4012     case MULT_EXPR:
4013       op0 = TREE_OPERAND (expr, 0);
4014       op1 = TREE_OPERAND (expr, 1);
4015       STRIP_NOPS (op0);
4016       STRIP_NOPS (op1);
4017       break;
4018
4019     CASE_CONVERT:
4020     case NEGATE_EXPR:
4021       op0 = TREE_OPERAND (expr, 0);
4022       STRIP_NOPS (op0);
4023       op1 = NULL_TREE;
4024       break;
4025
4026     default:
4027       /* Just an arbitrary value, FIXME.  */
4028       return new_cost (target_spill_cost[speed], 0);
4029     }
4030
4031   if (op0 == NULL_TREE
4032       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4033     cost0 = no_cost;
4034   else
4035     cost0 = force_expr_to_var_cost (op0, speed);
4036
4037   if (op1 == NULL_TREE
4038       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4039     cost1 = no_cost;
4040   else
4041     cost1 = force_expr_to_var_cost (op1, speed);
4042
4043   mode = TYPE_MODE (TREE_TYPE (expr));
4044   switch (TREE_CODE (expr))
4045     {
4046     case POINTER_PLUS_EXPR:
4047     case PLUS_EXPR:
4048     case MINUS_EXPR:
4049     case NEGATE_EXPR:
4050       cost = new_cost (add_cost (speed, mode), 0);
4051       if (TREE_CODE (expr) != NEGATE_EXPR)
4052         {
4053           tree mult = NULL_TREE;
4054           comp_cost sa_cost;
4055           if (TREE_CODE (op1) == MULT_EXPR)
4056             mult = op1;
4057           else if (TREE_CODE (op0) == MULT_EXPR)
4058             mult = op0;
4059
4060           if (mult != NULL_TREE
4061               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4062               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4063                                     speed, &sa_cost))
4064             return sa_cost;
4065         }
4066       break;
4067
4068     CASE_CONVERT:
4069       {
4070         tree inner_mode, outer_mode;
4071         outer_mode = TREE_TYPE (expr);
4072         inner_mode = TREE_TYPE (op0);
4073         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4074                                        TYPE_MODE (inner_mode), speed), 0);
4075       }
4076       break;
4077
4078     case MULT_EXPR:
4079       if (cst_and_fits_in_hwi (op0))
4080         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4081                                              mode, speed), 0);
4082       else if (cst_and_fits_in_hwi (op1))
4083         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4084                                              mode, speed), 0);
4085       else
4086         return new_cost (target_spill_cost [speed], 0);
4087       break;
4088
4089     default:
4090       gcc_unreachable ();
4091     }
4092
4093   cost = add_costs (cost, cost0);
4094   cost = add_costs (cost, cost1);
4095
4096   /* Bound the cost by target_spill_cost.  The parts of complicated
4097      computations often are either loop invariant or at least can
4098      be shared between several iv uses, so letting this grow without
4099      limits would not give reasonable results.  */
4100   if (cost.cost > (int) target_spill_cost [speed])
4101     cost.cost = target_spill_cost [speed];
4102
4103   return cost;
4104 }
4105
4106 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4107    invariants the computation depends on.  */
4108
4109 static comp_cost
4110 force_var_cost (struct ivopts_data *data,
4111                 tree expr, bitmap *depends_on)
4112 {
4113   if (depends_on)
4114     {
4115       fd_ivopts_data = data;
4116       walk_tree (&expr, find_depends, depends_on, NULL);
4117     }
4118
4119   return force_expr_to_var_cost (expr, data->speed);
4120 }
4121
4122 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4123    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4124    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4125    invariants the computation depends on.  */
4126
4127 static comp_cost
4128 split_address_cost (struct ivopts_data *data,
4129                     tree addr, bool *symbol_present, bool *var_present,
4130                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4131 {
4132   tree core;
4133   HOST_WIDE_INT bitsize;
4134   HOST_WIDE_INT bitpos;
4135   tree toffset;
4136   machine_mode mode;
4137   int unsignedp, volatilep;
4138
4139   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4140                               &unsignedp, &volatilep, false);
4141
4142   if (toffset != 0
4143       || bitpos % BITS_PER_UNIT != 0
4144       || TREE_CODE (core) != VAR_DECL)
4145     {
4146       *symbol_present = false;
4147       *var_present = true;
4148       fd_ivopts_data = data;
4149       walk_tree (&addr, find_depends, depends_on, NULL);
4150       return new_cost (target_spill_cost[data->speed], 0);
4151     }
4152
4153   *offset += bitpos / BITS_PER_UNIT;
4154   if (TREE_STATIC (core)
4155       || DECL_EXTERNAL (core))
4156     {
4157       *symbol_present = true;
4158       *var_present = false;
4159       return no_cost;
4160     }
4161
4162   *symbol_present = false;
4163   *var_present = true;
4164   return no_cost;
4165 }
4166
4167 /* Estimates cost of expressing difference of addresses E1 - E2 as
4168    var + symbol + offset.  The value of offset is added to OFFSET,
4169    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4170    part is missing.  DEPENDS_ON is a set of the invariants the computation
4171    depends on.  */
4172
4173 static comp_cost
4174 ptr_difference_cost (struct ivopts_data *data,
4175                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4176                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4177 {
4178   HOST_WIDE_INT diff = 0;
4179   aff_tree aff_e1, aff_e2;
4180   tree type;
4181
4182   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4183
4184   if (ptr_difference_const (e1, e2, &diff))
4185     {
4186       *offset += diff;
4187       *symbol_present = false;
4188       *var_present = false;
4189       return no_cost;
4190     }
4191
4192   if (integer_zerop (e2))
4193     return split_address_cost (data, TREE_OPERAND (e1, 0),
4194                                symbol_present, var_present, offset, depends_on);
4195
4196   *symbol_present = false;
4197   *var_present = true;
4198
4199   type = signed_type_for (TREE_TYPE (e1));
4200   tree_to_aff_combination (e1, type, &aff_e1);
4201   tree_to_aff_combination (e2, type, &aff_e2);
4202   aff_combination_scale (&aff_e2, -1);
4203   aff_combination_add (&aff_e1, &aff_e2);
4204
4205   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4206 }
4207
4208 /* Estimates cost of expressing difference E1 - E2 as
4209    var + symbol + offset.  The value of offset is added to OFFSET,
4210    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4211    part is missing.  DEPENDS_ON is a set of the invariants the computation
4212    depends on.  */
4213
4214 static comp_cost
4215 difference_cost (struct ivopts_data *data,
4216                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4217                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4218 {
4219   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4220   unsigned HOST_WIDE_INT off1, off2;
4221   aff_tree aff_e1, aff_e2;
4222   tree type;
4223
4224   e1 = strip_offset (e1, &off1);
4225   e2 = strip_offset (e2, &off2);
4226   *offset += off1 - off2;
4227
4228   STRIP_NOPS (e1);
4229   STRIP_NOPS (e2);
4230
4231   if (TREE_CODE (e1) == ADDR_EXPR)
4232     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4233                                 offset, depends_on);
4234   *symbol_present = false;
4235
4236   if (operand_equal_p (e1, e2, 0))
4237     {
4238       *var_present = false;
4239       return no_cost;
4240     }
4241
4242   *var_present = true;
4243
4244   if (integer_zerop (e2))
4245     return force_var_cost (data, e1, depends_on);
4246
4247   if (integer_zerop (e1))
4248     {
4249       comp_cost cost = force_var_cost (data, e2, depends_on);
4250       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4251       return cost;
4252     }
4253
4254   type = signed_type_for (TREE_TYPE (e1));
4255   tree_to_aff_combination (e1, type, &aff_e1);
4256   tree_to_aff_combination (e2, type, &aff_e2);
4257   aff_combination_scale (&aff_e2, -1);
4258   aff_combination_add (&aff_e1, &aff_e2);
4259
4260   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4261 }
4262
4263 /* Returns true if AFF1 and AFF2 are identical.  */
4264
4265 static bool
4266 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4267 {
4268   unsigned i;
4269
4270   if (aff1->n != aff2->n)
4271     return false;
4272
4273   for (i = 0; i < aff1->n; i++)
4274     {
4275       if (aff1->elts[i].coef != aff2->elts[i].coef)
4276         return false;
4277
4278       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4279         return false;
4280     }
4281   return true;
4282 }
4283
4284 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4285
4286 static int
4287 get_expr_id (struct ivopts_data *data, tree expr)
4288 {
4289   struct iv_inv_expr_ent ent;
4290   struct iv_inv_expr_ent **slot;
4291
4292   ent.expr = expr;
4293   ent.hash = iterative_hash_expr (expr, 0);
4294   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4295   if (*slot)
4296     return (*slot)->id;
4297
4298   *slot = XNEW (struct iv_inv_expr_ent);
4299   (*slot)->expr = expr;
4300   (*slot)->hash = ent.hash;
4301   (*slot)->id = data->inv_expr_id++;
4302   return (*slot)->id;
4303 }
4304
4305 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4306    requires a new compiler generated temporary.  Returns -1 otherwise.
4307    ADDRESS_P is a flag indicating if the expression is for address
4308    computation.  */
4309
4310 static int
4311 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4312                             tree cbase, HOST_WIDE_INT ratio,
4313                             bool address_p)
4314 {
4315   aff_tree ubase_aff, cbase_aff;
4316   tree expr, ub, cb;
4317
4318   STRIP_NOPS (ubase);
4319   STRIP_NOPS (cbase);
4320   ub = ubase;
4321   cb = cbase;
4322
4323   if ((TREE_CODE (ubase) == INTEGER_CST)
4324       && (TREE_CODE (cbase) == INTEGER_CST))
4325     return -1;
4326
4327   /* Strips the constant part. */
4328   if (TREE_CODE (ubase) == PLUS_EXPR
4329       || TREE_CODE (ubase) == MINUS_EXPR
4330       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4331     {
4332       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4333         ubase = TREE_OPERAND (ubase, 0);
4334     }
4335
4336   /* Strips the constant part. */
4337   if (TREE_CODE (cbase) == PLUS_EXPR
4338       || TREE_CODE (cbase) == MINUS_EXPR
4339       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4340     {
4341       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4342         cbase = TREE_OPERAND (cbase, 0);
4343     }
4344
4345   if (address_p)
4346     {
4347       if (((TREE_CODE (ubase) == SSA_NAME)
4348            || (TREE_CODE (ubase) == ADDR_EXPR
4349                && is_gimple_min_invariant (ubase)))
4350           && (TREE_CODE (cbase) == INTEGER_CST))
4351         return -1;
4352
4353       if (((TREE_CODE (cbase) == SSA_NAME)
4354            || (TREE_CODE (cbase) == ADDR_EXPR
4355                && is_gimple_min_invariant (cbase)))
4356           && (TREE_CODE (ubase) == INTEGER_CST))
4357         return -1;
4358     }
4359
4360   if (ratio == 1)
4361     {
4362       if (operand_equal_p (ubase, cbase, 0))
4363         return -1;
4364
4365       if (TREE_CODE (ubase) == ADDR_EXPR
4366           && TREE_CODE (cbase) == ADDR_EXPR)
4367         {
4368           tree usym, csym;
4369
4370           usym = TREE_OPERAND (ubase, 0);
4371           csym = TREE_OPERAND (cbase, 0);
4372           if (TREE_CODE (usym) == ARRAY_REF)
4373             {
4374               tree ind = TREE_OPERAND (usym, 1);
4375               if (TREE_CODE (ind) == INTEGER_CST
4376                   && tree_fits_shwi_p (ind)
4377                   && tree_to_shwi (ind) == 0)
4378                 usym = TREE_OPERAND (usym, 0);
4379             }
4380           if (TREE_CODE (csym) == ARRAY_REF)
4381             {
4382               tree ind = TREE_OPERAND (csym, 1);
4383               if (TREE_CODE (ind) == INTEGER_CST
4384                   && tree_fits_shwi_p (ind)
4385                   && tree_to_shwi (ind) == 0)
4386                 csym = TREE_OPERAND (csym, 0);
4387             }
4388           if (operand_equal_p (usym, csym, 0))
4389             return -1;
4390         }
4391       /* Now do more complex comparison  */
4392       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4393       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4394       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4395         return -1;
4396     }
4397
4398   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4399   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4400
4401   aff_combination_scale (&cbase_aff, -1 * ratio);
4402   aff_combination_add (&ubase_aff, &cbase_aff);
4403   expr = aff_combination_to_tree (&ubase_aff);
4404   return get_expr_id (data, expr);
4405 }
4406
4407
4408
4409 /* Determines the cost of the computation by that USE is expressed
4410    from induction variable CAND.  If ADDRESS_P is true, we just need
4411    to create an address from it, otherwise we want to get it into
4412    register.  A set of invariants we depend on is stored in
4413    DEPENDS_ON.  AT is the statement at that the value is computed.
4414    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4415    addressing is likely.  */
4416
4417 static comp_cost
4418 get_computation_cost_at (struct ivopts_data *data,
4419                          struct iv_use *use, struct iv_cand *cand,
4420                          bool address_p, bitmap *depends_on, gimple at,
4421                          bool *can_autoinc,
4422                          int *inv_expr_id)
4423 {
4424   tree ubase = use->iv->base, ustep = use->iv->step;
4425   tree cbase, cstep;
4426   tree utype = TREE_TYPE (ubase), ctype;
4427   unsigned HOST_WIDE_INT cstepi, offset = 0;
4428   HOST_WIDE_INT ratio, aratio;
4429   bool var_present, symbol_present, stmt_is_after_inc;
4430   comp_cost cost;
4431   widest_int rat;
4432   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4433   machine_mode mem_mode = (address_p
4434                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4435                                 : VOIDmode);
4436
4437   *depends_on = NULL;
4438
4439   /* Only consider real candidates.  */
4440   if (!cand->iv)
4441     return infinite_cost;
4442
4443   cbase = cand->iv->base;
4444   cstep = cand->iv->step;
4445   ctype = TREE_TYPE (cbase);
4446
4447   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4448     {
4449       /* We do not have a precision to express the values of use.  */
4450       return infinite_cost;
4451     }
4452
4453   if (address_p
4454       || (use->iv->base_object
4455           && cand->iv->base_object
4456           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4457           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4458     {
4459       /* Do not try to express address of an object with computation based
4460          on address of a different object.  This may cause problems in rtl
4461          level alias analysis (that does not expect this to be happening,
4462          as this is illegal in C), and would be unlikely to be useful
4463          anyway.  */
4464       if (use->iv->base_object
4465           && cand->iv->base_object
4466           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4467         return infinite_cost;
4468     }
4469
4470   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4471     {
4472       /* TODO -- add direct handling of this case.  */
4473       goto fallback;
4474     }
4475
4476   /* CSTEPI is removed from the offset in case statement is after the
4477      increment.  If the step is not constant, we use zero instead.
4478      This is a bit imprecise (there is the extra addition), but
4479      redundancy elimination is likely to transform the code so that
4480      it uses value of the variable before increment anyway,
4481      so it is not that much unrealistic.  */
4482   if (cst_and_fits_in_hwi (cstep))
4483     cstepi = int_cst_value (cstep);
4484   else
4485     cstepi = 0;
4486
4487   if (!constant_multiple_of (ustep, cstep, &rat))
4488     return infinite_cost;
4489
4490   if (wi::fits_shwi_p (rat))
4491     ratio = rat.to_shwi ();
4492   else
4493     return infinite_cost;
4494
4495   STRIP_NOPS (cbase);
4496   ctype = TREE_TYPE (cbase);
4497
4498   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4499
4500   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4501      or ratio == 1, it is better to handle this like
4502
4503      ubase - ratio * cbase + ratio * var
4504
4505      (also holds in the case ratio == -1, TODO.  */
4506
4507   if (cst_and_fits_in_hwi (cbase))
4508     {
4509       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4510       cost = difference_cost (data,
4511                               ubase, build_int_cst (utype, 0),
4512                               &symbol_present, &var_present, &offset,
4513                               depends_on);
4514       cost.cost /= avg_loop_niter (data->current_loop);
4515     }
4516   else if (ratio == 1)
4517     {
4518       tree real_cbase = cbase;
4519
4520       /* Check to see if any adjustment is needed.  */
4521       if (cstepi == 0 && stmt_is_after_inc)
4522         {
4523           aff_tree real_cbase_aff;
4524           aff_tree cstep_aff;
4525
4526           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4527                                    &real_cbase_aff);
4528           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4529
4530           aff_combination_add (&real_cbase_aff, &cstep_aff);
4531           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4532         }
4533
4534       cost = difference_cost (data,
4535                               ubase, real_cbase,
4536                               &symbol_present, &var_present, &offset,
4537                               depends_on);
4538       cost.cost /= avg_loop_niter (data->current_loop);
4539     }
4540   else if (address_p
4541            && !POINTER_TYPE_P (ctype)
4542            && multiplier_allowed_in_address_p
4543                 (ratio, mem_mode,
4544                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4545     {
4546       cbase
4547         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4548       cost = difference_cost (data,
4549                               ubase, cbase,
4550                               &symbol_present, &var_present, &offset,
4551                               depends_on);
4552       cost.cost /= avg_loop_niter (data->current_loop);
4553     }
4554   else
4555     {
4556       cost = force_var_cost (data, cbase, depends_on);
4557       cost = add_costs (cost,
4558                         difference_cost (data,
4559                                          ubase, build_int_cst (utype, 0),
4560                                          &symbol_present, &var_present,
4561                                          &offset, depends_on));
4562       cost.cost /= avg_loop_niter (data->current_loop);
4563       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4564     }
4565
4566   /* Set of invariants depended on by sub use has already been computed
4567      for the first use in the group.  */
4568   if (use->sub_id)
4569     {
4570       cost.cost = 0;
4571       if (depends_on && *depends_on)
4572         bitmap_clear (*depends_on);
4573     }
4574   else if (inv_expr_id)
4575     {
4576       *inv_expr_id =
4577           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4578       /* Clear depends on.  */
4579       if (*inv_expr_id != -1 && depends_on && *depends_on)
4580         bitmap_clear (*depends_on);
4581     }
4582
4583   /* If we are after the increment, the value of the candidate is higher by
4584      one iteration.  */
4585   if (stmt_is_after_inc)
4586     offset -= ratio * cstepi;
4587
4588   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4589      (symbol/var1/const parts may be omitted).  If we are looking for an
4590      address, find the cost of addressing this.  */
4591   if (address_p)
4592     return add_costs (cost,
4593                       get_address_cost (symbol_present, var_present,
4594                                         offset, ratio, cstepi,
4595                                         mem_mode,
4596                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4597                                         speed, stmt_is_after_inc,
4598                                         can_autoinc));
4599
4600   /* Otherwise estimate the costs for computing the expression.  */
4601   if (!symbol_present && !var_present && !offset)
4602     {
4603       if (ratio != 1)
4604         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4605       return cost;
4606     }
4607
4608   /* Symbol + offset should be compile-time computable so consider that they
4609       are added once to the variable, if present.  */
4610   if (var_present && (symbol_present || offset))
4611     cost.cost += adjust_setup_cost (data,
4612                                     add_cost (speed, TYPE_MODE (ctype)));
4613
4614   /* Having offset does not affect runtime cost in case it is added to
4615      symbol, but it increases complexity.  */
4616   if (offset)
4617     cost.complexity++;
4618
4619   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4620
4621   aratio = ratio > 0 ? ratio : -ratio;
4622   if (aratio != 1)
4623     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4624   return cost;
4625
4626 fallback:
4627   if (can_autoinc)
4628     *can_autoinc = false;
4629
4630   {
4631     /* Just get the expression, expand it and measure the cost.  */
4632     tree comp = get_computation_at (data->current_loop, use, cand, at);
4633
4634     if (!comp)
4635       return infinite_cost;
4636
4637     if (address_p)
4638       comp = build_simple_mem_ref (comp);
4639
4640     return new_cost (computation_cost (comp, speed), 0);
4641   }
4642 }
4643
4644 /* Determines the cost of the computation by that USE is expressed
4645    from induction variable CAND.  If ADDRESS_P is true, we just need
4646    to create an address from it, otherwise we want to get it into
4647    register.  A set of invariants we depend on is stored in
4648    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4649    autoinc addressing is likely.  */
4650
4651 static comp_cost
4652 get_computation_cost (struct ivopts_data *data,
4653                       struct iv_use *use, struct iv_cand *cand,
4654                       bool address_p, bitmap *depends_on,
4655                       bool *can_autoinc, int *inv_expr_id)
4656 {
4657   return get_computation_cost_at (data,
4658                                   use, cand, address_p, depends_on, use->stmt,
4659                                   can_autoinc, inv_expr_id);
4660 }
4661
4662 /* Determines cost of basing replacement of USE on CAND in a generic
4663    expression.  */
4664
4665 static bool
4666 determine_use_iv_cost_generic (struct ivopts_data *data,
4667                                struct iv_use *use, struct iv_cand *cand)
4668 {
4669   bitmap depends_on;
4670   comp_cost cost;
4671   int inv_expr_id = -1;
4672
4673   /* The simple case first -- if we need to express value of the preserved
4674      original biv, the cost is 0.  This also prevents us from counting the
4675      cost of increment twice -- once at this use and once in the cost of
4676      the candidate.  */
4677   if (cand->pos == IP_ORIGINAL
4678       && cand->incremented_at == use->stmt)
4679     {
4680       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4681                        ERROR_MARK, -1);
4682       return true;
4683     }
4684
4685   cost = get_computation_cost (data, use, cand, false, &depends_on,
4686                                NULL, &inv_expr_id);
4687
4688   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4689                    inv_expr_id);
4690
4691   return !infinite_cost_p (cost);
4692 }
4693
4694 /* Determines cost of basing replacement of USE on CAND in an address.  */
4695
4696 static bool
4697 determine_use_iv_cost_address (struct ivopts_data *data,
4698                                struct iv_use *use, struct iv_cand *cand)
4699 {
4700   bitmap depends_on;
4701   bool can_autoinc;
4702   int inv_expr_id = -1;
4703   struct iv_use *sub_use;
4704   comp_cost sub_cost;
4705   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4706                                          &can_autoinc, &inv_expr_id);
4707
4708   if (cand->ainc_use == use)
4709     {
4710       if (can_autoinc)
4711         cost.cost -= cand->cost_step;
4712       /* If we generated the candidate solely for exploiting autoincrement
4713          opportunities, and it turns out it can't be used, set the cost to
4714          infinity to make sure we ignore it.  */
4715       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4716         cost = infinite_cost;
4717     }
4718   for (sub_use = use->next;
4719        sub_use && !infinite_cost_p (cost);
4720        sub_use = sub_use->next)
4721     {
4722        sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4723                                         &can_autoinc, &inv_expr_id);
4724        cost = add_costs (cost, sub_cost);
4725     }
4726
4727   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4728                    inv_expr_id);
4729
4730   return !infinite_cost_p (cost);
4731 }
4732
4733 /* Computes value of candidate CAND at position AT in iteration NITER, and
4734    stores it to VAL.  */
4735
4736 static void
4737 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4738                aff_tree *val)
4739 {
4740   aff_tree step, delta, nit;
4741   struct iv *iv = cand->iv;
4742   tree type = TREE_TYPE (iv->base);
4743   tree steptype = type;
4744   if (POINTER_TYPE_P (type))
4745     steptype = sizetype;
4746   steptype = unsigned_type_for (type);
4747
4748   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4749   aff_combination_convert (&step, steptype);
4750   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4751   aff_combination_convert (&nit, steptype);
4752   aff_combination_mult (&nit, &step, &delta);
4753   if (stmt_after_increment (loop, cand, at))
4754     aff_combination_add (&delta, &step);
4755
4756   tree_to_aff_combination (iv->base, type, val);
4757   if (!POINTER_TYPE_P (type))
4758     aff_combination_convert (val, steptype);
4759   aff_combination_add (val, &delta);
4760 }
4761
4762 /* Returns period of induction variable iv.  */
4763
4764 static tree
4765 iv_period (struct iv *iv)
4766 {
4767   tree step = iv->step, period, type;
4768   tree pow2div;
4769
4770   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4771
4772   type = unsigned_type_for (TREE_TYPE (step));
4773   /* Period of the iv is lcm (step, type_range)/step -1,
4774      i.e., N*type_range/step - 1. Since type range is power
4775      of two, N == (step >> num_of_ending_zeros_binary (step),
4776      so the final result is
4777
4778        (type_range >> num_of_ending_zeros_binary (step)) - 1
4779
4780   */
4781   pow2div = num_ending_zeros (step);
4782
4783   period = build_low_bits_mask (type,
4784                                 (TYPE_PRECISION (type)
4785                                  - tree_to_uhwi (pow2div)));
4786
4787   return period;
4788 }
4789
4790 /* Returns the comparison operator used when eliminating the iv USE.  */
4791
4792 static enum tree_code
4793 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4794 {
4795   struct loop *loop = data->current_loop;
4796   basic_block ex_bb;
4797   edge exit;
4798
4799   ex_bb = gimple_bb (use->stmt);
4800   exit = EDGE_SUCC (ex_bb, 0);
4801   if (flow_bb_inside_loop_p (loop, exit->dest))
4802     exit = EDGE_SUCC (ex_bb, 1);
4803
4804   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4805 }
4806
4807 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4808    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4809    calculation is performed in non-wrapping type.
4810
4811    TODO: More generally, we could test for the situation that
4812          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4813          This would require knowing the sign of OFFSET.  */
4814
4815 static bool
4816 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4817 {
4818   enum tree_code code;
4819   tree e1, e2;
4820   aff_tree aff_e1, aff_e2, aff_offset;
4821
4822   if (!nowrap_type_p (TREE_TYPE (base)))
4823     return false;
4824
4825   base = expand_simple_operations (base);
4826
4827   if (TREE_CODE (base) == SSA_NAME)
4828     {
4829       gimple stmt = SSA_NAME_DEF_STMT (base);
4830
4831       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4832         return false;
4833
4834       code = gimple_assign_rhs_code (stmt);
4835       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4836         return false;
4837
4838       e1 = gimple_assign_rhs1 (stmt);
4839       e2 = gimple_assign_rhs2 (stmt);
4840     }
4841   else
4842     {
4843       code = TREE_CODE (base);
4844       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4845         return false;
4846       e1 = TREE_OPERAND (base, 0);
4847       e2 = TREE_OPERAND (base, 1);
4848     }
4849
4850   /* Use affine expansion as deeper inspection to prove the equality.  */
4851   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4852                                   &aff_e2, &data->name_expansion_cache);
4853   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4854                                   &aff_offset, &data->name_expansion_cache);
4855   aff_combination_scale (&aff_offset, -1);
4856   switch (code)
4857     {
4858     case PLUS_EXPR:
4859       aff_combination_add (&aff_e2, &aff_offset);
4860       if (aff_combination_zero_p (&aff_e2))
4861         return true;
4862
4863       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4864                                       &aff_e1, &data->name_expansion_cache);
4865       aff_combination_add (&aff_e1, &aff_offset);
4866       return aff_combination_zero_p (&aff_e1);
4867
4868     case POINTER_PLUS_EXPR:
4869       aff_combination_add (&aff_e2, &aff_offset);
4870       return aff_combination_zero_p (&aff_e2);
4871
4872     default:
4873       return false;
4874     }
4875 }
4876
4877 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4878    comparison with CAND.  NITER describes the number of iterations of
4879    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4880
4881    We aim to handle the following situation:
4882
4883    sometype *base, *p;
4884    int a, b, i;
4885
4886    i = a;
4887    p = p_0 = base + a;
4888
4889    do
4890      {
4891        bla (*p);
4892        p++;
4893        i++;
4894      }
4895    while (i < b);
4896
4897    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4898    We aim to optimize this to
4899
4900    p = p_0 = base + a;
4901    do
4902      {
4903        bla (*p);
4904        p++;
4905      }
4906    while (p < p_0 - a + b);
4907
4908    This preserves the correctness, since the pointer arithmetics does not
4909    overflow.  More precisely:
4910
4911    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4912       overflow in computing it or the values of p.
4913    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4914       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4915
4916 static bool
4917 iv_elimination_compare_lt (struct ivopts_data *data,
4918                            struct iv_cand *cand, enum tree_code *comp_p,
4919                            struct tree_niter_desc *niter)
4920 {
4921   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4922   struct aff_tree nit, tmpa, tmpb;
4923   enum tree_code comp;
4924   HOST_WIDE_INT step;
4925
4926   /* We need to know that the candidate induction variable does not overflow.
4927      While more complex analysis may be used to prove this, for now just
4928      check that the variable appears in the original program and that it
4929      is computed in a type that guarantees no overflows.  */
4930   cand_type = TREE_TYPE (cand->iv->base);
4931   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4932     return false;
4933
4934   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4935      the calculation of the BOUND could overflow, making the comparison
4936      invalid.  */
4937   if (!data->loop_single_exit_p)
4938     return false;
4939
4940   /* We need to be able to decide whether candidate is increasing or decreasing
4941      in order to choose the right comparison operator.  */
4942   if (!cst_and_fits_in_hwi (cand->iv->step))
4943     return false;
4944   step = int_cst_value (cand->iv->step);
4945
4946   /* Check that the number of iterations matches the expected pattern:
4947      a + 1 > b ? 0 : b - a - 1.  */
4948   mbz = niter->may_be_zero;
4949   if (TREE_CODE (mbz) == GT_EXPR)
4950     {
4951       /* Handle a + 1 > b.  */
4952       tree op0 = TREE_OPERAND (mbz, 0);
4953       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4954         {
4955           a = TREE_OPERAND (op0, 0);
4956           b = TREE_OPERAND (mbz, 1);
4957         }
4958       else
4959         return false;
4960     }
4961   else if (TREE_CODE (mbz) == LT_EXPR)
4962     {
4963       tree op1 = TREE_OPERAND (mbz, 1);
4964
4965       /* Handle b < a + 1.  */
4966       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4967         {
4968           a = TREE_OPERAND (op1, 0);
4969           b = TREE_OPERAND (mbz, 0);
4970         }
4971       else
4972         return false;
4973     }
4974   else
4975     return false;
4976
4977   /* Expected number of iterations is B - A - 1.  Check that it matches
4978      the actual number, i.e., that B - A - NITER = 1.  */
4979   tree_to_aff_combination (niter->niter, nit_type, &nit);
4980   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4981   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4982   aff_combination_scale (&nit, -1);
4983   aff_combination_scale (&tmpa, -1);
4984   aff_combination_add (&tmpb, &tmpa);
4985   aff_combination_add (&tmpb, &nit);
4986   if (tmpb.n != 0 || tmpb.offset != 1)
4987     return false;
4988
4989   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4990      overflow.  */
4991   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4992                         cand->iv->step,
4993                         fold_convert (TREE_TYPE (cand->iv->step), a));
4994   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4995     return false;
4996
4997   /* Determine the new comparison operator.  */
4998   comp = step < 0 ? GT_EXPR : LT_EXPR;
4999   if (*comp_p == NE_EXPR)
5000     *comp_p = comp;
5001   else if (*comp_p == EQ_EXPR)
5002     *comp_p = invert_tree_comparison (comp, false);
5003   else
5004     gcc_unreachable ();
5005
5006   return true;
5007 }
5008
5009 /* Check whether it is possible to express the condition in USE by comparison
5010    of candidate CAND.  If so, store the value compared with to BOUND, and the
5011    comparison operator to COMP.  */
5012
5013 static bool
5014 may_eliminate_iv (struct ivopts_data *data,
5015                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5016                   enum tree_code *comp)
5017 {
5018   basic_block ex_bb;
5019   edge exit;
5020   tree period;
5021   struct loop *loop = data->current_loop;
5022   aff_tree bnd;
5023   struct tree_niter_desc *desc = NULL;
5024
5025   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5026     return false;
5027
5028   /* For now works only for exits that dominate the loop latch.
5029      TODO: extend to other conditions inside loop body.  */
5030   ex_bb = gimple_bb (use->stmt);
5031   if (use->stmt != last_stmt (ex_bb)
5032       || gimple_code (use->stmt) != GIMPLE_COND
5033       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5034     return false;
5035
5036   exit = EDGE_SUCC (ex_bb, 0);
5037   if (flow_bb_inside_loop_p (loop, exit->dest))
5038     exit = EDGE_SUCC (ex_bb, 1);
5039   if (flow_bb_inside_loop_p (loop, exit->dest))
5040     return false;
5041
5042   desc = niter_for_exit (data, exit);
5043   if (!desc)
5044     return false;
5045
5046   /* Determine whether we can use the variable to test the exit condition.
5047      This is the case iff the period of the induction variable is greater
5048      than the number of iterations for which the exit condition is true.  */
5049   period = iv_period (cand->iv);
5050
5051   /* If the number of iterations is constant, compare against it directly.  */
5052   if (TREE_CODE (desc->niter) == INTEGER_CST)
5053     {
5054       /* See cand_value_at.  */
5055       if (stmt_after_increment (loop, cand, use->stmt))
5056         {
5057           if (!tree_int_cst_lt (desc->niter, period))
5058             return false;
5059         }
5060       else
5061         {
5062           if (tree_int_cst_lt (period, desc->niter))
5063             return false;
5064         }
5065     }
5066
5067   /* If not, and if this is the only possible exit of the loop, see whether
5068      we can get a conservative estimate on the number of iterations of the
5069      entire loop and compare against that instead.  */
5070   else
5071     {
5072       widest_int period_value, max_niter;
5073
5074       max_niter = desc->max;
5075       if (stmt_after_increment (loop, cand, use->stmt))
5076         max_niter += 1;
5077       period_value = wi::to_widest (period);
5078       if (wi::gtu_p (max_niter, period_value))
5079         {
5080           /* See if we can take advantage of inferred loop bound information.  */
5081           if (data->loop_single_exit_p)
5082             {
5083               if (!max_loop_iterations (loop, &max_niter))
5084                 return false;
5085               /* The loop bound is already adjusted by adding 1.  */
5086               if (wi::gtu_p (max_niter, period_value))
5087                 return false;
5088             }
5089           else
5090             return false;
5091         }
5092     }
5093
5094   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5095
5096   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5097                          aff_combination_to_tree (&bnd));
5098   *comp = iv_elimination_compare (data, use);
5099
5100   /* It is unlikely that computing the number of iterations using division
5101      would be more profitable than keeping the original induction variable.  */
5102   if (expression_expensive_p (*bound))
5103     return false;
5104
5105   /* Sometimes, it is possible to handle the situation that the number of
5106      iterations may be zero unless additional assumtions by using <
5107      instead of != in the exit condition.
5108
5109      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5110            base the exit condition on it.  However, that is often too
5111            expensive.  */
5112   if (!integer_zerop (desc->may_be_zero))
5113     return iv_elimination_compare_lt (data, cand, comp, desc);
5114
5115   return true;
5116 }
5117
5118  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5119     be copied, if is is used in the loop body and DATA->body_includes_call.  */
5120
5121 static int
5122 parm_decl_cost (struct ivopts_data *data, tree bound)
5123 {
5124   tree sbound = bound;
5125   STRIP_NOPS (sbound);
5126
5127   if (TREE_CODE (sbound) == SSA_NAME
5128       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5129       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5130       && data->body_includes_call)
5131     return COSTS_N_INSNS (1);
5132
5133   return 0;
5134 }
5135
5136 /* Determines cost of basing replacement of USE on CAND in a condition.  */
5137
5138 static bool
5139 determine_use_iv_cost_condition (struct ivopts_data *data,
5140                                  struct iv_use *use, struct iv_cand *cand)
5141 {
5142   tree bound = NULL_TREE;
5143   struct iv *cmp_iv;
5144   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5145   comp_cost elim_cost, express_cost, cost, bound_cost;
5146   bool ok;
5147   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5148   tree *control_var, *bound_cst;
5149   enum tree_code comp = ERROR_MARK;
5150
5151   /* Only consider real candidates.  */
5152   if (!cand->iv)
5153     {
5154       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5155                        ERROR_MARK, -1);
5156       return false;
5157     }
5158
5159   /* Try iv elimination.  */
5160   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5161     {
5162       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5163       if (elim_cost.cost == 0)
5164         elim_cost.cost = parm_decl_cost (data, bound);
5165       else if (TREE_CODE (bound) == INTEGER_CST)
5166         elim_cost.cost = 0;
5167       /* If we replace a loop condition 'i < n' with 'p < base + n',
5168          depends_on_elim will have 'base' and 'n' set, which implies
5169          that both 'base' and 'n' will be live during the loop.  More likely,
5170          'base + n' will be loop invariant, resulting in only one live value
5171          during the loop.  So in that case we clear depends_on_elim and set
5172         elim_inv_expr_id instead.  */
5173       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5174         {
5175           elim_inv_expr_id = get_expr_id (data, bound);
5176           bitmap_clear (depends_on_elim);
5177         }
5178       /* The bound is a loop invariant, so it will be only computed
5179          once.  */
5180       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5181     }
5182   else
5183     elim_cost = infinite_cost;
5184
5185   /* Try expressing the original giv.  If it is compared with an invariant,
5186      note that we cannot get rid of it.  */
5187   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5188                               NULL, &cmp_iv);
5189   gcc_assert (ok);
5190
5191   /* When the condition is a comparison of the candidate IV against
5192      zero, prefer this IV.
5193
5194      TODO: The constant that we're subtracting from the cost should
5195      be target-dependent.  This information should be added to the
5196      target costs for each backend.  */
5197   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5198       && integer_zerop (*bound_cst)
5199       && (operand_equal_p (*control_var, cand->var_after, 0)
5200           || operand_equal_p (*control_var, cand->var_before, 0)))
5201     elim_cost.cost -= 1;
5202
5203   express_cost = get_computation_cost (data, use, cand, false,
5204                                        &depends_on_express, NULL,
5205                                        &express_inv_expr_id);
5206   fd_ivopts_data = data;
5207   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5208
5209   /* Count the cost of the original bound as well.  */
5210   bound_cost = force_var_cost (data, *bound_cst, NULL);
5211   if (bound_cost.cost == 0)
5212     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5213   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5214     bound_cost.cost = 0;
5215   express_cost.cost += bound_cost.cost;
5216
5217   /* Choose the better approach, preferring the eliminated IV. */
5218   if (compare_costs (elim_cost, express_cost) <= 0)
5219     {
5220       cost = elim_cost;
5221       depends_on = depends_on_elim;
5222       depends_on_elim = NULL;
5223       inv_expr_id = elim_inv_expr_id;
5224     }
5225   else
5226     {
5227       cost = express_cost;
5228       depends_on = depends_on_express;
5229       depends_on_express = NULL;
5230       bound = NULL_TREE;
5231       comp = ERROR_MARK;
5232       inv_expr_id = express_inv_expr_id;
5233     }
5234
5235   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5236
5237   if (depends_on_elim)
5238     BITMAP_FREE (depends_on_elim);
5239   if (depends_on_express)
5240     BITMAP_FREE (depends_on_express);
5241
5242   return !infinite_cost_p (cost);
5243 }
5244
5245 /* Determines cost of basing replacement of USE on CAND.  Returns false
5246    if USE cannot be based on CAND.  */
5247
5248 static bool
5249 determine_use_iv_cost (struct ivopts_data *data,
5250                        struct iv_use *use, struct iv_cand *cand)
5251 {
5252   switch (use->type)
5253     {
5254     case USE_NONLINEAR_EXPR:
5255       return determine_use_iv_cost_generic (data, use, cand);
5256
5257     case USE_ADDRESS:
5258       return determine_use_iv_cost_address (data, use, cand);
5259
5260     case USE_COMPARE:
5261       return determine_use_iv_cost_condition (data, use, cand);
5262
5263     default:
5264       gcc_unreachable ();
5265     }
5266 }
5267
5268 /* Return true if get_computation_cost indicates that autoincrement is
5269    a possibility for the pair of USE and CAND, false otherwise.  */
5270
5271 static bool
5272 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5273                            struct iv_cand *cand)
5274 {
5275   bitmap depends_on;
5276   bool can_autoinc;
5277   comp_cost cost;
5278
5279   if (use->type != USE_ADDRESS)
5280     return false;
5281
5282   cost = get_computation_cost (data, use, cand, true, &depends_on,
5283                                &can_autoinc, NULL);
5284
5285   BITMAP_FREE (depends_on);
5286
5287   return !infinite_cost_p (cost) && can_autoinc;
5288 }
5289
5290 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5291    use that allows autoincrement, and set their AINC_USE if possible.  */
5292
5293 static void
5294 set_autoinc_for_original_candidates (struct ivopts_data *data)
5295 {
5296   unsigned i, j;
5297
5298   for (i = 0; i < n_iv_cands (data); i++)
5299     {
5300       struct iv_cand *cand = iv_cand (data, i);
5301       struct iv_use *closest_before = NULL;
5302       struct iv_use *closest_after = NULL;
5303       if (cand->pos != IP_ORIGINAL)
5304         continue;
5305
5306       for (j = 0; j < n_iv_uses (data); j++)
5307         {
5308           struct iv_use *use = iv_use (data, j);
5309           unsigned uid = gimple_uid (use->stmt);
5310
5311           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5312             continue;
5313
5314           if (uid < gimple_uid (cand->incremented_at)
5315               && (closest_before == NULL
5316                   || uid > gimple_uid (closest_before->stmt)))
5317             closest_before = use;
5318
5319           if (uid > gimple_uid (cand->incremented_at)
5320               && (closest_after == NULL
5321                   || uid < gimple_uid (closest_after->stmt)))
5322             closest_after = use;
5323         }
5324
5325       if (closest_before != NULL
5326           && autoinc_possible_for_pair (data, closest_before, cand))
5327         cand->ainc_use = closest_before;
5328       else if (closest_after != NULL
5329                && autoinc_possible_for_pair (data, closest_after, cand))
5330         cand->ainc_use = closest_after;
5331     }
5332 }
5333
5334 /* Finds the candidates for the induction variables.  */
5335
5336 static void
5337 find_iv_candidates (struct ivopts_data *data)
5338 {
5339   /* Add commonly used ivs.  */
5340   add_standard_iv_candidates (data);
5341
5342   /* Add old induction variables.  */
5343   add_old_ivs_candidates (data);
5344
5345   /* Add induction variables derived from uses.  */
5346   add_derived_ivs_candidates (data);
5347
5348   set_autoinc_for_original_candidates (data);
5349
5350   /* Record the important candidates.  */
5351   record_important_candidates (data);
5352 }
5353
5354 /* Determines costs of basing the use of the iv on an iv candidate.  */
5355
5356 static void
5357 determine_use_iv_costs (struct ivopts_data *data)
5358 {
5359   unsigned i, j;
5360   struct iv_use *use;
5361   struct iv_cand *cand;
5362   bitmap to_clear = BITMAP_ALLOC (NULL);
5363
5364   alloc_use_cost_map (data);
5365
5366   for (i = 0; i < n_iv_uses (data); i++)
5367     {
5368       use = iv_use (data, i);
5369
5370       if (data->consider_all_candidates)
5371         {
5372           for (j = 0; j < n_iv_cands (data); j++)
5373             {
5374               cand = iv_cand (data, j);
5375               determine_use_iv_cost (data, use, cand);
5376             }
5377         }
5378       else
5379         {
5380           bitmap_iterator bi;
5381
5382           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5383             {
5384               cand = iv_cand (data, j);
5385               if (!determine_use_iv_cost (data, use, cand))
5386                 bitmap_set_bit (to_clear, j);
5387             }
5388
5389           /* Remove the candidates for that the cost is infinite from
5390              the list of related candidates.  */
5391           bitmap_and_compl_into (use->related_cands, to_clear);
5392           bitmap_clear (to_clear);
5393         }
5394     }
5395
5396   BITMAP_FREE (to_clear);
5397
5398   if (dump_file && (dump_flags & TDF_DETAILS))
5399     {
5400       fprintf (dump_file, "Use-candidate costs:\n");
5401
5402       for (i = 0; i < n_iv_uses (data); i++)
5403         {
5404           use = iv_use (data, i);
5405
5406           fprintf (dump_file, "Use %d:\n", i);
5407           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5408           for (j = 0; j < use->n_map_members; j++)
5409             {
5410               if (!use->cost_map[j].cand
5411                   || infinite_cost_p (use->cost_map[j].cost))
5412                 continue;
5413
5414               fprintf (dump_file, "  %d\t%d\t%d\t",
5415                        use->cost_map[j].cand->id,
5416                        use->cost_map[j].cost.cost,
5417                        use->cost_map[j].cost.complexity);
5418               if (use->cost_map[j].depends_on)
5419                 bitmap_print (dump_file,
5420                               use->cost_map[j].depends_on, "","");
5421               if (use->cost_map[j].inv_expr_id != -1)
5422                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5423               fprintf (dump_file, "\n");
5424             }
5425
5426           fprintf (dump_file, "\n");
5427         }
5428       fprintf (dump_file, "\n");
5429     }
5430 }
5431
5432 /* Determines cost of the candidate CAND.  */
5433
5434 static void
5435 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5436 {
5437   comp_cost cost_base;
5438   unsigned cost, cost_step;
5439   tree base;
5440
5441   if (!cand->iv)
5442     {
5443       cand->cost = 0;
5444       return;
5445     }
5446
5447   /* There are two costs associated with the candidate -- its increment
5448      and its initialization.  The second is almost negligible for any loop
5449      that rolls enough, so we take it just very little into account.  */
5450
5451   base = cand->iv->base;
5452   cost_base = force_var_cost (data, base, NULL);
5453   /* It will be exceptional that the iv register happens to be initialized with
5454      the proper value at no cost.  In general, there will at least be a regcopy
5455      or a const set.  */
5456   if (cost_base.cost == 0)
5457     cost_base.cost = COSTS_N_INSNS (1);
5458   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5459
5460   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5461
5462   /* Prefer the original ivs unless we may gain something by replacing it.
5463      The reason is to make debugging simpler; so this is not relevant for
5464      artificial ivs created by other optimization passes.  */
5465   if (cand->pos != IP_ORIGINAL
5466       || !SSA_NAME_VAR (cand->var_before)
5467       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5468     cost++;
5469
5470   /* Prefer not to insert statements into latch unless there are some
5471      already (so that we do not create unnecessary jumps).  */
5472   if (cand->pos == IP_END
5473       && empty_block_p (ip_end_pos (data->current_loop)))
5474     cost++;
5475
5476   cand->cost = cost;
5477   cand->cost_step = cost_step;
5478 }
5479
5480 /* Determines costs of computation of the candidates.  */
5481
5482 static void
5483 determine_iv_costs (struct ivopts_data *data)
5484 {
5485   unsigned i;
5486
5487   if (dump_file && (dump_flags & TDF_DETAILS))
5488     {
5489       fprintf (dump_file, "Candidate costs:\n");
5490       fprintf (dump_file, "  cand\tcost\n");
5491     }
5492
5493   for (i = 0; i < n_iv_cands (data); i++)
5494     {
5495       struct iv_cand *cand = iv_cand (data, i);
5496
5497       determine_iv_cost (data, cand);
5498
5499       if (dump_file && (dump_flags & TDF_DETAILS))
5500         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5501     }
5502
5503   if (dump_file && (dump_flags & TDF_DETAILS))
5504     fprintf (dump_file, "\n");
5505 }
5506
5507 /* Calculates cost for having SIZE induction variables.  */
5508
5509 static unsigned
5510 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5511 {
5512   /* We add size to the cost, so that we prefer eliminating ivs
5513      if possible.  */
5514   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5515                                             data->body_includes_call);
5516 }
5517
5518 /* For each size of the induction variable set determine the penalty.  */
5519
5520 static void
5521 determine_set_costs (struct ivopts_data *data)
5522 {
5523   unsigned j, n;
5524   gphi *phi;
5525   gphi_iterator psi;
5526   tree op;
5527   struct loop *loop = data->current_loop;
5528   bitmap_iterator bi;
5529
5530   if (dump_file && (dump_flags & TDF_DETAILS))
5531     {
5532       fprintf (dump_file, "Global costs:\n");
5533       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5534       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5535       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5536       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5537     }
5538
5539   n = 0;
5540   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5541     {
5542       phi = psi.phi ();
5543       op = PHI_RESULT (phi);
5544
5545       if (virtual_operand_p (op))
5546         continue;
5547
5548       if (get_iv (data, op))
5549         continue;
5550
5551       n++;
5552     }
5553
5554   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5555     {
5556       struct version_info *info = ver_info (data, j);
5557
5558       if (info->inv_id && info->has_nonlin_use)
5559         n++;
5560     }
5561
5562   data->regs_used = n;
5563   if (dump_file && (dump_flags & TDF_DETAILS))
5564     fprintf (dump_file, "  regs_used %d\n", n);
5565
5566   if (dump_file && (dump_flags & TDF_DETAILS))
5567     {
5568       fprintf (dump_file, "  cost for size:\n");
5569       fprintf (dump_file, "  ivs\tcost\n");
5570       for (j = 0; j <= 2 * target_avail_regs; j++)
5571         fprintf (dump_file, "  %d\t%d\n", j,
5572                  ivopts_global_cost_for_size (data, j));
5573       fprintf (dump_file, "\n");
5574     }
5575 }
5576
5577 /* Returns true if A is a cheaper cost pair than B.  */
5578
5579 static bool
5580 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5581 {
5582   int cmp;
5583
5584   if (!a)
5585     return false;
5586
5587   if (!b)
5588     return true;
5589
5590   cmp = compare_costs (a->cost, b->cost);
5591   if (cmp < 0)
5592     return true;
5593
5594   if (cmp > 0)
5595     return false;
5596
5597   /* In case the costs are the same, prefer the cheaper candidate.  */
5598   if (a->cand->cost < b->cand->cost)
5599     return true;
5600
5601   return false;
5602 }
5603
5604
5605 /* Returns candidate by that USE is expressed in IVS.  */
5606
5607 static struct cost_pair *
5608 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5609 {
5610   return ivs->cand_for_use[use->id];
5611 }
5612
5613 /* Computes the cost field of IVS structure.  */
5614
5615 static void
5616 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5617 {
5618   comp_cost cost = ivs->cand_use_cost;
5619
5620   cost.cost += ivs->cand_cost;
5621
5622   cost.cost += ivopts_global_cost_for_size (data,
5623                                             ivs->n_regs + ivs->num_used_inv_expr);
5624
5625   ivs->cost = cost;
5626 }
5627
5628 /* Remove invariants in set INVS to set IVS.  */
5629
5630 static void
5631 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5632 {
5633   bitmap_iterator bi;
5634   unsigned iid;
5635
5636   if (!invs)
5637     return;
5638
5639   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5640     {
5641       ivs->n_invariant_uses[iid]--;
5642       if (ivs->n_invariant_uses[iid] == 0)
5643         ivs->n_regs--;
5644     }
5645 }
5646
5647 /* Set USE not to be expressed by any candidate in IVS.  */
5648
5649 static void
5650 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5651                  struct iv_use *use)
5652 {
5653   unsigned uid = use->id, cid;
5654   struct cost_pair *cp;
5655
5656   cp = ivs->cand_for_use[uid];
5657   if (!cp)
5658     return;
5659   cid = cp->cand->id;
5660
5661   ivs->bad_uses++;
5662   ivs->cand_for_use[uid] = NULL;
5663   ivs->n_cand_uses[cid]--;
5664
5665   if (ivs->n_cand_uses[cid] == 0)
5666     {
5667       bitmap_clear_bit (ivs->cands, cid);
5668       /* Do not count the pseudocandidates.  */
5669       if (cp->cand->iv)
5670         ivs->n_regs--;
5671       ivs->n_cands--;
5672       ivs->cand_cost -= cp->cand->cost;
5673
5674       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5675     }
5676
5677   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5678
5679   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5680
5681   if (cp->inv_expr_id != -1)
5682     {
5683       ivs->used_inv_expr[cp->inv_expr_id]--;
5684       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5685         ivs->num_used_inv_expr--;
5686     }
5687   iv_ca_recount_cost (data, ivs);
5688 }
5689
5690 /* Add invariants in set INVS to set IVS.  */
5691
5692 static void
5693 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5694 {
5695   bitmap_iterator bi;
5696   unsigned iid;
5697
5698   if (!invs)
5699     return;
5700
5701   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5702     {
5703       ivs->n_invariant_uses[iid]++;
5704       if (ivs->n_invariant_uses[iid] == 1)
5705         ivs->n_regs++;
5706     }
5707 }
5708
5709 /* Set cost pair for USE in set IVS to CP.  */
5710
5711 static void
5712 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5713               struct iv_use *use, struct cost_pair *cp)
5714 {
5715   unsigned uid = use->id, cid;
5716
5717   if (ivs->cand_for_use[uid] == cp)
5718     return;
5719
5720   if (ivs->cand_for_use[uid])
5721     iv_ca_set_no_cp (data, ivs, use);
5722
5723   if (cp)
5724     {
5725       cid = cp->cand->id;
5726
5727       ivs->bad_uses--;
5728       ivs->cand_for_use[uid] = cp;
5729       ivs->n_cand_uses[cid]++;
5730       if (ivs->n_cand_uses[cid] == 1)
5731         {
5732           bitmap_set_bit (ivs->cands, cid);
5733           /* Do not count the pseudocandidates.  */
5734           if (cp->cand->iv)
5735             ivs->n_regs++;
5736           ivs->n_cands++;
5737           ivs->cand_cost += cp->cand->cost;
5738
5739           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5740         }
5741
5742       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5743       iv_ca_set_add_invariants (ivs, cp->depends_on);
5744
5745       if (cp->inv_expr_id != -1)
5746         {
5747           ivs->used_inv_expr[cp->inv_expr_id]++;
5748           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5749             ivs->num_used_inv_expr++;
5750         }
5751       iv_ca_recount_cost (data, ivs);
5752     }
5753 }
5754
5755 /* Extend set IVS by expressing USE by some of the candidates in it
5756    if possible.  Consider all important candidates if candidates in
5757    set IVS don't give any result.  */
5758
5759 static void
5760 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5761                struct iv_use *use)
5762 {
5763   struct cost_pair *best_cp = NULL, *cp;
5764   bitmap_iterator bi;
5765   unsigned i;
5766   struct iv_cand *cand;
5767
5768   gcc_assert (ivs->upto >= use->id);
5769   ivs->upto++;
5770   ivs->bad_uses++;
5771
5772   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5773     {
5774       cand = iv_cand (data, i);
5775       cp = get_use_iv_cost (data, use, cand);
5776       if (cheaper_cost_pair (cp, best_cp))
5777         best_cp = cp;
5778     }
5779
5780   if (best_cp == NULL)
5781     {
5782       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5783         {
5784           cand = iv_cand (data, i);
5785           cp = get_use_iv_cost (data, use, cand);
5786           if (cheaper_cost_pair (cp, best_cp))
5787             best_cp = cp;
5788         }
5789     }
5790
5791   iv_ca_set_cp (data, ivs, use, best_cp);
5792 }
5793
5794 /* Get cost for assignment IVS.  */
5795
5796 static comp_cost
5797 iv_ca_cost (struct iv_ca *ivs)
5798 {
5799   /* This was a conditional expression but it triggered a bug in
5800      Sun C 5.5.  */
5801   if (ivs->bad_uses)
5802     return infinite_cost;
5803   else
5804     return ivs->cost;
5805 }
5806
5807 /* Returns true if all dependences of CP are among invariants in IVS.  */
5808
5809 static bool
5810 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5811 {
5812   unsigned i;
5813   bitmap_iterator bi;
5814
5815   if (!cp->depends_on)
5816     return true;
5817
5818   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5819     {
5820       if (ivs->n_invariant_uses[i] == 0)
5821         return false;
5822     }
5823
5824   return true;
5825 }
5826
5827 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5828    it before NEXT_CHANGE.  */
5829
5830 static struct iv_ca_delta *
5831 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5832                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5833 {
5834   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5835
5836   change->use = use;
5837   change->old_cp = old_cp;
5838   change->new_cp = new_cp;
5839   change->next_change = next_change;
5840
5841   return change;
5842 }
5843
5844 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5845    are rewritten.  */
5846
5847 static struct iv_ca_delta *
5848 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5849 {
5850   struct iv_ca_delta *last;
5851
5852   if (!l2)
5853     return l1;
5854
5855   if (!l1)
5856     return l2;
5857
5858   for (last = l1; last->next_change; last = last->next_change)
5859     continue;
5860   last->next_change = l2;
5861
5862   return l1;
5863 }
5864
5865 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5866
5867 static struct iv_ca_delta *
5868 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5869 {
5870   struct iv_ca_delta *act, *next, *prev = NULL;
5871
5872   for (act = delta; act; act = next)
5873     {
5874       next = act->next_change;
5875       act->next_change = prev;
5876       prev = act;
5877
5878       std::swap (act->old_cp, act->new_cp);
5879     }
5880
5881   return prev;
5882 }
5883
5884 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5885    reverted instead.  */
5886
5887 static void
5888 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5889                     struct iv_ca_delta *delta, bool forward)
5890 {
5891   struct cost_pair *from, *to;
5892   struct iv_ca_delta *act;
5893
5894   if (!forward)
5895     delta = iv_ca_delta_reverse (delta);
5896
5897   for (act = delta; act; act = act->next_change)
5898     {
5899       from = act->old_cp;
5900       to = act->new_cp;
5901       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5902       iv_ca_set_cp (data, ivs, act->use, to);
5903     }
5904
5905   if (!forward)
5906     iv_ca_delta_reverse (delta);
5907 }
5908
5909 /* Returns true if CAND is used in IVS.  */
5910
5911 static bool
5912 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5913 {
5914   return ivs->n_cand_uses[cand->id] > 0;
5915 }
5916
5917 /* Returns number of induction variable candidates in the set IVS.  */
5918
5919 static unsigned
5920 iv_ca_n_cands (struct iv_ca *ivs)
5921 {
5922   return ivs->n_cands;
5923 }
5924
5925 /* Free the list of changes DELTA.  */
5926
5927 static void
5928 iv_ca_delta_free (struct iv_ca_delta **delta)
5929 {
5930   struct iv_ca_delta *act, *next;
5931
5932   for (act = *delta; act; act = next)
5933     {
5934       next = act->next_change;
5935       free (act);
5936     }
5937
5938   *delta = NULL;
5939 }
5940
5941 /* Allocates new iv candidates assignment.  */
5942
5943 static struct iv_ca *
5944 iv_ca_new (struct ivopts_data *data)
5945 {
5946   struct iv_ca *nw = XNEW (struct iv_ca);
5947
5948   nw->upto = 0;
5949   nw->bad_uses = 0;
5950   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5951   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5952   nw->cands = BITMAP_ALLOC (NULL);
5953   nw->n_cands = 0;
5954   nw->n_regs = 0;
5955   nw->cand_use_cost = no_cost;
5956   nw->cand_cost = 0;
5957   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5958   nw->cost = no_cost;
5959   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5960   nw->num_used_inv_expr = 0;
5961
5962   return nw;
5963 }
5964
5965 /* Free memory occupied by the set IVS.  */
5966
5967 static void
5968 iv_ca_free (struct iv_ca **ivs)
5969 {
5970   free ((*ivs)->cand_for_use);
5971   free ((*ivs)->n_cand_uses);
5972   BITMAP_FREE ((*ivs)->cands);
5973   free ((*ivs)->n_invariant_uses);
5974   free ((*ivs)->used_inv_expr);
5975   free (*ivs);
5976   *ivs = NULL;
5977 }
5978
5979 /* Dumps IVS to FILE.  */
5980
5981 static void
5982 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5983 {
5984   const char *pref = "  invariants ";
5985   unsigned i;
5986   comp_cost cost = iv_ca_cost (ivs);
5987
5988   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5989   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5990            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5991   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5992
5993    for (i = 0; i < ivs->upto; i++)
5994     {
5995       struct iv_use *use = iv_use (data, i);
5996       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5997       if (cp)
5998         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5999                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6000       else
6001         fprintf (file, "   use:%d --> ??\n", use->id);
6002     }
6003
6004   for (i = 1; i <= data->max_inv_id; i++)
6005     if (ivs->n_invariant_uses[i])
6006       {
6007         fprintf (file, "%s%d", pref, i);
6008         pref = ", ";
6009       }
6010   fprintf (file, "\n\n");
6011 }
6012
6013 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6014    new set, and store differences in DELTA.  Number of induction variables
6015    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6016    the function will try to find a solution with mimimal iv candidates.  */
6017
6018 static comp_cost
6019 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6020               struct iv_cand *cand, struct iv_ca_delta **delta,
6021               unsigned *n_ivs, bool min_ncand)
6022 {
6023   unsigned i;
6024   comp_cost cost;
6025   struct iv_use *use;
6026   struct cost_pair *old_cp, *new_cp;
6027
6028   *delta = NULL;
6029   for (i = 0; i < ivs->upto; i++)
6030     {
6031       use = iv_use (data, i);
6032       old_cp = iv_ca_cand_for_use (ivs, use);
6033
6034       if (old_cp
6035           && old_cp->cand == cand)
6036         continue;
6037
6038       new_cp = get_use_iv_cost (data, use, cand);
6039       if (!new_cp)
6040         continue;
6041
6042       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6043         continue;
6044
6045       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6046         continue;
6047
6048       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6049     }
6050
6051   iv_ca_delta_commit (data, ivs, *delta, true);
6052   cost = iv_ca_cost (ivs);
6053   if (n_ivs)
6054     *n_ivs = iv_ca_n_cands (ivs);
6055   iv_ca_delta_commit (data, ivs, *delta, false);
6056
6057   return cost;
6058 }
6059
6060 /* Try narrowing set IVS by removing CAND.  Return the cost of
6061    the new set and store the differences in DELTA.  START is
6062    the candidate with which we start narrowing.  */
6063
6064 static comp_cost
6065 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6066               struct iv_cand *cand, struct iv_cand *start,
6067               struct iv_ca_delta **delta)
6068 {
6069   unsigned i, ci;
6070   struct iv_use *use;
6071   struct cost_pair *old_cp, *new_cp, *cp;
6072   bitmap_iterator bi;
6073   struct iv_cand *cnd;
6074   comp_cost cost, best_cost, acost;
6075
6076   *delta = NULL;
6077   for (i = 0; i < n_iv_uses (data); i++)
6078     {
6079       use = iv_use (data, i);
6080
6081       old_cp = iv_ca_cand_for_use (ivs, use);
6082       if (old_cp->cand != cand)
6083         continue;
6084
6085       best_cost = iv_ca_cost (ivs);
6086       /* Start narrowing with START.  */
6087       new_cp = get_use_iv_cost (data, use, start);
6088
6089       if (data->consider_all_candidates)
6090         {
6091           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6092             {
6093               if (ci == cand->id || (start && ci == start->id))
6094                 continue;
6095
6096               cnd = iv_cand (data, ci);
6097
6098               cp = get_use_iv_cost (data, use, cnd);
6099               if (!cp)
6100                 continue;
6101
6102               iv_ca_set_cp (data, ivs, use, cp);
6103               acost = iv_ca_cost (ivs);
6104
6105               if (compare_costs (acost, best_cost) < 0)
6106                 {
6107                   best_cost = acost;
6108                   new_cp = cp;
6109                 }
6110             }
6111         }
6112       else
6113         {
6114           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6115             {
6116               if (ci == cand->id || (start && ci == start->id))
6117                 continue;
6118
6119               cnd = iv_cand (data, ci);
6120
6121               cp = get_use_iv_cost (data, use, cnd);
6122               if (!cp)
6123                 continue;
6124
6125               iv_ca_set_cp (data, ivs, use, cp);
6126               acost = iv_ca_cost (ivs);
6127
6128               if (compare_costs (acost, best_cost) < 0)
6129                 {
6130                   best_cost = acost;
6131                   new_cp = cp;
6132                 }
6133             }
6134         }
6135       /* Restore to old cp for use.  */
6136       iv_ca_set_cp (data, ivs, use, old_cp);
6137
6138       if (!new_cp)
6139         {
6140           iv_ca_delta_free (delta);
6141           return infinite_cost;
6142         }
6143
6144       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6145     }
6146
6147   iv_ca_delta_commit (data, ivs, *delta, true);
6148   cost = iv_ca_cost (ivs);
6149   iv_ca_delta_commit (data, ivs, *delta, false);
6150
6151   return cost;
6152 }
6153
6154 /* Try optimizing the set of candidates IVS by removing candidates different
6155    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6156    differences in DELTA.  */
6157
6158 static comp_cost
6159 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6160              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6161 {
6162   bitmap_iterator bi;
6163   struct iv_ca_delta *act_delta, *best_delta;
6164   unsigned i;
6165   comp_cost best_cost, acost;
6166   struct iv_cand *cand;
6167
6168   best_delta = NULL;
6169   best_cost = iv_ca_cost (ivs);
6170
6171   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6172     {
6173       cand = iv_cand (data, i);
6174
6175       if (cand == except_cand)
6176         continue;
6177
6178       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6179
6180       if (compare_costs (acost, best_cost) < 0)
6181         {
6182           best_cost = acost;
6183           iv_ca_delta_free (&best_delta);
6184           best_delta = act_delta;
6185         }
6186       else
6187         iv_ca_delta_free (&act_delta);
6188     }
6189
6190   if (!best_delta)
6191     {
6192       *delta = NULL;
6193       return best_cost;
6194     }
6195
6196   /* Recurse to possibly remove other unnecessary ivs.  */
6197   iv_ca_delta_commit (data, ivs, best_delta, true);
6198   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6199   iv_ca_delta_commit (data, ivs, best_delta, false);
6200   *delta = iv_ca_delta_join (best_delta, *delta);
6201   return best_cost;
6202 }
6203
6204 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6205    cheaper local cost for USE than BEST_CP.  Return pointer to
6206    the corresponding cost_pair, otherwise just return BEST_CP.  */
6207
6208 static struct cost_pair*
6209 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6210                         unsigned int cand_idx, struct iv_cand *old_cand,
6211                         struct cost_pair *best_cp)
6212 {
6213   struct iv_cand *cand;
6214   struct cost_pair *cp;
6215
6216   gcc_assert (old_cand != NULL && best_cp != NULL);
6217   if (cand_idx == old_cand->id)
6218     return best_cp;
6219
6220   cand = iv_cand (data, cand_idx);
6221   cp = get_use_iv_cost (data, use, cand);
6222   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6223     return cp;
6224
6225   return best_cp;
6226 }
6227
6228 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6229    which are used by more than one iv uses.  For each of those candidates,
6230    this function tries to represent iv uses under that candidate using
6231    other ones with lower local cost, then tries to prune the new set.
6232    If the new set has lower cost, It returns the new cost after recording
6233    candidate replacement in list DELTA.  */
6234
6235 static comp_cost
6236 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6237                struct iv_ca_delta **delta)
6238 {
6239   bitmap_iterator bi, bj;
6240   unsigned int i, j, k;
6241   struct iv_use *use;
6242   struct iv_cand *cand;
6243   comp_cost orig_cost, acost;
6244   struct iv_ca_delta *act_delta, *tmp_delta;
6245   struct cost_pair *old_cp, *best_cp = NULL;
6246
6247   *delta = NULL;
6248   orig_cost = iv_ca_cost (ivs);
6249
6250   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6251     {
6252       if (ivs->n_cand_uses[i] == 1
6253           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6254         continue;
6255
6256       cand = iv_cand (data, i);
6257
6258       act_delta = NULL;
6259       /*  Represent uses under current candidate using other ones with
6260           lower local cost.  */
6261       for (j = 0; j < ivs->upto; j++)
6262         {
6263           use = iv_use (data, j);
6264           old_cp = iv_ca_cand_for_use (ivs, use);
6265
6266           if (old_cp->cand != cand)
6267             continue;
6268
6269           best_cp = old_cp;
6270           if (data->consider_all_candidates)
6271             for (k = 0; k < n_iv_cands (data); k++)
6272               best_cp = cheaper_cost_with_cand (data, use, k,
6273                                                 old_cp->cand, best_cp);
6274           else
6275             EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6276               best_cp = cheaper_cost_with_cand (data, use, k,
6277                                                 old_cp->cand, best_cp);
6278
6279           if (best_cp == old_cp)
6280             continue;
6281
6282           act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6283         }
6284       /* No need for further prune.  */
6285       if (!act_delta)
6286         continue;
6287
6288       /* Prune the new candidate set.  */
6289       iv_ca_delta_commit (data, ivs, act_delta, true);
6290       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6291       iv_ca_delta_commit (data, ivs, act_delta, false);
6292       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6293
6294       if (compare_costs (acost, orig_cost) < 0)
6295         {
6296           *delta = act_delta;
6297           return acost;
6298         }
6299       else
6300         iv_ca_delta_free (&act_delta);
6301     }
6302
6303   return orig_cost;
6304 }
6305
6306 /* Tries to extend the sets IVS in the best possible way in order
6307    to express the USE.  If ORIGINALP is true, prefer candidates from
6308    the original set of IVs, otherwise favor important candidates not
6309    based on any memory object.  */
6310
6311 static bool
6312 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6313                   struct iv_use *use, bool originalp)
6314 {
6315   comp_cost best_cost, act_cost;
6316   unsigned i;
6317   bitmap_iterator bi;
6318   struct iv_cand *cand;
6319   struct iv_ca_delta *best_delta = NULL, *act_delta;
6320   struct cost_pair *cp;
6321
6322   iv_ca_add_use (data, ivs, use);
6323   best_cost = iv_ca_cost (ivs);
6324   cp = iv_ca_cand_for_use (ivs, use);
6325   if (cp)
6326     {
6327       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6328       iv_ca_set_no_cp (data, ivs, use);
6329     }
6330
6331   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6332      first try important candidates not based on any memory object.  Only if
6333      this fails, try the specific ones.  Rationale -- in loops with many
6334      variables the best choice often is to use just one generic biv.  If we
6335      added here many ivs specific to the uses, the optimization algorithm later
6336      would be likely to get stuck in a local minimum, thus causing us to create
6337      too many ivs.  The approach from few ivs to more seems more likely to be
6338      successful -- starting from few ivs, replacing an expensive use by a
6339      specific iv should always be a win.  */
6340   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6341     {
6342       cand = iv_cand (data, i);
6343
6344       if (originalp && cand->pos !=IP_ORIGINAL)
6345         continue;
6346
6347       if (!originalp && cand->iv->base_object != NULL_TREE)
6348         continue;
6349
6350       if (iv_ca_cand_used_p (ivs, cand))
6351         continue;
6352
6353       cp = get_use_iv_cost (data, use, cand);
6354       if (!cp)
6355         continue;
6356
6357       iv_ca_set_cp (data, ivs, use, cp);
6358       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6359                                true);
6360       iv_ca_set_no_cp (data, ivs, use);
6361       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6362
6363       if (compare_costs (act_cost, best_cost) < 0)
6364         {
6365           best_cost = act_cost;
6366
6367           iv_ca_delta_free (&best_delta);
6368           best_delta = act_delta;
6369         }
6370       else
6371         iv_ca_delta_free (&act_delta);
6372     }
6373
6374   if (infinite_cost_p (best_cost))
6375     {
6376       for (i = 0; i < use->n_map_members; i++)
6377         {
6378           cp = use->cost_map + i;
6379           cand = cp->cand;
6380           if (!cand)
6381             continue;
6382
6383           /* Already tried this.  */
6384           if (cand->important)
6385             {
6386               if (originalp && cand->pos == IP_ORIGINAL)
6387                 continue;
6388               if (!originalp && cand->iv->base_object == NULL_TREE)
6389                 continue;
6390             }
6391
6392           if (iv_ca_cand_used_p (ivs, cand))
6393             continue;
6394
6395           act_delta = NULL;
6396           iv_ca_set_cp (data, ivs, use, cp);
6397           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6398           iv_ca_set_no_cp (data, ivs, use);
6399           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6400                                        cp, act_delta);
6401
6402           if (compare_costs (act_cost, best_cost) < 0)
6403             {
6404               best_cost = act_cost;
6405
6406               if (best_delta)
6407                 iv_ca_delta_free (&best_delta);
6408               best_delta = act_delta;
6409             }
6410           else
6411             iv_ca_delta_free (&act_delta);
6412         }
6413     }
6414
6415   iv_ca_delta_commit (data, ivs, best_delta, true);
6416   iv_ca_delta_free (&best_delta);
6417
6418   return !infinite_cost_p (best_cost);
6419 }
6420
6421 /* Finds an initial assignment of candidates to uses.  */
6422
6423 static struct iv_ca *
6424 get_initial_solution (struct ivopts_data *data, bool originalp)
6425 {
6426   struct iv_ca *ivs = iv_ca_new (data);
6427   unsigned i;
6428
6429   for (i = 0; i < n_iv_uses (data); i++)
6430     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6431       {
6432         iv_ca_free (&ivs);
6433         return NULL;
6434       }
6435
6436   return ivs;
6437 }
6438
6439 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6440    points to a bool variable, this function tries to break local
6441    optimal fixed-point by replacing candidates in IVS if it's true.  */
6442
6443 static bool
6444 try_improve_iv_set (struct ivopts_data *data,
6445                     struct iv_ca *ivs, bool *try_replace_p)
6446 {
6447   unsigned i, n_ivs;
6448   comp_cost acost, best_cost = iv_ca_cost (ivs);
6449   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6450   struct iv_cand *cand;
6451
6452   /* Try extending the set of induction variables by one.  */
6453   for (i = 0; i < n_iv_cands (data); i++)
6454     {
6455       cand = iv_cand (data, i);
6456
6457       if (iv_ca_cand_used_p (ivs, cand))
6458         continue;
6459
6460       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6461       if (!act_delta)
6462         continue;
6463
6464       /* If we successfully added the candidate and the set is small enough,
6465          try optimizing it by removing other candidates.  */
6466       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6467         {
6468           iv_ca_delta_commit (data, ivs, act_delta, true);
6469           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6470           iv_ca_delta_commit (data, ivs, act_delta, false);
6471           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6472         }
6473
6474       if (compare_costs (acost, best_cost) < 0)
6475         {
6476           best_cost = acost;
6477           iv_ca_delta_free (&best_delta);
6478           best_delta = act_delta;
6479         }
6480       else
6481         iv_ca_delta_free (&act_delta);
6482     }
6483
6484   if (!best_delta)
6485     {
6486       /* Try removing the candidates from the set instead.  */
6487       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6488
6489       if (!best_delta && *try_replace_p)
6490         {
6491           *try_replace_p = false;
6492           /* So far candidate selecting algorithm tends to choose fewer IVs
6493              so that it can handle cases in which loops have many variables
6494              but the best choice is often to use only one general biv.  One
6495              weakness is it can't handle opposite cases, in which different
6496              candidates should be chosen with respect to each use.  To solve
6497              the problem, we replace candidates in a manner described by the
6498              comments of iv_ca_replace, thus give general algorithm a chance
6499              to break local optimal fixed-point in these cases.  */
6500           best_cost = iv_ca_replace (data, ivs, &best_delta);
6501         }
6502
6503       if (!best_delta)
6504         return false;
6505     }
6506
6507   iv_ca_delta_commit (data, ivs, best_delta, true);
6508   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6509   iv_ca_delta_free (&best_delta);
6510   return true;
6511 }
6512
6513 /* Attempts to find the optimal set of induction variables.  We do simple
6514    greedy heuristic -- we try to replace at most one candidate in the selected
6515    solution and remove the unused ivs while this improves the cost.  */
6516
6517 static struct iv_ca *
6518 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6519 {
6520   struct iv_ca *set;
6521   bool try_replace_p = true;
6522
6523   /* Get the initial solution.  */
6524   set = get_initial_solution (data, originalp);
6525   if (!set)
6526     {
6527       if (dump_file && (dump_flags & TDF_DETAILS))
6528         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6529       return NULL;
6530     }
6531
6532   if (dump_file && (dump_flags & TDF_DETAILS))
6533     {
6534       fprintf (dump_file, "Initial set of candidates:\n");
6535       iv_ca_dump (data, dump_file, set);
6536     }
6537
6538   while (try_improve_iv_set (data, set, &try_replace_p))
6539     {
6540       if (dump_file && (dump_flags & TDF_DETAILS))
6541         {
6542           fprintf (dump_file, "Improved to:\n");
6543           iv_ca_dump (data, dump_file, set);
6544         }
6545     }
6546
6547   return set;
6548 }
6549
6550 static struct iv_ca *
6551 find_optimal_iv_set (struct ivopts_data *data)
6552 {
6553   unsigned i;
6554   struct iv_ca *set, *origset;
6555   struct iv_use *use;
6556   comp_cost cost, origcost;
6557
6558   /* Determine the cost based on a strategy that starts with original IVs,
6559      and try again using a strategy that prefers candidates not based
6560      on any IVs.  */
6561   origset = find_optimal_iv_set_1 (data, true);
6562   set = find_optimal_iv_set_1 (data, false);
6563
6564   if (!origset && !set)
6565     return NULL;
6566
6567   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6568   cost = set ? iv_ca_cost (set) : infinite_cost;
6569
6570   if (dump_file && (dump_flags & TDF_DETAILS))
6571     {
6572       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6573                origcost.cost, origcost.complexity);
6574       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6575                cost.cost, cost.complexity);
6576     }
6577
6578   /* Choose the one with the best cost.  */
6579   if (compare_costs (origcost, cost) <= 0)
6580     {
6581       if (set)
6582         iv_ca_free (&set);
6583       set = origset;
6584     }
6585   else if (origset)
6586     iv_ca_free (&origset);
6587
6588   for (i = 0; i < n_iv_uses (data); i++)
6589     {
6590       use = iv_use (data, i);
6591       use->selected = iv_ca_cand_for_use (set, use)->cand;
6592     }
6593
6594   return set;
6595 }
6596
6597 /* Creates a new induction variable corresponding to CAND.  */
6598
6599 static void
6600 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6601 {
6602   gimple_stmt_iterator incr_pos;
6603   tree base;
6604   bool after = false;
6605
6606   if (!cand->iv)
6607     return;
6608
6609   switch (cand->pos)
6610     {
6611     case IP_NORMAL:
6612       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6613       break;
6614
6615     case IP_END:
6616       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6617       after = true;
6618       break;
6619
6620     case IP_AFTER_USE:
6621       after = true;
6622       /* fall through */
6623     case IP_BEFORE_USE:
6624       incr_pos = gsi_for_stmt (cand->incremented_at);
6625       break;
6626
6627     case IP_ORIGINAL:
6628       /* Mark that the iv is preserved.  */
6629       name_info (data, cand->var_before)->preserve_biv = true;
6630       name_info (data, cand->var_after)->preserve_biv = true;
6631
6632       /* Rewrite the increment so that it uses var_before directly.  */
6633       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6634       return;
6635     }
6636
6637   gimple_add_tmp_var (cand->var_before);
6638
6639   base = unshare_expr (cand->iv->base);
6640
6641   create_iv (base, unshare_expr (cand->iv->step),
6642              cand->var_before, data->current_loop,
6643              &incr_pos, after, &cand->var_before, &cand->var_after);
6644 }
6645
6646 /* Creates new induction variables described in SET.  */
6647
6648 static void
6649 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6650 {
6651   unsigned i;
6652   struct iv_cand *cand;
6653   bitmap_iterator bi;
6654
6655   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6656     {
6657       cand = iv_cand (data, i);
6658       create_new_iv (data, cand);
6659     }
6660
6661   if (dump_file && (dump_flags & TDF_DETAILS))
6662     {
6663       fprintf (dump_file, "Selected IV set for loop %d",
6664                data->current_loop->num);
6665       if (data->loop_loc != UNKNOWN_LOCATION)
6666         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6667                  LOCATION_LINE (data->loop_loc));
6668       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6669       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6670         {
6671           cand = iv_cand (data, i);
6672           dump_cand (dump_file, cand);
6673         }
6674       fprintf (dump_file, "\n");
6675     }
6676 }
6677
6678 /* Rewrites USE (definition of iv used in a nonlinear expression)
6679    using candidate CAND.  */
6680
6681 static void
6682 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6683                             struct iv_use *use, struct iv_cand *cand)
6684 {
6685   tree comp;
6686   tree op, tgt;
6687   gassign *ass;
6688   gimple_stmt_iterator bsi;
6689
6690   /* An important special case -- if we are asked to express value of
6691      the original iv by itself, just exit; there is no need to
6692      introduce a new computation (that might also need casting the
6693      variable to unsigned and back).  */
6694   if (cand->pos == IP_ORIGINAL
6695       && cand->incremented_at == use->stmt)
6696     {
6697       enum tree_code stmt_code;
6698
6699       gcc_assert (is_gimple_assign (use->stmt));
6700       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6701
6702       /* Check whether we may leave the computation unchanged.
6703          This is the case only if it does not rely on other
6704          computations in the loop -- otherwise, the computation
6705          we rely upon may be removed in remove_unused_ivs,
6706          thus leading to ICE.  */
6707       stmt_code = gimple_assign_rhs_code (use->stmt);
6708       if (stmt_code == PLUS_EXPR
6709           || stmt_code == MINUS_EXPR
6710           || stmt_code == POINTER_PLUS_EXPR)
6711         {
6712           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6713             op = gimple_assign_rhs2 (use->stmt);
6714           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6715             op = gimple_assign_rhs1 (use->stmt);
6716           else
6717             op = NULL_TREE;
6718         }
6719       else
6720         op = NULL_TREE;
6721
6722       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6723         return;
6724     }
6725
6726   comp = get_computation (data->current_loop, use, cand);
6727   gcc_assert (comp != NULL_TREE);
6728
6729   switch (gimple_code (use->stmt))
6730     {
6731     case GIMPLE_PHI:
6732       tgt = PHI_RESULT (use->stmt);
6733
6734       /* If we should keep the biv, do not replace it.  */
6735       if (name_info (data, tgt)->preserve_biv)
6736         return;
6737
6738       bsi = gsi_after_labels (gimple_bb (use->stmt));
6739       break;
6740
6741     case GIMPLE_ASSIGN:
6742       tgt = gimple_assign_lhs (use->stmt);
6743       bsi = gsi_for_stmt (use->stmt);
6744       break;
6745
6746     default:
6747       gcc_unreachable ();
6748     }
6749
6750   if (!valid_gimple_rhs_p (comp)
6751       || (gimple_code (use->stmt) != GIMPLE_PHI
6752           /* We can't allow re-allocating the stmt as it might be pointed
6753              to still.  */
6754           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6755               >= gimple_num_ops (gsi_stmt (bsi)))))
6756     {
6757       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6758                                        true, GSI_SAME_STMT);
6759       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6760         {
6761           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6762           /* As this isn't a plain copy we have to reset alignment
6763              information.  */
6764           if (SSA_NAME_PTR_INFO (comp))
6765             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6766         }
6767     }
6768
6769   if (gimple_code (use->stmt) == GIMPLE_PHI)
6770     {
6771       ass = gimple_build_assign (tgt, comp);
6772       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6773
6774       bsi = gsi_for_stmt (use->stmt);
6775       remove_phi_node (&bsi, false);
6776     }
6777   else
6778     {
6779       gimple_assign_set_rhs_from_tree (&bsi, comp);
6780       use->stmt = gsi_stmt (bsi);
6781     }
6782 }
6783
6784 /* Performs a peephole optimization to reorder the iv update statement with
6785    a mem ref to enable instruction combining in later phases. The mem ref uses
6786    the iv value before the update, so the reordering transformation requires
6787    adjustment of the offset. CAND is the selected IV_CAND.
6788
6789    Example:
6790
6791    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6792    iv2 = iv1 + 1;
6793
6794    if (t < val)      (1)
6795      goto L;
6796    goto Head;
6797
6798
6799    directly propagating t over to (1) will introduce overlapping live range
6800    thus increase register pressure. This peephole transform it into:
6801
6802
6803    iv2 = iv1 + 1;
6804    t = MEM_REF (base, iv2, 8, 8);
6805    if (t < val)
6806      goto L;
6807    goto Head;
6808 */
6809
6810 static void
6811 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6812 {
6813   tree var_after;
6814   gimple iv_update, stmt;
6815   basic_block bb;
6816   gimple_stmt_iterator gsi, gsi_iv;
6817
6818   if (cand->pos != IP_NORMAL)
6819     return;
6820
6821   var_after = cand->var_after;
6822   iv_update = SSA_NAME_DEF_STMT (var_after);
6823
6824   bb = gimple_bb (iv_update);
6825   gsi = gsi_last_nondebug_bb (bb);
6826   stmt = gsi_stmt (gsi);
6827
6828   /* Only handle conditional statement for now.  */
6829   if (gimple_code (stmt) != GIMPLE_COND)
6830     return;
6831
6832   gsi_prev_nondebug (&gsi);
6833   stmt = gsi_stmt (gsi);
6834   if (stmt != iv_update)
6835     return;
6836
6837   gsi_prev_nondebug (&gsi);
6838   if (gsi_end_p (gsi))
6839     return;
6840
6841   stmt = gsi_stmt (gsi);
6842   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6843     return;
6844
6845   if (stmt != use->stmt)
6846     return;
6847
6848   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6849     return;
6850
6851   if (dump_file && (dump_flags & TDF_DETAILS))
6852     {
6853       fprintf (dump_file, "Reordering \n");
6854       print_gimple_stmt (dump_file, iv_update, 0, 0);
6855       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6856       fprintf (dump_file, "\n");
6857     }
6858
6859   gsi = gsi_for_stmt (use->stmt);
6860   gsi_iv = gsi_for_stmt (iv_update);
6861   gsi_move_before (&gsi_iv, &gsi);
6862
6863   cand->pos = IP_BEFORE_USE;
6864   cand->incremented_at = use->stmt;
6865 }
6866
6867 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6868
6869 static void
6870 rewrite_use_address_1 (struct ivopts_data *data,
6871                        struct iv_use *use, struct iv_cand *cand)
6872 {
6873   aff_tree aff;
6874   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6875   tree base_hint = NULL_TREE;
6876   tree ref, iv;
6877   bool ok;
6878
6879   adjust_iv_update_pos (cand, use);
6880   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6881   gcc_assert (ok);
6882   unshare_aff_combination (&aff);
6883
6884   /* To avoid undefined overflow problems, all IV candidates use unsigned
6885      integer types.  The drawback is that this makes it impossible for
6886      create_mem_ref to distinguish an IV that is based on a memory object
6887      from one that represents simply an offset.
6888
6889      To work around this problem, we pass a hint to create_mem_ref that
6890      indicates which variable (if any) in aff is an IV based on a memory
6891      object.  Note that we only consider the candidate.  If this is not
6892      based on an object, the base of the reference is in some subexpression
6893      of the use -- but these will use pointer types, so they are recognized
6894      by the create_mem_ref heuristics anyway.  */
6895   if (cand->iv->base_object)
6896     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6897
6898   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6899   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6900                         reference_alias_ptr_type (*use->op_p),
6901                         iv, base_hint, data->speed);
6902   copy_ref_info (ref, *use->op_p);
6903   *use->op_p = ref;
6904 }
6905
6906 /* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
6907    first use of a group, rewrites sub uses in the group too.  */
6908
6909 static void
6910 rewrite_use_address (struct ivopts_data *data,
6911                       struct iv_use *use, struct iv_cand *cand)
6912 {
6913   struct iv_use *next;
6914
6915   gcc_assert (use->sub_id == 0);
6916   rewrite_use_address_1 (data, use, cand);
6917   update_stmt (use->stmt);
6918
6919   for (next = use->next; next != NULL; next = next->next)
6920     {
6921       rewrite_use_address_1 (data, next, cand);
6922       update_stmt (next->stmt);
6923     }
6924
6925   return;
6926 }
6927
6928 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6929    candidate CAND.  */
6930
6931 static void
6932 rewrite_use_compare (struct ivopts_data *data,
6933                      struct iv_use *use, struct iv_cand *cand)
6934 {
6935   tree comp, *var_p, op, bound;
6936   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6937   enum tree_code compare;
6938   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6939   bool ok;
6940
6941   bound = cp->value;
6942   if (bound)
6943     {
6944       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6945       tree var_type = TREE_TYPE (var);
6946       gimple_seq stmts;
6947
6948       if (dump_file && (dump_flags & TDF_DETAILS))
6949         {
6950           fprintf (dump_file, "Replacing exit test: ");
6951           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6952         }
6953       compare = cp->comp;
6954       bound = unshare_expr (fold_convert (var_type, bound));
6955       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6956       if (stmts)
6957         gsi_insert_seq_on_edge_immediate (
6958                 loop_preheader_edge (data->current_loop),
6959                 stmts);
6960
6961       gcond *cond_stmt = as_a <gcond *> (use->stmt);
6962       gimple_cond_set_lhs (cond_stmt, var);
6963       gimple_cond_set_code (cond_stmt, compare);
6964       gimple_cond_set_rhs (cond_stmt, op);
6965       return;
6966     }
6967
6968   /* The induction variable elimination failed; just express the original
6969      giv.  */
6970   comp = get_computation (data->current_loop, use, cand);
6971   gcc_assert (comp != NULL_TREE);
6972
6973   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6974   gcc_assert (ok);
6975
6976   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6977                                      true, GSI_SAME_STMT);
6978 }
6979
6980 /* Rewrites USE using candidate CAND.  */
6981
6982 static void
6983 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6984 {
6985   switch (use->type)
6986     {
6987       case USE_NONLINEAR_EXPR:
6988         rewrite_use_nonlinear_expr (data, use, cand);
6989         break;
6990
6991       case USE_ADDRESS:
6992         rewrite_use_address (data, use, cand);
6993         break;
6994
6995       case USE_COMPARE:
6996         rewrite_use_compare (data, use, cand);
6997         break;
6998
6999       default:
7000         gcc_unreachable ();
7001     }
7002
7003   update_stmt (use->stmt);
7004 }
7005
7006 /* Rewrite the uses using the selected induction variables.  */
7007
7008 static void
7009 rewrite_uses (struct ivopts_data *data)
7010 {
7011   unsigned i;
7012   struct iv_cand *cand;
7013   struct iv_use *use;
7014
7015   for (i = 0; i < n_iv_uses (data); i++)
7016     {
7017       use = iv_use (data, i);
7018       cand = use->selected;
7019       gcc_assert (cand);
7020
7021       rewrite_use (data, use, cand);
7022     }
7023 }
7024
7025 /* Removes the ivs that are not used after rewriting.  */
7026
7027 static void
7028 remove_unused_ivs (struct ivopts_data *data)
7029 {
7030   unsigned j;
7031   bitmap_iterator bi;
7032   bitmap toremove = BITMAP_ALLOC (NULL);
7033
7034   /* Figure out an order in which to release SSA DEFs so that we don't
7035      release something that we'd have to propagate into a debug stmt
7036      afterwards.  */
7037   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7038     {
7039       struct version_info *info;
7040
7041       info = ver_info (data, j);
7042       if (info->iv
7043           && !integer_zerop (info->iv->step)
7044           && !info->inv_id
7045           && !info->iv->have_use_for
7046           && !info->preserve_biv)
7047         {
7048           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7049
7050           tree def = info->iv->ssa_name;
7051
7052           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7053             {
7054               imm_use_iterator imm_iter;
7055               use_operand_p use_p;
7056               gimple stmt;
7057               int count = 0;
7058
7059               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7060                 {
7061                   if (!gimple_debug_bind_p (stmt))
7062                     continue;
7063
7064                   /* We just want to determine whether to do nothing
7065                      (count == 0), to substitute the computed
7066                      expression into a single use of the SSA DEF by
7067                      itself (count == 1), or to use a debug temp
7068                      because the SSA DEF is used multiple times or as
7069                      part of a larger expression (count > 1). */
7070                   count++;
7071                   if (gimple_debug_bind_get_value (stmt) != def)
7072                     count++;
7073
7074                   if (count > 1)
7075                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7076                 }
7077
7078               if (!count)
7079                 continue;
7080
7081               struct iv_use dummy_use;
7082               struct iv_cand *best_cand = NULL, *cand;
7083               unsigned i, best_pref = 0, cand_pref;
7084
7085               memset (&dummy_use, 0, sizeof (dummy_use));
7086               dummy_use.iv = info->iv;
7087               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7088                 {
7089                   cand = iv_use (data, i)->selected;
7090                   if (cand == best_cand)
7091                     continue;
7092                   cand_pref = operand_equal_p (cand->iv->step,
7093                                                info->iv->step, 0)
7094                     ? 4 : 0;
7095                   cand_pref
7096                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7097                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7098                     ? 2 : 0;
7099                   cand_pref
7100                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7101                     ? 1 : 0;
7102                   if (best_cand == NULL || best_pref < cand_pref)
7103                     {
7104                       best_cand = cand;
7105                       best_pref = cand_pref;
7106                     }
7107                 }
7108
7109               if (!best_cand)
7110                 continue;
7111
7112               tree comp = get_computation_at (data->current_loop,
7113                                               &dummy_use, best_cand,
7114                                               SSA_NAME_DEF_STMT (def));
7115               if (!comp)
7116                 continue;
7117
7118               if (count > 1)
7119                 {
7120                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7121                   DECL_ARTIFICIAL (vexpr) = 1;
7122                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7123                   if (SSA_NAME_VAR (def))
7124                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7125                   else
7126                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7127                   gdebug *def_temp
7128                     = gimple_build_debug_bind (vexpr, comp, NULL);
7129                   gimple_stmt_iterator gsi;
7130
7131                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7132                     gsi = gsi_after_labels (gimple_bb
7133                                             (SSA_NAME_DEF_STMT (def)));
7134                   else
7135                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7136
7137                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7138                   comp = vexpr;
7139                 }
7140
7141               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7142                 {
7143                   if (!gimple_debug_bind_p (stmt))
7144                     continue;
7145
7146                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7147                     SET_USE (use_p, comp);
7148
7149                   update_stmt (stmt);
7150                 }
7151             }
7152         }
7153     }
7154
7155   release_defs_bitset (toremove);
7156
7157   BITMAP_FREE (toremove);
7158 }
7159
7160 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7161    for hash_map::traverse.  */
7162
7163 bool
7164 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7165 {
7166   free (value);
7167   return true;
7168 }
7169
7170 /* Frees data allocated by the optimization of a single loop.  */
7171
7172 static void
7173 free_loop_data (struct ivopts_data *data)
7174 {
7175   unsigned i, j;
7176   bitmap_iterator bi;
7177   tree obj;
7178
7179   if (data->niters)
7180     {
7181       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7182       delete data->niters;
7183       data->niters = NULL;
7184     }
7185
7186   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7187     {
7188       struct version_info *info;
7189
7190       info = ver_info (data, i);
7191       free (info->iv);
7192       info->iv = NULL;
7193       info->has_nonlin_use = false;
7194       info->preserve_biv = false;
7195       info->inv_id = 0;
7196     }
7197   bitmap_clear (data->relevant);
7198   bitmap_clear (data->important_candidates);
7199
7200   for (i = 0; i < n_iv_uses (data); i++)
7201     {
7202       struct iv_use *use = iv_use (data, i);
7203       struct iv_use *pre = use, *sub = use->next;
7204
7205       while (sub)
7206         {
7207           gcc_assert (sub->related_cands == NULL);
7208           gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7209
7210           free (sub->iv);
7211           pre = sub;
7212           sub = sub->next;
7213           free (pre);
7214         }
7215
7216       free (use->iv);
7217       BITMAP_FREE (use->related_cands);
7218       for (j = 0; j < use->n_map_members; j++)
7219         if (use->cost_map[j].depends_on)
7220           BITMAP_FREE (use->cost_map[j].depends_on);
7221       free (use->cost_map);
7222       free (use);
7223     }
7224   data->iv_uses.truncate (0);
7225
7226   for (i = 0; i < n_iv_cands (data); i++)
7227     {
7228       struct iv_cand *cand = iv_cand (data, i);
7229
7230       free (cand->iv);
7231       if (cand->depends_on)
7232         BITMAP_FREE (cand->depends_on);
7233       free (cand);
7234     }
7235   data->iv_candidates.truncate (0);
7236
7237   if (data->version_info_size < num_ssa_names)
7238     {
7239       data->version_info_size = 2 * num_ssa_names;
7240       free (data->version_info);
7241       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7242     }
7243
7244   data->max_inv_id = 0;
7245
7246   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7247     SET_DECL_RTL (obj, NULL_RTX);
7248
7249   decl_rtl_to_reset.truncate (0);
7250
7251   data->inv_expr_tab->empty ();
7252   data->inv_expr_id = 0;
7253 }
7254
7255 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7256    loop tree.  */
7257
7258 static void
7259 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7260 {
7261   free_loop_data (data);
7262   free (data->version_info);
7263   BITMAP_FREE (data->relevant);
7264   BITMAP_FREE (data->important_candidates);
7265
7266   decl_rtl_to_reset.release ();
7267   data->iv_uses.release ();
7268   data->iv_candidates.release ();
7269   delete data->inv_expr_tab;
7270   data->inv_expr_tab = NULL;
7271   free_affine_expand_cache (&data->name_expansion_cache);
7272 }
7273
7274 /* Returns true if the loop body BODY includes any function calls.  */
7275
7276 static bool
7277 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7278 {
7279   gimple_stmt_iterator gsi;
7280   unsigned i;
7281
7282   for (i = 0; i < num_nodes; i++)
7283     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7284       {
7285         gimple stmt = gsi_stmt (gsi);
7286         if (is_gimple_call (stmt)
7287             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7288           return true;
7289       }
7290   return false;
7291 }
7292
7293 /* Optimizes the LOOP.  Returns true if anything changed.  */
7294
7295 static bool
7296 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7297 {
7298   bool changed = false;
7299   struct iv_ca *iv_ca;
7300   edge exit = single_dom_exit (loop);
7301   basic_block *body;
7302
7303   gcc_assert (!data->niters);
7304   data->current_loop = loop;
7305   data->loop_loc = find_loop_location (loop);
7306   data->speed = optimize_loop_for_speed_p (loop);
7307
7308   if (dump_file && (dump_flags & TDF_DETAILS))
7309     {
7310       fprintf (dump_file, "Processing loop %d", loop->num);
7311       if (data->loop_loc != UNKNOWN_LOCATION)
7312         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7313                  LOCATION_LINE (data->loop_loc));
7314       fprintf (dump_file, "\n");
7315
7316       if (exit)
7317         {
7318           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7319                    exit->src->index, exit->dest->index);
7320           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7321           fprintf (dump_file, "\n");
7322         }
7323
7324       fprintf (dump_file, "\n");
7325     }
7326
7327   body = get_loop_body (loop);
7328   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7329   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7330   free (body);
7331
7332   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7333
7334   /* For each ssa name determines whether it behaves as an induction variable
7335      in some loop.  */
7336   if (!find_induction_variables (data))
7337     goto finish;
7338
7339   /* Finds interesting uses (item 1).  */
7340   find_interesting_uses (data);
7341   group_address_uses (data);
7342   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7343     goto finish;
7344
7345   /* Finds candidates for the induction variables (item 2).  */
7346   find_iv_candidates (data);
7347
7348   /* Calculates the costs (item 3, part 1).  */
7349   determine_iv_costs (data);
7350   determine_use_iv_costs (data);
7351   determine_set_costs (data);
7352
7353   /* Find the optimal set of induction variables (item 3, part 2).  */
7354   iv_ca = find_optimal_iv_set (data);
7355   if (!iv_ca)
7356     goto finish;
7357   changed = true;
7358
7359   /* Create the new induction variables (item 4, part 1).  */
7360   create_new_ivs (data, iv_ca);
7361   iv_ca_free (&iv_ca);
7362
7363   /* Rewrite the uses (item 4, part 2).  */
7364   rewrite_uses (data);
7365
7366   /* Remove the ivs that are unused after rewriting.  */
7367   remove_unused_ivs (data);
7368
7369   /* We have changed the structure of induction variables; it might happen
7370      that definitions in the scev database refer to some of them that were
7371      eliminated.  */
7372   scev_reset ();
7373
7374 finish:
7375   free_loop_data (data);
7376
7377   return changed;
7378 }
7379
7380 /* Main entry point.  Optimizes induction variables in loops.  */
7381
7382 void
7383 tree_ssa_iv_optimize (void)
7384 {
7385   struct loop *loop;
7386   struct ivopts_data data;
7387
7388   tree_ssa_iv_optimize_init (&data);
7389
7390   /* Optimize the loops starting with the innermost ones.  */
7391   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7392     {
7393       if (dump_file && (dump_flags & TDF_DETAILS))
7394         flow_loop_dump (loop, dump_file, NULL, 1);
7395
7396       tree_ssa_iv_optimize_loop (&data, loop);
7397     }
7398
7399   tree_ssa_iv_optimize_finalize (&data);
7400 }