/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The
	 cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset
	 for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  A small illustrative example of what the pass
   does is sketched in the comment below.  */
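/* Illustrative sketch (not part of the original sources): on a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address use `&a[i]' would typically be rewritten in terms of a new
   pointer induction variable, roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which is the classic strength reduction of the multiplication hidden in
   the array indexing.  The exact output depends on the target costs.  */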
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier of the use the iv is attached to,
			   if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
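/* For example (illustrative, not from the original sources), given

     for (i = 10; i < n; i += 2)
       sum += i;

   the iv describing `i' would have base 10, step 2, biv_p set (it is
   incremented directly by the loop) and base_object NULL, since it is
   not a pointer.  */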
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
};

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
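/* A hedged illustration (not from the original sources): two candidate
   ways of computing a use might be

     comp_cost a = {4, 1};    -- say, reg + reg addressing
     comp_cost b = {4, 2};    -- say, reg + reg * scale addressing

   With equal runtime cost, the lower complexity acts as a tie-breaker,
   so `a' would be preferred.  */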
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use *> iv_uses;

  /* The candidates.  */
  vec<iv_cand *> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");

  if (iv->no_overflow)
    fprintf (file, "  iv doesn't overflow wrto loop niter\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
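/* Hedged examples (not from the original sources): for an array `a',
   determine_base_object (&a[i]) reduces through the ADDR_EXPR case to
   `&a', while determine_base_object ((char *) p + 4) looks through the
   conversion and the POINTER_PLUS_EXPR and returns `p' itself (converted
   to ptr_type_node by the default case).  A constant pointer such as
   (void *) 0 yields NULL_TREE.  */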
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
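/* For instance (illustrative): `&a' has a DECL_P operand, so the function
   returns false, whereas `&a[0]' wraps an ARRAY_REF and returns true;
   an expression like `&a[0] + 4' is also true because one of its
   summands contains a complex address.  */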
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}
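/* A consequence of the lowering above (illustrative example): the bases
   `&a[0]' and `&a' are both rewritten into the same affine form, so only
   one candidate is created for them instead of two duplicates, and the
   cost of an address like `&a[i + 1]' is computed from its lowered
   base-plus-offset form.  */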
/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we should stop at any ssa var referred to by the step, but
	 since that is expensive and unlikely to matter in practice, we just
	 stop at the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we should stop at any ssa var referred to by the step, but
     since that is expensive and unlikely to matter in practice, we just
     stop at the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple *stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;

      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
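/* For a loop guard like `if (i_1 < n_2)' (illustrative example), where
   `i_1' is an iv with nonzero step and `n_2' is loop invariant,
   extract_cond_operands would point *CONTROL_VAR at `i_1', *BOUND at
   `n_2', and return true; for `if (a_3 < b_4)' with neither side an iv
   it would return false.  */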
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not an ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;

      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
/* Record that BIV, together with its predecessor and successor ivs, is
   used in address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
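/* Worked examples (illustrative, not from the original sources):
   constant_multiple_of (n * 6, n, &m) recurses on the MULT_EXPR and sets
   m = 6; constant_multiple_of (n * 6 - n, n, &m) handles the MINUS_EXPR
   as 6 - 1 and sets m = 5; constant_multiple_of (12, 4, &m) divides the
   constants and sets m = 3, while constant_multiple_of (13, 4, &m)
   returns false because the division leaves a remainder.  */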
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
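/* For instance (illustrative): a 32-bit-aligned int reference advanced by
   a step of 2 bytes may become misaligned after one iteration -- tree_ctz
   of the step is 1, and 2 * BITS_PER_UNIT is below the 32-bit alignment
   requirement -- so the function returns true, and on a strict-alignment
   target the address use is rejected.  */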
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has same stripped iv base and step,
   this function records this one as a sub use to that; otherwise records
   it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
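/* To illustrate the grouping (example not from the original sources):
   in a loop accessing both a[i] and a[i + 4], the two addresses share
   the same base object, stripped base and step, and differ only in the
   constant offset (0 versus 4 elements), so the second access is
   recorded as a sub use of the first and the pair can later be
   addressed from one iv with different displacements.  */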
2071 /* Finds addresses in *OP_P inside STMT. */
2073 static void
2074 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2075 tree *op_p)
2077 tree base = *op_p, step = size_zero_node;
2078 struct iv *civ;
2079 struct ifs_ivopts_data ifs_ivopts_data;
2081 /* Do not play with volatile memory references. A bit too conservative,
2082 perhaps, but safe. */
2083 if (gimple_has_volatile_ops (stmt))
2084 goto fail;
2086 /* Ignore bitfields for now. Not really something terribly complicated
2087 to handle. TODO. */
2088 if (TREE_CODE (base) == BIT_FIELD_REF)
2089 goto fail;
2091 base = unshare_expr (base);
2093 if (TREE_CODE (base) == TARGET_MEM_REF)
2095 tree type = build_pointer_type (TREE_TYPE (base));
2096 tree astep;
2098 if (TMR_BASE (base)
2099 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2101 civ = get_iv (data, TMR_BASE (base));
2102 if (!civ)
2103 goto fail;
2105 TMR_BASE (base) = civ->base;
2106 step = civ->step;
2108 if (TMR_INDEX2 (base)
2109 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2111 civ = get_iv (data, TMR_INDEX2 (base));
2112 if (!civ)
2113 goto fail;
2115 TMR_INDEX2 (base) = civ->base;
2116 step = civ->step;
2118 if (TMR_INDEX (base)
2119 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2121 civ = get_iv (data, TMR_INDEX (base));
2122 if (!civ)
2123 goto fail;
2125 TMR_INDEX (base) = civ->base;
2126 astep = civ->step;
2128 if (astep)
2130 if (TMR_STEP (base))
2131 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2133 step = fold_build2 (PLUS_EXPR, type, step, astep);
2137 if (integer_zerop (step))
2138 goto fail;
2139 base = tree_mem_ref_addr (type, base);
2141 else
2143 ifs_ivopts_data.ivopts_data = data;
2144 ifs_ivopts_data.stmt = stmt;
2145 ifs_ivopts_data.step = size_zero_node;
2146 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2147 || integer_zerop (ifs_ivopts_data.step))
2148 goto fail;
2149 step = ifs_ivopts_data.step;
2151 /* Check that the base expression is addressable. This needs
2152 to be done after substituting bases of IVs into it. */
2153 if (may_be_nonaddressable_p (base))
2154 goto fail;
2156 /* Moreover, on strict alignment platforms, check that it is
2157 sufficiently aligned. */
2158 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2159 goto fail;
2161 base = build_fold_addr_expr (base);
2163 /* Substituting bases of IVs into the base expression might
2164 have caused folding opportunities. */
2165 if (TREE_CODE (base) == ADDR_EXPR)
2167 tree *ref = &TREE_OPERAND (base, 0);
2168 while (handled_component_p (*ref))
2169 ref = &TREE_OPERAND (*ref, 0);
2170 if (TREE_CODE (*ref) == MEM_REF)
2172 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2173 TREE_OPERAND (*ref, 0),
2174 TREE_OPERAND (*ref, 1));
2175 if (tem)
2176 *ref = tem;
2181 civ = alloc_iv (data, base, step);
2182 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2183 return;
2185 fail:
2186 for_each_index (op_p, idx_record_use, data);
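/* Editor's note, a hypothetical example of the path above: for a use
   MEM[symbol: a, index: i_1, step: 4] where i_1 is an iv with step 1,
   the accumulated step is 1 * 4 == 4 and the recorded address iv is
   roughly &a + 4 * i.  If the combined step folds to zero, the address
   is loop invariant and we take the fail path, recording the indices
   as ordinary uses instead.  */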
2189 /* Finds and records invariants used in STMT. */
2191 static void
2192 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2194 ssa_op_iter iter;
2195 use_operand_p use_p;
2196 tree op;
2198 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2200 op = USE_FROM_PTR (use_p);
2201 record_invariant (data, op, false);
2205 /* Finds interesting uses of induction variables in the statement STMT. */
2207 static void
2208 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2210 struct iv *iv;
2211 tree op, *lhs, *rhs;
2212 ssa_op_iter iter;
2213 use_operand_p use_p;
2214 enum tree_code code;
2216 find_invariants_stmt (data, stmt);
2218 if (gimple_code (stmt) == GIMPLE_COND)
2220 find_interesting_uses_cond (data, stmt);
2221 return;
2224 if (is_gimple_assign (stmt))
2226 lhs = gimple_assign_lhs_ptr (stmt);
2227 rhs = gimple_assign_rhs1_ptr (stmt);
2229 if (TREE_CODE (*lhs) == SSA_NAME)
2231 /* If the statement defines an induction variable, the uses are not
2232 interesting by themselves. */
2234 iv = get_iv (data, *lhs);
2236 if (iv && !integer_zerop (iv->step))
2237 return;
2240 code = gimple_assign_rhs_code (stmt);
2241 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2242 && (REFERENCE_CLASS_P (*rhs)
2243 || is_gimple_val (*rhs)))
2245 if (REFERENCE_CLASS_P (*rhs))
2246 find_interesting_uses_address (data, stmt, rhs);
2247 else
2248 find_interesting_uses_op (data, *rhs);
2250 if (REFERENCE_CLASS_P (*lhs))
2251 find_interesting_uses_address (data, stmt, lhs);
2252 return;
2254 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2256 find_interesting_uses_cond (data, stmt);
2257 return;
2260 /* TODO -- we should also handle address uses of type
2262 memory = call (whatever);
2264 and
2266 call (memory). */
2269 if (gimple_code (stmt) == GIMPLE_PHI
2270 && gimple_bb (stmt) == data->current_loop->header)
2272 iv = get_iv (data, PHI_RESULT (stmt));
2274 if (iv && !integer_zerop (iv->step))
2275 return;
2278 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2280 op = USE_FROM_PTR (use_p);
2282 if (TREE_CODE (op) != SSA_NAME)
2283 continue;
2285 iv = get_iv (data, op);
2286 if (!iv)
2287 continue;
2289 find_interesting_uses_op (data, op);
2293 /* Finds interesting uses of induction variables outside of loops
2294 on loop exit edge EXIT. */
2296 static void
2297 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2299 gphi *phi;
2300 gphi_iterator psi;
2301 tree def;
2303 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2305 phi = psi.phi ();
2306 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2307 if (!virtual_operand_p (def))
2308 find_interesting_uses_op (data, def);
2312 /* Finds uses of the induction variables that are interesting. */
2314 static void
2315 find_interesting_uses (struct ivopts_data *data)
2317 basic_block bb;
2318 gimple_stmt_iterator bsi;
2319 basic_block *body = get_loop_body (data->current_loop);
2320 unsigned i;
2321 struct version_info *info;
2322 edge e;
2324 if (dump_file && (dump_flags & TDF_DETAILS))
2325 fprintf (dump_file, "Uses:\n\n");
2327 for (i = 0; i < data->current_loop->num_nodes; i++)
2329 edge_iterator ei;
2330 bb = body[i];
2332 FOR_EACH_EDGE (e, ei, bb->succs)
2333 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2334 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2335 find_interesting_uses_outside (data, e);
2337 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2338 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2339 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2340 if (!is_gimple_debug (gsi_stmt (bsi)))
2341 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2344 if (dump_file && (dump_flags & TDF_DETAILS))
2346 bitmap_iterator bi;
2348 fprintf (dump_file, "\n");
2350 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2352 info = ver_info (data, i);
2353 if (info->inv_id)
2355 fprintf (dump_file, " ");
2356 print_generic_expr (dump_file, info->name, TDF_SLIM);
2357 fprintf (dump_file, " is invariant (%d)%s\n",
2358 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2362 fprintf (dump_file, "\n");
2365 free (body);
2368 /* Compute the maximum offset of the [base + offset] addressing mode
2369 for the memory reference represented by USE. */
2371 static HOST_WIDE_INT
2372 compute_max_addr_offset (struct iv_use *use)
2374 int width;
2375 rtx reg, addr;
2376 HOST_WIDE_INT i, off;
2377 unsigned list_index, num;
2378 addr_space_t as;
2379 machine_mode mem_mode, addr_mode;
2380 static vec<HOST_WIDE_INT> max_offset_list;
2382 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2383 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2385 num = max_offset_list.length ();
2386 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2387 if (list_index >= num)
2389 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2390 for (; num < max_offset_list.length (); num++)
2391 max_offset_list[num] = -1;
2394 off = max_offset_list[list_index];
2395 if (off != -1)
2396 return off;
2398 addr_mode = targetm.addr_space.address_mode (as);
2399 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2400 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2402 width = GET_MODE_BITSIZE (addr_mode) - 1;
2403 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2404 width = HOST_BITS_PER_WIDE_INT - 1;
2406 for (i = width; i > 0; i--)
2408 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2409 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2410 if (memory_address_addr_space_p (mem_mode, addr, as))
2411 break;
2413 /* For some strict-alignment targets, the offset must be naturally
2414 aligned. Try an aligned offset if mem_mode is not QImode. */
2415 off = ((unsigned HOST_WIDE_INT) 1 << i);
2416 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2418 off -= GET_MODE_SIZE (mem_mode);
2419 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2420 if (memory_address_addr_space_p (mem_mode, addr, as))
2421 break;
2424 if (i == 0)
2425 off = 0;
2427 max_offset_list[list_index] = off;
2428 return off;
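/* Illustrative sketch (an assumed target, not from the sources): if the
   [base + offset] form accepts 16-bit signed offsets, the descending
   probe above fails until i == 15, where off = (1 << 15) - 1 == 32767
   is accepted; 32767 is then cached in max_offset_list for this
   (address space, mode) pair.  If no offset is ever accepted, i reaches
   0 and the cached answer is 0.  */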
2431 /* Check if all small groups should be split. Return true if and
2432 only if:
2434 1) At least one group contains two uses with different offsets.
2435 2) No group contains more than two uses with different offsets.
2437 Return false otherwise. We want to split such groups because:
2439 1) Small groups don't have much benefit and may interfere with
2440 general candidate selection.
2441 2) The size of a problem with only small groups is usually small and
2442 the general algorithm can handle it well.
2444 TODO -- The above claim may not hold when auto increment is supported. */
2446 static bool
2447 split_all_small_groups (struct ivopts_data *data)
2449 bool split_p = false;
2450 unsigned int i, n, distinct;
2451 struct iv_use *pre, *use;
2453 n = n_iv_uses (data);
2454 for (i = 0; i < n; i++)
2456 use = iv_use (data, i);
2457 if (!use->next)
2458 continue;
2460 distinct = 1;
2461 gcc_assert (use->type == USE_ADDRESS);
2462 for (pre = use, use = use->next; use; pre = use, use = use->next)
2464 if (pre->addr_offset != use->addr_offset)
2465 distinct++;
2467 if (distinct > 2)
2468 return false;
2470 if (distinct == 2)
2471 split_p = true;
2474 return split_p;
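/* Worked example (editor's illustration): with the sorted sub-use
   offsets
     group 0: {0, 4}      -> distinct == 2
     group 1: {0, 0, 8}   -> distinct == 2
   the function returns true and the groups get split, whereas a single
   group {0, 4, 8} has distinct == 3 and grouping is kept.  */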
2477 /* For each group of address type uses, this function further groups
2478 these uses according to the maximum offset supported by the target's
2479 [base + offset] addressing mode. */
2481 static void
2482 group_address_uses (struct ivopts_data *data)
2484 HOST_WIDE_INT max_offset = -1;
2485 unsigned int i, n, sub_id;
2486 struct iv_use *pre, *use;
2487 unsigned HOST_WIDE_INT addr_offset_first;
2489 /* Reset max offset to split all small groups. */
2490 if (split_all_small_groups (data))
2491 max_offset = 0;
2493 n = n_iv_uses (data);
2494 for (i = 0; i < n; i++)
2496 use = iv_use (data, i);
2497 if (!use->next)
2498 continue;
2500 gcc_assert (use->type == USE_ADDRESS);
2501 if (max_offset != 0)
2502 max_offset = compute_max_addr_offset (use);
2504 while (use)
2506 sub_id = 0;
2507 addr_offset_first = use->addr_offset;
2508 /* Only uses whose offset relative to the first use fits in the
2509 addressing mode's offset range can be grouped together. */
2510 for (pre = use, use = use->next;
2511 use && (use->addr_offset - addr_offset_first
2512 <= (unsigned HOST_WIDE_INT) max_offset);
2513 pre = use, use = use->next)
2515 use->id = pre->id;
2516 use->sub_id = ++sub_id;
2519 /* Break the list and create a new group. */
2520 if (use)
2522 pre->next = NULL;
2523 use->id = n_iv_uses (data);
2524 use->related_cands = BITMAP_ALLOC (NULL);
2525 data->iv_uses.safe_push (use);
2530 if (dump_file && (dump_flags & TDF_DETAILS))
2531 dump_uses (dump_file, data);
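/* Editor's illustration of the regrouping: assume max_offset is 4095
   and a group holds offsets {0, 8, 4096, 4104}.  Offsets 0 and 8 stay
   together (8 - 0 <= 4095), 4096 breaks the chain, and the tail
   {4096, 4104} is pushed as a brand-new group with its own id.  */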
2534 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2535 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2536 we are at the top-level of the processed address. */
2538 static tree
2539 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2540 HOST_WIDE_INT *offset)
2542 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2543 enum tree_code code;
2544 tree type, orig_type = TREE_TYPE (expr);
2545 HOST_WIDE_INT off0, off1, st;
2546 tree orig_expr = expr;
2548 STRIP_NOPS (expr);
2550 type = TREE_TYPE (expr);
2551 code = TREE_CODE (expr);
2552 *offset = 0;
2554 switch (code)
2556 case INTEGER_CST:
2557 if (!cst_and_fits_in_hwi (expr)
2558 || integer_zerop (expr))
2559 return orig_expr;
2561 *offset = int_cst_value (expr);
2562 return build_int_cst (orig_type, 0);
2564 case POINTER_PLUS_EXPR:
2565 case PLUS_EXPR:
2566 case MINUS_EXPR:
2567 op0 = TREE_OPERAND (expr, 0);
2568 op1 = TREE_OPERAND (expr, 1);
2570 op0 = strip_offset_1 (op0, false, false, &off0);
2571 op1 = strip_offset_1 (op1, false, false, &off1);
2573 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2574 if (op0 == TREE_OPERAND (expr, 0)
2575 && op1 == TREE_OPERAND (expr, 1))
2576 return orig_expr;
2578 if (integer_zerop (op1))
2579 expr = op0;
2580 else if (integer_zerop (op0))
2582 if (code == MINUS_EXPR)
2583 expr = fold_build1 (NEGATE_EXPR, type, op1);
2584 else
2585 expr = op1;
2587 else
2588 expr = fold_build2 (code, type, op0, op1);
2590 return fold_convert (orig_type, expr);
2592 case MULT_EXPR:
2593 op1 = TREE_OPERAND (expr, 1);
2594 if (!cst_and_fits_in_hwi (op1))
2595 return orig_expr;
2597 op0 = TREE_OPERAND (expr, 0);
2598 op0 = strip_offset_1 (op0, false, false, &off0);
2599 if (op0 == TREE_OPERAND (expr, 0))
2600 return orig_expr;
2602 *offset = off0 * int_cst_value (op1);
2603 if (integer_zerop (op0))
2604 expr = op0;
2605 else
2606 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2608 return fold_convert (orig_type, expr);
2610 case ARRAY_REF:
2611 case ARRAY_RANGE_REF:
2612 if (!inside_addr)
2613 return orig_expr;
2615 step = array_ref_element_size (expr);
2616 if (!cst_and_fits_in_hwi (step))
2617 break;
2619 st = int_cst_value (step);
2620 op1 = TREE_OPERAND (expr, 1);
2621 op1 = strip_offset_1 (op1, false, false, &off1);
2622 *offset = off1 * st;
2624 if (top_compref
2625 && integer_zerop (op1))
2627 /* Strip the component reference completely. */
2628 op0 = TREE_OPERAND (expr, 0);
2629 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2630 *offset += off0;
2631 return op0;
2633 break;
2635 case COMPONENT_REF:
2637 tree field;
2639 if (!inside_addr)
2640 return orig_expr;
2642 tmp = component_ref_field_offset (expr);
2643 field = TREE_OPERAND (expr, 1);
2644 if (top_compref
2645 && cst_and_fits_in_hwi (tmp)
2646 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2648 HOST_WIDE_INT boffset, abs_off;
2650 /* Strip the component reference completely. */
2651 op0 = TREE_OPERAND (expr, 0);
2652 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2653 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2654 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2655 if (boffset < 0)
2656 abs_off = -abs_off;
2658 *offset = off0 + int_cst_value (tmp) + abs_off;
2659 return op0;
2662 break;
2664 case ADDR_EXPR:
2665 op0 = TREE_OPERAND (expr, 0);
2666 op0 = strip_offset_1 (op0, true, true, &off0);
2667 *offset += off0;
2669 if (op0 == TREE_OPERAND (expr, 0))
2670 return orig_expr;
2672 expr = build_fold_addr_expr (op0);
2673 return fold_convert (orig_type, expr);
2675 case MEM_REF:
2676 /* ??? Offset operand? */
2677 inside_addr = false;
2678 break;
2680 default:
2681 return orig_expr;
2684 /* Default handling of expressions for which we want to recurse into
2685 the first operand. */
2686 op0 = TREE_OPERAND (expr, 0);
2687 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2688 *offset += off0;
2690 if (op0 == TREE_OPERAND (expr, 0)
2691 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2692 return orig_expr;
2694 expr = copy_node (expr);
2695 TREE_OPERAND (expr, 0) = op0;
2696 if (op1)
2697 TREE_OPERAND (expr, 1) = op1;
2699 /* Inside address, we might strip the top level component references,
2700 thus changing the type of the expression. Handling of ADDR_EXPR
2701 will fix that. */
2702 expr = fold_convert (orig_type, expr);
2704 return expr;
2707 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2709 static tree
2710 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2712 HOST_WIDE_INT off;
2713 tree core = strip_offset_1 (expr, false, false, &off);
2714 *offset = off;
2715 return core;
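/* A worked example of the stripping (editor's sketch): for
   expr = &a[i + 3] with 4-byte elements, strip_offset_1 peels the
   ARRAY_REF index and the inner PLUS_EXPR, returning the core &a[i]
   with *offset == 12.  For a plain constant such as 20, the core is
   the zero constant and *offset becomes 20.  */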
2718 /* Returns a variant of TYPE that can be used as a base for different uses.
2719 We return an unsigned type with the same precision, which avoids problems
2720 with overflows. */
2722 static tree
2723 generic_type_for (tree type)
2725 if (POINTER_TYPE_P (type))
2726 return unsigned_type_for (type);
2728 if (TYPE_UNSIGNED (type))
2729 return type;
2731 return unsigned_type_for (type);
2734 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2735 the bitmap in which we should store them. */
2737 static struct ivopts_data *fd_ivopts_data;
2738 static tree
2739 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2741 bitmap *depends_on = (bitmap *) data;
2742 struct version_info *info;
2744 if (TREE_CODE (*expr_p) != SSA_NAME)
2745 return NULL_TREE;
2746 info = name_info (fd_ivopts_data, *expr_p);
2748 if (!info->inv_id || info->has_nonlin_use)
2749 return NULL_TREE;
2751 if (!*depends_on)
2752 *depends_on = BITMAP_ALLOC (NULL);
2753 bitmap_set_bit (*depends_on, info->inv_id);
2755 return NULL_TREE;
2758 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2759 position to POS. If USE is not NULL, the candidate is set as related to
2760 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2761 replacement of the final value of the iv by a direct computation. */
2763 static struct iv_cand *
2764 add_candidate_1 (struct ivopts_data *data,
2765 tree base, tree step, bool important, enum iv_position pos,
2766 struct iv_use *use, gimple *incremented_at,
2767 struct iv *orig_iv = NULL)
2769 unsigned i;
2770 struct iv_cand *cand = NULL;
2771 tree type, orig_type;
2773 /* For non-original variables, make sure their values are computed in a type
2774 that does not invoke undefined behavior on overflows (since in general,
2775 we cannot prove that these induction variables are non-wrapping). */
2776 if (pos != IP_ORIGINAL)
2778 orig_type = TREE_TYPE (base);
2779 type = generic_type_for (orig_type);
2780 if (type != orig_type)
2782 base = fold_convert (type, base);
2783 step = fold_convert (type, step);
2787 for (i = 0; i < n_iv_cands (data); i++)
2789 cand = iv_cand (data, i);
2791 if (cand->pos != pos)
2792 continue;
2794 if (cand->incremented_at != incremented_at
2795 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2796 && cand->ainc_use != use))
2797 continue;
2799 if (!cand->iv)
2801 if (!base && !step)
2802 break;
2804 continue;
2807 if (!base && !step)
2808 continue;
2810 if (operand_equal_p (base, cand->iv->base, 0)
2811 && operand_equal_p (step, cand->iv->step, 0)
2812 && (TYPE_PRECISION (TREE_TYPE (base))
2813 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2814 break;
2817 if (i == n_iv_cands (data))
2819 cand = XCNEW (struct iv_cand);
2820 cand->id = i;
2822 if (!base && !step)
2823 cand->iv = NULL;
2824 else
2825 cand->iv = alloc_iv (data, base, step);
2827 cand->pos = pos;
2828 if (pos != IP_ORIGINAL && cand->iv)
2830 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2831 cand->var_after = cand->var_before;
2833 cand->important = important;
2834 cand->incremented_at = incremented_at;
2835 data->iv_candidates.safe_push (cand);
2837 if (step
2838 && TREE_CODE (step) != INTEGER_CST)
2840 fd_ivopts_data = data;
2841 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2844 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2845 cand->ainc_use = use;
2846 else
2847 cand->ainc_use = NULL;
2849 cand->orig_iv = orig_iv;
2850 if (dump_file && (dump_flags & TDF_DETAILS))
2851 dump_cand (dump_file, cand);
2854 if (important && !cand->important)
2856 cand->important = true;
2857 if (dump_file && (dump_flags & TDF_DETAILS))
2858 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2861 if (use)
2863 bitmap_set_bit (use->related_cands, i);
2864 if (dump_file && (dump_flags & TDF_DETAILS))
2865 fprintf (dump_file, "Candidate %d is related to use %d\n",
2866 cand->id, use->id);
2869 return cand;
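/* Editor's note: the lookup-then-push logic above deduplicates
   candidates on (pos, incremented_at, base, step, precision).  Asking
   twice for 0 + 4 * i at IP_NORMAL yields the same iv_cand, possibly
   with its important flag upgraded and another use related to it.  */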
2872 /* Returns true if incrementing the induction variable at the end of the LOOP
2873 is allowed.
2875 The purpose is to avoid splitting the latch edge with a biv increment, thus
2876 creating a jump, possibly confusing other optimization passes and leaving
2877 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2878 is not available (so we do not have a better alternative), or if the latch
2879 edge is already nonempty. */
2881 static bool
2882 allow_ip_end_pos_p (struct loop *loop)
2884 if (!ip_normal_pos (loop))
2885 return true;
2887 if (!empty_block_p (ip_end_pos (loop)))
2888 return true;
2890 return false;
2893 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2894 Important field is set to IMPORTANT. */
2896 static void
2897 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2898 bool important, struct iv_use *use)
2900 basic_block use_bb = gimple_bb (use->stmt);
2901 machine_mode mem_mode;
2902 unsigned HOST_WIDE_INT cstepi;
2904 /* If we insert the increment in any position other than the standard
2905 ones, we must ensure that it is incremented once per iteration.
2906 It must not be in an inner nested loop, or one side of an if
2907 statement. */
2908 if (use_bb->loop_father != data->current_loop
2909 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2910 || stmt_could_throw_p (use->stmt)
2911 || !cst_and_fits_in_hwi (step))
2912 return;
2914 cstepi = int_cst_value (step);
2916 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2917 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2918 || USE_STORE_PRE_INCREMENT (mem_mode))
2919 && GET_MODE_SIZE (mem_mode) == cstepi)
2920 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2921 || USE_STORE_PRE_DECREMENT (mem_mode))
2922 && GET_MODE_SIZE (mem_mode) == -cstepi))
2924 enum tree_code code = MINUS_EXPR;
2925 tree new_base;
2926 tree new_step = step;
2928 if (POINTER_TYPE_P (TREE_TYPE (base)))
2930 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2931 code = POINTER_PLUS_EXPR;
2933 else
2934 new_step = fold_convert (TREE_TYPE (base), new_step);
2935 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2936 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2937 use->stmt);
2939 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2940 || USE_STORE_POST_INCREMENT (mem_mode))
2941 && GET_MODE_SIZE (mem_mode) == cstepi)
2942 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2943 || USE_STORE_POST_DECREMENT (mem_mode))
2944 && GET_MODE_SIZE (mem_mode) == -cstepi))
2946 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2947 use->stmt);
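/* Editor's illustration, assuming GET_MODE_SIZE (mem_mode) == 4: for a
   use *p with step 4 on a target with post-increment, the candidate
   (base, step) itself is added at IP_AFTER_USE, matching *p++; with
   pre-increment, a candidate with base - 4 is added at IP_BEFORE_USE,
   so that ++p yields exactly the address being used.  */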
2951 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2952 position to POS. If USE is not NULL, the candidate is set as related to
2953 it. The candidate computation is scheduled before the exit condition and
2954 at the end of the loop. */
2956 static void
2957 add_candidate (struct ivopts_data *data,
2958 tree base, tree step, bool important, struct iv_use *use,
2959 struct iv *orig_iv = NULL)
2961 gcc_assert (use == NULL || use->sub_id == 0);
2963 if (ip_normal_pos (data->current_loop))
2964 add_candidate_1 (data, base, step, important,
2965 IP_NORMAL, use, NULL, orig_iv);
2966 if (ip_end_pos (data->current_loop)
2967 && allow_ip_end_pos_p (data->current_loop))
2968 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2971 /* Adds standard iv candidates. */
2973 static void
2974 add_standard_iv_candidates (struct ivopts_data *data)
2976 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2978 /* The same for a double-integer type if it is still fast enough. */
2979 if (TYPE_PRECISION
2980 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2981 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2982 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2983 build_int_cst (long_integer_type_node, 1), true, NULL);
2985 /* The same for a double-integer type if it is still fast enough. */
2986 if (TYPE_PRECISION
2987 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2988 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2989 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2990 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2994 /* Adds candidates based on the old induction variable IV. */
2996 static void
2997 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2999 gimple *phi;
3000 tree def;
3001 struct iv_cand *cand;
3003 /* Check if this biv is used in an address type use. */
3004 if (iv->no_overflow && iv->have_address_use
3005 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3006 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3008 tree base = fold_convert (sizetype, iv->base);
3009 tree step = fold_convert (sizetype, iv->step);
3011 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3012 add_candidate (data, base, step, true, NULL, iv);
3013 /* Add iv cand of the original type only if it has nonlinear use. */
3014 if (iv->have_use_for)
3015 add_candidate (data, iv->base, iv->step, true, NULL);
3017 else
3018 add_candidate (data, iv->base, iv->step, true, NULL);
3020 /* The same, but with initial value zero. */
3021 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3022 add_candidate (data, size_int (0), iv->step, true, NULL);
3023 else
3024 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3025 iv->step, true, NULL);
3027 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3028 if (gimple_code (phi) == GIMPLE_PHI)
3030 /* Additionally record the possibility of leaving the original iv
3031 untouched. */
3032 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3033 /* Don't add candidate if it's from another PHI node because
3034 it's an affine iv appearing in the form of PEELED_CHREC. */
3035 phi = SSA_NAME_DEF_STMT (def);
3036 if (gimple_code (phi) != GIMPLE_PHI)
3038 cand = add_candidate_1 (data,
3039 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3040 SSA_NAME_DEF_STMT (def));
3041 cand->var_before = iv->ssa_name;
3042 cand->var_after = def;
3044 else
3045 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3049 /* Adds candidates based on the old induction variables. */
3051 static void
3052 add_iv_candidate_for_bivs (struct ivopts_data *data)
3054 unsigned i;
3055 struct iv *iv;
3056 bitmap_iterator bi;
3058 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3060 iv = ver_info (data, i)->iv;
3061 if (iv && iv->biv_p && !integer_zerop (iv->step))
3062 add_iv_candidate_for_biv (data, iv);
3066 /* Adds candidates based on the value of USE's iv. */
3068 static void
3069 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3071 unsigned HOST_WIDE_INT offset;
3072 tree base;
3073 tree basetype;
3074 struct iv *iv = use->iv;
3076 add_candidate (data, iv->base, iv->step, false, use);
3078 /* The same, but with initial value zero. Make such variable important,
3079 since it is generic enough so that possibly many uses may be based
3080 on it. */
3081 basetype = TREE_TYPE (iv->base);
3082 if (POINTER_TYPE_P (basetype))
3083 basetype = sizetype;
3084 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
3086 /* Third, try removing the constant offset. Make sure to even
3087 add a candidate for &a[0] vs. (T *)&a. */
3088 base = strip_offset (iv->base, &offset);
3089 if (offset || base != iv->base)
3090 add_candidate (data, base, iv->step, false, use);
3092 /* Finally, add auto-increment candidates. Make such variables
3093 important since other iv uses with the same base object may be
3094 based on them. */
3095 if (use != NULL && use->type == USE_ADDRESS)
3096 add_autoinc_candidates (data, iv->base, iv->step, true, use);
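/* Summary example (editor's note): for an address use whose iv is
   {base = &a + 16, step = 4}, the code above adds the candidates
     &a + 16 + 4 * i   (the use's own iv),
     0 + 4 * i         (zero base, marked important),
     &a + 4 * i        (constant offset 16 stripped),
   plus auto-increment variants where the target supports them.  */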
3099 /* Adds candidates based on the uses. */
3101 static void
3102 add_iv_candidate_for_uses (struct ivopts_data *data)
3104 unsigned i;
3106 for (i = 0; i < n_iv_uses (data); i++)
3108 struct iv_use *use = iv_use (data, i);
3110 if (!use)
3111 continue;
3113 switch (use->type)
3115 case USE_NONLINEAR_EXPR:
3116 case USE_COMPARE:
3117 case USE_ADDRESS:
3118 /* Just add the ivs based on the value of the iv used here. */
3119 add_iv_candidate_for_use (data, use);
3120 break;
3122 default:
3123 gcc_unreachable ();
3128 /* Record important candidates and add them to related_cands bitmaps
3129 if needed. */
3131 static void
3132 record_important_candidates (struct ivopts_data *data)
3134 unsigned i;
3135 struct iv_use *use;
3137 for (i = 0; i < n_iv_cands (data); i++)
3139 struct iv_cand *cand = iv_cand (data, i);
3141 if (cand->important)
3142 bitmap_set_bit (data->important_candidates, i);
3145 data->consider_all_candidates = (n_iv_cands (data)
3146 <= CONSIDER_ALL_CANDIDATES_BOUND);
3148 if (data->consider_all_candidates)
3150 /* We will not need "related_cands" bitmaps in this case,
3151 so release them to decrease peak memory consumption. */
3152 for (i = 0; i < n_iv_uses (data); i++)
3154 use = iv_use (data, i);
3155 BITMAP_FREE (use->related_cands);
3158 else
3160 /* Add important candidates to the related_cands bitmaps. */
3161 for (i = 0; i < n_iv_uses (data); i++)
3162 bitmap_ior_into (iv_use (data, i)->related_cands,
3163 data->important_candidates);
3167 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3168 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3169 we allocate a simple list to every use. */
3171 static void
3172 alloc_use_cost_map (struct ivopts_data *data)
3174 unsigned i, size, s;
3176 for (i = 0; i < n_iv_uses (data); i++)
3178 struct iv_use *use = iv_use (data, i);
3180 if (data->consider_all_candidates)
3181 size = n_iv_cands (data);
3182 else
3184 s = bitmap_count_bits (use->related_cands);
3186 /* Round up to a power of two, so that taking the modulo is fast. */
3187 size = s ? (1 << ceil_log2 (s)) : 1;
3190 use->n_map_members = size;
3191 use->cost_map = XCNEWVEC (struct cost_pair, size);
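/* Editor's sketch of the sizing rule: with 5 related candidates,
   s == 5 and size == 1 << ceil_log2 (5) == 8, so the per-use table
   gets 8 slots and "cand->id & 7" can later serve as the hash.  */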
3195 /* Returns a description of the computation cost of an expression whose
3196 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
3198 static comp_cost
3199 new_cost (unsigned runtime, unsigned complexity)
3201 comp_cost cost;
3203 cost.cost = runtime;
3204 cost.complexity = complexity;
3206 return cost;
3209 /* Returns true if COST is infinite. */
3211 static bool
3212 infinite_cost_p (comp_cost cost)
3214 return cost.cost == INFTY;
3217 /* Adds costs COST1 and COST2. */
3219 static comp_cost
3220 add_costs (comp_cost cost1, comp_cost cost2)
3222 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3223 return infinite_cost;
3225 cost1.cost += cost2.cost;
3226 cost1.complexity += cost2.complexity;
3228 return cost1;
3230 /* Subtracts COST2 from COST1. */
3232 static comp_cost
3233 sub_costs (comp_cost cost1, comp_cost cost2)
3235 cost1.cost -= cost2.cost;
3236 cost1.complexity -= cost2.complexity;
3238 return cost1;
3241 /* Returns a negative number if COST1 < COST2, a positive number if
3242 COST1 > COST2, and 0 if COST1 = COST2. */
3244 static int
3245 compare_costs (comp_cost cost1, comp_cost cost2)
3247 if (cost1.cost == cost2.cost)
3248 return cost1.complexity - cost2.complexity;
3250 return cost1.cost - cost2.cost;
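/* Editor's note on the ordering: costs compare lexicographically,
   runtime cost first and complexity as the tie-breaker, so e.g.
   {cost 4, complexity 1} < {cost 4, complexity 2} < {cost 5,
   complexity 0}.  */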
3253 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3254 on invariants DEPENDS_ON and that the value used in expressing it
3255 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3257 static void
3258 set_use_iv_cost (struct ivopts_data *data,
3259 struct iv_use *use, struct iv_cand *cand,
3260 comp_cost cost, bitmap depends_on, tree value,
3261 enum tree_code comp, int inv_expr_id)
3263 unsigned i, s;
3265 if (infinite_cost_p (cost))
3267 BITMAP_FREE (depends_on);
3268 return;
3271 if (data->consider_all_candidates)
3273 use->cost_map[cand->id].cand = cand;
3274 use->cost_map[cand->id].cost = cost;
3275 use->cost_map[cand->id].depends_on = depends_on;
3276 use->cost_map[cand->id].value = value;
3277 use->cost_map[cand->id].comp = comp;
3278 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3279 return;
3282 /* n_map_members is a power of two, so this computes modulo. */
3283 s = cand->id & (use->n_map_members - 1);
3284 for (i = s; i < use->n_map_members; i++)
3285 if (!use->cost_map[i].cand)
3286 goto found;
3287 for (i = 0; i < s; i++)
3288 if (!use->cost_map[i].cand)
3289 goto found;
3291 gcc_unreachable ();
3293 found:
3294 use->cost_map[i].cand = cand;
3295 use->cost_map[i].cost = cost;
3296 use->cost_map[i].depends_on = depends_on;
3297 use->cost_map[i].value = value;
3298 use->cost_map[i].comp = comp;
3299 use->cost_map[i].inv_expr_id = inv_expr_id;
3302 /* Gets cost of (USE, CANDIDATE) pair. */
3304 static struct cost_pair *
3305 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3306 struct iv_cand *cand)
3308 unsigned i, s;
3309 struct cost_pair *ret;
3311 if (!cand)
3312 return NULL;
3314 if (data->consider_all_candidates)
3316 ret = use->cost_map + cand->id;
3317 if (!ret->cand)
3318 return NULL;
3320 return ret;
3323 /* n_map_members is a power of two, so this computes modulo. */
3324 s = cand->id & (use->n_map_members - 1);
3325 for (i = s; i < use->n_map_members; i++)
3326 if (use->cost_map[i].cand == cand)
3327 return use->cost_map + i;
3328 else if (use->cost_map[i].cand == NULL)
3329 return NULL;
3330 for (i = 0; i < s; i++)
3331 if (use->cost_map[i].cand == cand)
3332 return use->cost_map + i;
3333 else if (use->cost_map[i].cand == NULL)
3334 return NULL;
3336 return NULL;
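/* Editor's illustration of the open-addressing probe above: with
   n_map_members == 8 and cand->id == 13, the scan starts at slot
   13 & 7 == 5, walks slots 5..7 and then 0..4, and an empty slot
   means the pair was never recorded (its cost was infinite).  */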
3339 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3340 static rtx
3341 produce_memory_decl_rtl (tree obj, int *regno)
3343 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3344 machine_mode address_mode = targetm.addr_space.address_mode (as);
3345 rtx x;
3347 gcc_assert (obj);
3348 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3350 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3351 x = gen_rtx_SYMBOL_REF (address_mode, name);
3352 SET_SYMBOL_REF_DECL (x, obj);
3353 x = gen_rtx_MEM (DECL_MODE (obj), x);
3354 set_mem_addr_space (x, as);
3355 targetm.encode_section_info (obj, x, true);
3357 else
3359 x = gen_raw_REG (address_mode, (*regno)++);
3360 x = gen_rtx_MEM (DECL_MODE (obj), x);
3361 set_mem_addr_space (x, as);
3364 return x;
3367 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3368 walk_tree. DATA contains the actual fake register number. */
3370 static tree
3371 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3373 tree obj = NULL_TREE;
3374 rtx x = NULL_RTX;
3375 int *regno = (int *) data;
3377 switch (TREE_CODE (*expr_p))
3379 case ADDR_EXPR:
3380 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3381 handled_component_p (*expr_p);
3382 expr_p = &TREE_OPERAND (*expr_p, 0))
3383 continue;
3384 obj = *expr_p;
3385 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3386 x = produce_memory_decl_rtl (obj, regno);
3387 break;
3389 case SSA_NAME:
3390 *ws = 0;
3391 obj = SSA_NAME_VAR (*expr_p);
3392 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3393 if (!obj)
3394 return NULL_TREE;
3395 if (!DECL_RTL_SET_P (obj))
3396 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3397 break;
3399 case VAR_DECL:
3400 case PARM_DECL:
3401 case RESULT_DECL:
3402 *ws = 0;
3403 obj = *expr_p;
3405 if (DECL_RTL_SET_P (obj))
3406 break;
3408 if (DECL_MODE (obj) == BLKmode)
3409 x = produce_memory_decl_rtl (obj, regno);
3410 else
3411 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3413 break;
3415 default:
3416 break;
3419 if (x)
3421 decl_rtl_to_reset.safe_push (obj);
3422 SET_DECL_RTL (obj, x);
3425 return NULL_TREE;
3428 /* Determines cost of the computation of EXPR. */
3430 static unsigned
3431 computation_cost (tree expr, bool speed)
3433 rtx_insn *seq;
3434 rtx rslt;
3435 tree type = TREE_TYPE (expr);
3436 unsigned cost;
3437 /* Avoid using hard regs in ways which may be unsupported. */
3438 int regno = LAST_VIRTUAL_REGISTER + 1;
3439 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3440 enum node_frequency real_frequency = node->frequency;
3442 node->frequency = NODE_FREQUENCY_NORMAL;
3443 crtl->maybe_hot_insn_p = speed;
3444 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3445 start_sequence ();
3446 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3447 seq = get_insns ();
3448 end_sequence ();
3449 default_rtl_profile ();
3450 node->frequency = real_frequency;
3452 cost = seq_cost (seq, speed);
3453 if (MEM_P (rslt))
3454 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3455 TYPE_ADDR_SPACE (type), speed);
3456 else if (!REG_P (rslt))
3457 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3459 return cost;
3462 /* Returns variable containing the value of candidate CAND at statement AT. */
3464 static tree
3465 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3467 if (stmt_after_increment (loop, cand, stmt))
3468 return cand->var_after;
3469 else
3470 return cand->var_before;
3473 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3474 same precision that is at least as wide as the precision of TYPE, stores
3475 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3476 type of A and B. */
3478 static tree
3479 determine_common_wider_type (tree *a, tree *b)
3481 tree wider_type = NULL;
3482 tree suba, subb;
3483 tree atype = TREE_TYPE (*a);
3485 if (CONVERT_EXPR_P (*a))
3487 suba = TREE_OPERAND (*a, 0);
3488 wider_type = TREE_TYPE (suba);
3489 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3490 return atype;
3492 else
3493 return atype;
3495 if (CONVERT_EXPR_P (*b))
3497 subb = TREE_OPERAND (*b, 0);
3498 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3499 return atype;
3501 else
3502 return atype;
3504 *a = suba;
3505 *b = subb;
3506 return wider_type;
3509 /* Determines the expression by which USE is expressed from induction variable
3510 CAND at statement AT in LOOP. The expression is stored in a decomposed
3511 form into AFF. Returns false if USE cannot be expressed using CAND. */
3513 static bool
3514 get_computation_aff (struct loop *loop,
3515 struct iv_use *use, struct iv_cand *cand, gimple *at,
3516 struct aff_tree *aff)
3518 tree ubase = use->iv->base;
3519 tree ustep = use->iv->step;
3520 tree cbase = cand->iv->base;
3521 tree cstep = cand->iv->step, cstep_common;
3522 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3523 tree common_type, var;
3524 tree uutype;
3525 aff_tree cbase_aff, var_aff;
3526 widest_int rat;
3528 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3530 /* We do not have a precision to express the values of use. */
3531 return false;
3534 var = var_at_stmt (loop, cand, at);
3535 uutype = unsigned_type_for (utype);
3537 /* If the conversion is not noop, perform it. */
3538 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3540 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3541 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3543 tree inner_base, inner_step, inner_type;
3544 inner_base = TREE_OPERAND (cbase, 0);
3545 if (CONVERT_EXPR_P (cstep))
3546 inner_step = TREE_OPERAND (cstep, 0);
3547 else
3548 inner_step = cstep;
3550 inner_type = TREE_TYPE (inner_base);
3551 /* If the candidate is added from a biv whose type is smaller than
3552 ctype, we know both the candidate and the biv won't overflow.
3553 In this case, it's safe to skip the conversion in the candidate.
3554 As an example, (unsigned short)((unsigned long)A) equals
3555 (unsigned short)A, if A has a type no larger than short. */
3556 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3558 cbase = inner_base;
3559 cstep = inner_step;
3562 cstep = fold_convert (uutype, cstep);
3563 cbase = fold_convert (uutype, cbase);
3564 var = fold_convert (uutype, var);
3567 if (!constant_multiple_of (ustep, cstep, &rat))
3568 return false;
3570 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3571 type, we achieve better folding by computing their difference in this
3572 wider type, and casting the result to UUTYPE. We do not need to worry
3573 about overflows, as all the arithmetic will in the end be performed in
3574 UUTYPE anyway. */
3575 common_type = determine_common_wider_type (&ubase, &cbase);
3577 /* use = ubase - ratio * cbase + ratio * var. */
3578 tree_to_aff_combination (ubase, common_type, aff);
3579 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3580 tree_to_aff_combination (var, uutype, &var_aff);
3582 /* We need to shift the value if we are after the increment. */
3583 if (stmt_after_increment (loop, cand, at))
3585 aff_tree cstep_aff;
3587 if (common_type != uutype)
3588 cstep_common = fold_convert (common_type, cstep);
3589 else
3590 cstep_common = cstep;
3592 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3593 aff_combination_add (&cbase_aff, &cstep_aff);
3596 aff_combination_scale (&cbase_aff, -rat);
3597 aff_combination_add (aff, &cbase_aff);
3598 if (common_type != uutype)
3599 aff_combination_convert (aff, uutype);
3601 aff_combination_scale (&var_aff, rat);
3602 aff_combination_add (aff, &var_aff);
3604 return true;
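/* Worked example (editor's sketch): let the use iv be {ubase = &a,
   ustep = 4} and the candidate iv be {cbase = 0, cstep = 1}, so
   rat == 4.  Then

     use = ubase - rat * cbase + rat * var == &a + 4 * var

   and when AT lies after the increment, cstep is first added to cbase
   so the already-incremented variable still yields the same value.  */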
3607 /* Return the type of USE. */
3609 static tree
3610 get_use_type (struct iv_use *use)
3612 tree base_type = TREE_TYPE (use->iv->base);
3613 tree type;
3615 if (use->type == USE_ADDRESS)
3617 /* The base_type may be a void pointer. Create a pointer type based on
3618 the mem_ref instead. */
3619 type = build_pointer_type (TREE_TYPE (*use->op_p));
3620 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3621 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3623 else
3624 type = base_type;
3626 return type;
3629 /* Determines the expression by which USE is expressed from induction variable
3630 CAND at statement AT in LOOP. The computation is unshared. */
3632 static tree
3633 get_computation_at (struct loop *loop,
3634 struct iv_use *use, struct iv_cand *cand, gimple *at)
3636 aff_tree aff;
3637 tree type = get_use_type (use);
3639 if (!get_computation_aff (loop, use, cand, at, &aff))
3640 return NULL_TREE;
3641 unshare_aff_combination (&aff);
3642 return fold_convert (type, aff_combination_to_tree (&aff));
3645 /* Determines the expression by which USE is expressed from induction variable
3646 CAND in LOOP. The computation is unshared. */
3648 static tree
3649 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3651 return get_computation_at (loop, use, cand, use->stmt);
3654 /* Adjust the cost COST for being in loop setup rather than loop body.
3655 If we're optimizing for space, the loop setup overhead is constant;
3656 if we're optimizing for speed, amortize it over the per-iteration cost. */
3657 static unsigned
3658 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3660 if (cost == INFTY)
3661 return cost;
3662 else if (optimize_loop_for_speed_p (data->current_loop))
3663 return cost / avg_loop_niter (data->current_loop);
3664 else
3665 return cost;
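/* Editor's example: with an average of 10 iterations and a setup cost
   of 30, optimizing for speed amortizes the setup to 30 / 10 == 3 per
   iteration, while when optimizing for size the full 30 is charged.  */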
3668 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3669 validity for a memory reference accessing memory of mode MODE in
3670 address space AS. */
3673 bool
3674 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3675 addr_space_t as)
3677 #define MAX_RATIO 128
3678 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3679 static vec<sbitmap> valid_mult_list;
3680 sbitmap valid_mult;
3682 if (data_index >= valid_mult_list.length ())
3683 valid_mult_list.safe_grow_cleared (data_index + 1);
3685 valid_mult = valid_mult_list[data_index];
3686 if (!valid_mult)
3688 machine_mode address_mode = targetm.addr_space.address_mode (as);
3689 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3690 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3691 rtx addr, scaled;
3692 HOST_WIDE_INT i;
3694 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3695 bitmap_clear (valid_mult);
3696 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3697 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3698 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3700 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3701 if (memory_address_addr_space_p (mode, addr, as)
3702 || memory_address_addr_space_p (mode, scaled, as))
3703 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3706 if (dump_file && (dump_flags & TDF_DETAILS))
3708 fprintf (dump_file, " allowed multipliers:");
3709 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3710 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3711 fprintf (dump_file, " %d", (int) i);
3712 fprintf (dump_file, "\n");
3713 fprintf (dump_file, "\n");
3716 valid_mult_list[data_index] = valid_mult;
3719 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3720 return false;
3722 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
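/* Usage sketch (editor's assumption about the target): on an x86-like
   machine the probe accepts ratios 1, 2, 4 and 8 for common integer
   modes, so multiplier_allowed_in_address_p (4, SImode, as) comes back
   true while a ratio of 3 does not.  Results are memoized per
   (address space, mode) pair in valid_mult_list.  */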
3725 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3726 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3727 variable is omitted. Compute the cost for a memory reference that accesses
3728 a memory location of mode MEM_MODE in address space AS.
3730 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3731 size of MEM_MODE / RATIO) is available. To make this determination, we
3732 look at the size of the increment to be made, which is given in CSTEP.
3733 CSTEP may be zero if the step is unknown.
3734 STMT_AFTER_INC is true iff the statement we're looking at is after the
3735 increment of the original biv.
3737 TODO -- there must be some better way. This all is quite crude. */
3739 enum ainc_type
3741 AINC_PRE_INC, /* Pre increment. */
3742 AINC_PRE_DEC, /* Pre decrement. */
3743 AINC_POST_INC, /* Post increment. */
3744 AINC_POST_DEC, /* Post decrement. */
3745 AINC_NONE /* Also the number of auto increment types. */
3748 struct address_cost_data
3750 HOST_WIDE_INT min_offset, max_offset;
3751 unsigned costs[2][2][2][2];
3752 unsigned ainc_costs[AINC_NONE];
3756 static comp_cost
3757 get_address_cost (bool symbol_present, bool var_present,
3758 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3759 HOST_WIDE_INT cstep, machine_mode mem_mode,
3760 addr_space_t as, bool speed,
3761 bool stmt_after_inc, bool *may_autoinc)
3763 machine_mode address_mode = targetm.addr_space.address_mode (as);
3764 static vec<address_cost_data *> address_cost_data_list;
3765 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3766 address_cost_data *data;
3767 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3768 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3769 unsigned cost, acost, complexity;
3770 enum ainc_type autoinc_type;
3771 bool offset_p, ratio_p, autoinc;
3772 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3773 unsigned HOST_WIDE_INT mask;
3774 unsigned bits;
3776 if (data_index >= address_cost_data_list.length ())
3777 address_cost_data_list.safe_grow_cleared (data_index + 1);
3779 data = address_cost_data_list[data_index];
3780 if (!data)
3782 HOST_WIDE_INT i;
3783 HOST_WIDE_INT rat, off = 0;
3784 int old_cse_not_expected, width;
3785 unsigned sym_p, var_p, off_p, rat_p, add_c;
3786 rtx_insn *seq;
3787 rtx addr, base;
3788 rtx reg0, reg1;
3790 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3792 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3794 width = GET_MODE_BITSIZE (address_mode) - 1;
3795 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3796 width = HOST_BITS_PER_WIDE_INT - 1;
3797 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3799 for (i = width; i >= 0; i--)
3801 off = -((unsigned HOST_WIDE_INT) 1 << i);
3802 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3803 if (memory_address_addr_space_p (mem_mode, addr, as))
3804 break;
3806 data->min_offset = (i == -1? 0 : off);
3808 for (i = width; i >= 0; i--)
3810 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3811 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3812 if (memory_address_addr_space_p (mem_mode, addr, as))
3813 break;
3814 /* For some strict-alignment targets, the offset must be naturally
3815 aligned. Try an aligned offset if mem_mode is not QImode. */
3816 off = mem_mode != QImode
3817 ? ((unsigned HOST_WIDE_INT) 1 << i)
3818 - GET_MODE_SIZE (mem_mode)
3819 : 0;
3820 if (off > 0)
3822 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3823 if (memory_address_addr_space_p (mem_mode, addr, as))
3824 break;
3827 if (i == -1)
3828 off = 0;
3829 data->max_offset = off;
3831 if (dump_file && (dump_flags & TDF_DETAILS))
3833 fprintf (dump_file, "get_address_cost:\n");
3834 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3835 GET_MODE_NAME (mem_mode),
3836 data->min_offset);
3837 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3838 GET_MODE_NAME (mem_mode),
3839 data->max_offset);
3842 rat = 1;
3843 for (i = 2; i <= MAX_RATIO; i++)
3844 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3846 rat = i;
3847 break;
3850 /* Compute the cost of various addressing modes. */
3851 acost = 0;
3852 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3853 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3855 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3856 || USE_STORE_PRE_DECREMENT (mem_mode))
3858 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3859 has_predec[mem_mode]
3860 = memory_address_addr_space_p (mem_mode, addr, as);
3862 if (has_predec[mem_mode])
3863 data->ainc_costs[AINC_PRE_DEC]
3864 = address_cost (addr, mem_mode, as, speed);
3866 if (USE_LOAD_POST_DECREMENT (mem_mode)
3867 || USE_STORE_POST_DECREMENT (mem_mode))
3869 addr = gen_rtx_POST_DEC (address_mode, reg0);
3870 has_postdec[mem_mode]
3871 = memory_address_addr_space_p (mem_mode, addr, as);
3873 if (has_postdec[mem_mode])
3874 data->ainc_costs[AINC_POST_DEC]
3875 = address_cost (addr, mem_mode, as, speed);
3877 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3878 || USE_STORE_PRE_INCREMENT (mem_mode))
3880 addr = gen_rtx_PRE_INC (address_mode, reg0);
3881 has_preinc[mem_mode]
3882 = memory_address_addr_space_p (mem_mode, addr, as);
3884 if (has_preinc[mem_mode])
3885 data->ainc_costs[AINC_PRE_INC]
3886 = address_cost (addr, mem_mode, as, speed);
3888 if (USE_LOAD_POST_INCREMENT (mem_mode)
3889 || USE_STORE_POST_INCREMENT (mem_mode))
3891 addr = gen_rtx_POST_INC (address_mode, reg0);
3892 has_postinc[mem_mode]
3893 = memory_address_addr_space_p (mem_mode, addr, as);
3895 if (has_postinc[mem_mode])
3896 data->ainc_costs[AINC_POST_INC]
3897 = address_cost (addr, mem_mode, as, speed);
3899 for (i = 0; i < 16; i++)
3901 sym_p = i & 1;
3902 var_p = (i >> 1) & 1;
3903 off_p = (i >> 2) & 1;
3904 rat_p = (i >> 3) & 1;
3906 addr = reg0;
3907 if (rat_p)
3908 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3909 gen_int_mode (rat, address_mode));
3911 if (var_p)
3912 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3914 if (sym_p)
3916 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3917 /* ??? We can run into trouble with some backends by presenting
3918 it with symbols which haven't been properly passed through
3919 targetm.encode_section_info. By setting the local bit, we
3920 enhance the probability of things working. */
3921 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3923 if (off_p)
3924 base = gen_rtx_fmt_e (CONST, address_mode,
3925 gen_rtx_fmt_ee
3926 (PLUS, address_mode, base,
3927 gen_int_mode (off, address_mode)));
3929 else if (off_p)
3930 base = gen_int_mode (off, address_mode);
3931 else
3932 base = NULL_RTX;
3934 if (base)
3935 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3937 start_sequence ();
3938 /* To avoid splitting addressing modes, pretend that no cse will
3939 follow. */
3940 old_cse_not_expected = cse_not_expected;
3941 cse_not_expected = true;
3942 addr = memory_address_addr_space (mem_mode, addr, as);
3943 cse_not_expected = old_cse_not_expected;
3944 seq = get_insns ();
3945 end_sequence ();
3947 acost = seq_cost (seq, speed);
3948 acost += address_cost (addr, mem_mode, as, speed);
3950 if (!acost)
3951 acost = 1;
3952 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3955 /* On some targets, it is quite expensive to load a symbol into a register,
3956 which makes addresses that contain symbols look much more expensive.
3957 However, the symbol will have to be loaded in any case before the
3958 loop (and quite likely we have it in a register already), so it does not
3959 make much sense to penalize them too heavily. So make some final
3960 tweaks for the SYMBOL_PRESENT modes:
3962 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3963 var is cheaper, use this mode with a small penalty.
3964 If VAR_PRESENT is true, try whether the mode with
3965 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3966 and if this is the case, use it. */
3967 add_c = add_cost (speed, address_mode);
3968 for (i = 0; i < 8; i++)
3970 var_p = i & 1;
3971 off_p = (i >> 1) & 1;
3972 rat_p = (i >> 2) & 1;
3974 acost = data->costs[0][1][off_p][rat_p] + 1;
3975 if (var_p)
3976 acost += add_c;
3978 if (acost < data->costs[1][var_p][off_p][rat_p])
3979 data->costs[1][var_p][off_p][rat_p] = acost;
3982 if (dump_file && (dump_flags & TDF_DETAILS))
3984 fprintf (dump_file, "Address costs:\n");
3986 for (i = 0; i < 16; i++)
3988 sym_p = i & 1;
3989 var_p = (i >> 1) & 1;
3990 off_p = (i >> 2) & 1;
3991 rat_p = (i >> 3) & 1;
3993 fprintf (dump_file, " ");
3994 if (sym_p)
3995 fprintf (dump_file, "sym + ");
3996 if (var_p)
3997 fprintf (dump_file, "var + ");
3998 if (off_p)
3999 fprintf (dump_file, "cst + ");
4000 if (rat_p)
4001 fprintf (dump_file, "rat * ");
4003 acost = data->costs[sym_p][var_p][off_p][rat_p];
4004 fprintf (dump_file, "index costs %d\n", acost);
4006 if (has_predec[mem_mode] || has_postdec[mem_mode]
4007 || has_preinc[mem_mode] || has_postinc[mem_mode])
4008 fprintf (dump_file, " May include autoinc/dec\n");
4009 fprintf (dump_file, "\n");
4012 address_cost_data_list[data_index] = data;
4015 bits = GET_MODE_BITSIZE (address_mode);
4016 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4017 offset &= mask;
4018 if ((offset >> (bits - 1) & 1))
4019 offset |= ~mask;
4020 s_offset = offset;
4022 autoinc = false;
4023 autoinc_type = AINC_NONE;
4024 msize = GET_MODE_SIZE (mem_mode);
4025 autoinc_offset = offset;
4026 if (stmt_after_inc)
4027 autoinc_offset += ratio * cstep;
4028 if (symbol_present || var_present || ratio != 1)
4029 autoinc = false;
4030 else
4032 if (has_postinc[mem_mode] && autoinc_offset == 0
4033 && msize == cstep)
4034 autoinc_type = AINC_POST_INC;
4035 else if (has_postdec[mem_mode] && autoinc_offset == 0
4036 && msize == -cstep)
4037 autoinc_type = AINC_POST_DEC;
4038 else if (has_preinc[mem_mode] && autoinc_offset == msize
4039 && msize == cstep)
4040 autoinc_type = AINC_PRE_INC;
4041 else if (has_predec[mem_mode] && autoinc_offset == -msize
4042 && msize == -cstep)
4043 autoinc_type = AINC_PRE_DEC;
4045 if (autoinc_type != AINC_NONE)
4046 autoinc = true;
4049 cost = 0;
4050 offset_p = (s_offset != 0
4051 && data->min_offset <= s_offset
4052 && s_offset <= data->max_offset);
4053 ratio_p = (ratio != 1
4054 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4056 if (ratio != 1 && !ratio_p)
4057 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4059 if (s_offset && !offset_p && !symbol_present)
4060 cost += add_cost (speed, address_mode);
4062 if (may_autoinc)
4063 *may_autoinc = autoinc;
4064 if (autoinc)
4065 acost = data->ainc_costs[autoinc_type];
4066 else
4067 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4068 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4069 return new_cost (cost + acost, complexity);
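/* Editor's illustration of the final lookup: an address of the shape
   sym + off + 4 * index with the offset in range maps to
   data->costs[1][0][1][1] with complexity 3, while a matched
   post-increment (no symbol or var, offset 0, ratio 1, msize == cstep)
   is charged data->ainc_costs[AINC_POST_INC] instead.  */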
4072 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
4073 the EXPR operand holding the shift. COST0 and COST1 are the costs for
4074 calculating the operands of EXPR. Returns true if successful, and returns
4075 the cost in COST. */
4077 static bool
4078 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4079 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4081 comp_cost res;
4082 tree op1 = TREE_OPERAND (expr, 1);
4083 tree cst = TREE_OPERAND (mult, 1);
4084 tree multop = TREE_OPERAND (mult, 0);
4085 int m = exact_log2 (int_cst_value (cst));
4086 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4087 int as_cost, sa_cost;
4088 bool mult_in_op1;
4090 if (!(m >= 0 && m < maxm))
4091 return false;
4093 STRIP_NOPS (op1);
4094 mult_in_op1 = operand_equal_p (op1, mult, 0);
4096 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4098 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4099 use that in preference to a shift insn followed by an add insn. */
4100 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4101 ? shiftadd_cost (speed, mode, m)
4102 : (mult_in_op1
4103 ? shiftsub1_cost (speed, mode, m)
4104 : shiftsub0_cost (speed, mode, m)));
4106 res = new_cost (MIN (as_cost, sa_cost), 0);
4107 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4109 STRIP_NOPS (multop);
4110 if (!is_gimple_val (multop))
4111 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4113 *cost = res;
4114 return true;
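/* Editor's sketch: for expr = b + a * 4, mult is a * 4 and m == 2; the
   choice is between "shift left by 2, then add" (as_cost) and a single
   shift-add instruction (sa_cost), and the cheaper of the two plus the
   cost of the other operand becomes *cost.  */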
4117 /* Estimates cost of forcing expression EXPR into a variable. */
4119 static comp_cost
4120 force_expr_to_var_cost (tree expr, bool speed)
4122 static bool costs_initialized = false;
4123 static unsigned integer_cost [2];
4124 static unsigned symbol_cost [2];
4125 static unsigned address_cost [2];
4126 tree op0, op1;
4127 comp_cost cost0, cost1, cost;
4128 machine_mode mode;
4130 if (!costs_initialized)
4132 tree type = build_pointer_type (integer_type_node);
4133 tree var, addr;
4134 rtx x;
4135 int i;
4137 var = create_tmp_var_raw (integer_type_node, "test_var");
4138 TREE_STATIC (var) = 1;
4139 x = produce_memory_decl_rtl (var, NULL);
4140 SET_DECL_RTL (var, x);
4142 addr = build1 (ADDR_EXPR, type, var);
4145 for (i = 0; i < 2; i++)
4147 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4148 2000), i);
4150 symbol_cost[i] = computation_cost (addr, i) + 1;
4152 address_cost[i]
4153 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4154 if (dump_file && (dump_flags & TDF_DETAILS))
4156 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4157 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4158 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4159 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4160 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4161 fprintf (dump_file, "\n");
4165 costs_initialized = true;
4168 STRIP_NOPS (expr);
4170 if (SSA_VAR_P (expr))
4171 return no_cost;
4173 if (is_gimple_min_invariant (expr))
4175 if (TREE_CODE (expr) == INTEGER_CST)
4176 return new_cost (integer_cost [speed], 0);
4178 if (TREE_CODE (expr) == ADDR_EXPR)
4180 tree obj = TREE_OPERAND (expr, 0);
4182 if (TREE_CODE (obj) == VAR_DECL
4183 || TREE_CODE (obj) == PARM_DECL
4184 || TREE_CODE (obj) == RESULT_DECL)
4185 return new_cost (symbol_cost [speed], 0);
4188 return new_cost (address_cost [speed], 0);
4191 switch (TREE_CODE (expr))
4193 case POINTER_PLUS_EXPR:
4194 case PLUS_EXPR:
4195 case MINUS_EXPR:
4196 case MULT_EXPR:
4197 op0 = TREE_OPERAND (expr, 0);
4198 op1 = TREE_OPERAND (expr, 1);
4199 STRIP_NOPS (op0);
4200 STRIP_NOPS (op1);
4201 break;
4203 CASE_CONVERT:
4204 case NEGATE_EXPR:
4205 op0 = TREE_OPERAND (expr, 0);
4206 STRIP_NOPS (op0);
4207 op1 = NULL_TREE;
4208 break;
4210 default:
4211 /* Just an arbitrary value, FIXME. */
4212 return new_cost (target_spill_cost[speed], 0);
4215 if (op0 == NULL_TREE
4216 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4217 cost0 = no_cost;
4218 else
4219 cost0 = force_expr_to_var_cost (op0, speed);
4221 if (op1 == NULL_TREE
4222 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4223 cost1 = no_cost;
4224 else
4225 cost1 = force_expr_to_var_cost (op1, speed);
4227 mode = TYPE_MODE (TREE_TYPE (expr));
4228 switch (TREE_CODE (expr))
4230 case POINTER_PLUS_EXPR:
4231 case PLUS_EXPR:
4232 case MINUS_EXPR:
4233 case NEGATE_EXPR:
4234 cost = new_cost (add_cost (speed, mode), 0);
4235 if (TREE_CODE (expr) != NEGATE_EXPR)
4237 tree mult = NULL_TREE;
4238 comp_cost sa_cost;
4239 if (TREE_CODE (op1) == MULT_EXPR)
4240 mult = op1;
4241 else if (TREE_CODE (op0) == MULT_EXPR)
4242 mult = op0;
4244 if (mult != NULL_TREE
4245 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4246 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4247 speed, &sa_cost))
4248 return sa_cost;
4250 break;
4252 CASE_CONVERT:
4254 tree inner_mode, outer_mode;
4255 outer_mode = TREE_TYPE (expr);
4256 inner_mode = TREE_TYPE (op0);
4257 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4258 TYPE_MODE (inner_mode), speed), 0);
4260 break;
4262 case MULT_EXPR:
4263 if (cst_and_fits_in_hwi (op0))
4264 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4265 mode, speed), 0);
4266 else if (cst_and_fits_in_hwi (op1))
4267 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4268 mode, speed), 0);
4269 else
4270 return new_cost (target_spill_cost [speed], 0);
4271 break;
4273 default:
4274 gcc_unreachable ();
4277 cost = add_costs (cost, cost0);
4278 cost = add_costs (cost, cost1);
4280 /* Bound the cost by target_spill_cost. The parts of complicated
4281 computations often are either loop invariant or at least can
4282 be shared between several iv uses, so letting this grow without
4283 limits would not give reasonable results. */
4284 if (cost.cost > (int) target_spill_cost [speed])
4285 cost.cost = target_spill_cost [speed];
4287 return cost;
4290 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4291 invariants the computation depends on. */
4293 static comp_cost
4294 force_var_cost (struct ivopts_data *data,
4295 tree expr, bitmap *depends_on)
4297 if (depends_on)
4299 fd_ivopts_data = data;
4300 walk_tree (&expr, find_depends, depends_on, NULL);
4303 return force_expr_to_var_cost (expr, data->speed);
4306 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4307 value of offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
4308 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4309 invariants the computation depends on. */
4311 static comp_cost
4312 split_address_cost (struct ivopts_data *data,
4313 tree addr, bool *symbol_present, bool *var_present,
4314 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4316 tree core;
4317 HOST_WIDE_INT bitsize;
4318 HOST_WIDE_INT bitpos;
4319 tree toffset;
4320 machine_mode mode;
4321 int unsignedp, reversep, volatilep;
4323 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4324 &unsignedp, &reversep, &volatilep, false);
4326 if (toffset != 0
4327 || bitpos % BITS_PER_UNIT != 0
4328 || reversep
4329 || TREE_CODE (core) != VAR_DECL)
4331 *symbol_present = false;
4332 *var_present = true;
4333 fd_ivopts_data = data;
4334 if (depends_on)
4335 walk_tree (&addr, find_depends, depends_on, NULL);
4337 return new_cost (target_spill_cost[data->speed], 0);
4340 *offset += bitpos / BITS_PER_UNIT;
4341 if (TREE_STATIC (core)
4342 || DECL_EXTERNAL (core))
4344 *symbol_present = true;
4345 *var_present = false;
4346 return no_cost;
4349 *symbol_present = false;
4350 *var_present = true;
4351 return no_cost;
4354 /* Estimates cost of expressing difference of addresses E1 - E2 as
4355 var + symbol + offset. The value of offset is added to OFFSET;
4356 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4357 part is missing. DEPENDS_ON is a set of the invariants the computation
4358 depends on. */
4360 static comp_cost
4361 ptr_difference_cost (struct ivopts_data *data,
4362 tree e1, tree e2, bool *symbol_present, bool *var_present,
4363 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4365 HOST_WIDE_INT diff = 0;
4366 aff_tree aff_e1, aff_e2;
4367 tree type;
4369 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4371 if (ptr_difference_const (e1, e2, &diff))
4373 *offset += diff;
4374 *symbol_present = false;
4375 *var_present = false;
4376 return no_cost;
4379 if (integer_zerop (e2))
4380 return split_address_cost (data, TREE_OPERAND (e1, 0),
4381 symbol_present, var_present, offset, depends_on);
4383 *symbol_present = false;
4384 *var_present = true;
4386 type = signed_type_for (TREE_TYPE (e1));
4387 tree_to_aff_combination (e1, type, &aff_e1);
4388 tree_to_aff_combination (e2, type, &aff_e2);
4389 aff_combination_scale (&aff_e2, -1);
4390 aff_combination_add (&aff_e1, &aff_e2);
4392 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4395 /* Estimates cost of expressing difference E1 - E2 as
4396 var + symbol + offset. The value of offset is added to OFFSET;
4397 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4398 part is missing. DEPENDS_ON is a set of the invariants the computation
4399 depends on. */
4401 static comp_cost
4402 difference_cost (struct ivopts_data *data,
4403 tree e1, tree e2, bool *symbol_present, bool *var_present,
4404 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4406 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4407 unsigned HOST_WIDE_INT off1, off2;
4408 aff_tree aff_e1, aff_e2;
4409 tree type;
4411 e1 = strip_offset (e1, &off1);
4412 e2 = strip_offset (e2, &off2);
4413 *offset += off1 - off2;
4415 STRIP_NOPS (e1);
4416 STRIP_NOPS (e2);
4418 if (TREE_CODE (e1) == ADDR_EXPR)
4419 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4420 offset, depends_on);
4421 *symbol_present = false;
4423 if (operand_equal_p (e1, e2, 0))
4425 *var_present = false;
4426 return no_cost;
4429 *var_present = true;
4431 if (integer_zerop (e2))
4432 return force_var_cost (data, e1, depends_on);
4434 if (integer_zerop (e1))
4436 comp_cost cost = force_var_cost (data, e2, depends_on);
4437 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4438 return cost;
4441 type = signed_type_for (TREE_TYPE (e1));
4442 tree_to_aff_combination (e1, type, &aff_e1);
4443 tree_to_aff_combination (e2, type, &aff_e2);
4444 aff_combination_scale (&aff_e2, -1);
4445 aff_combination_add (&aff_e1, &aff_e2);
4447 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4450 /* Returns true if AFF1 and AFF2 are identical. */
4452 static bool
4453 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4455 unsigned i;
4457 if (aff1->n != aff2->n)
4458 return false;
4460 for (i = 0; i < aff1->n; i++)
4462 if (aff1->elts[i].coef != aff2->elts[i].coef)
4463 return false;
4465 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4466 return false;
4468 return true;
4471 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4473 static int
4474 get_expr_id (struct ivopts_data *data, tree expr)
4476 struct iv_inv_expr_ent ent;
4477 struct iv_inv_expr_ent **slot;
4479 ent.expr = expr;
4480 ent.hash = iterative_hash_expr (expr, 0);
4481 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4482 if (*slot)
4483 return (*slot)->id;
4485 *slot = XNEW (struct iv_inv_expr_ent);
4486 (*slot)->expr = expr;
4487 (*slot)->hash = ent.hash;
4488 (*slot)->id = data->inv_expr_id++;
4489 return (*slot)->id;
4492 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4493 requires a new compiler-generated temporary. Returns -1 otherwise.
4494 ADDRESS_P is a flag indicating if the expression is for address
4495 computation. */
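/* E.g., for ubase == &a[i] + 4 and cbase == &a[i] with ratio == 1, the
   bases agree once the constant offset is stripped, so no new temporary
   is needed and -1 is returned.  */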
4497 static int
4498 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4499 tree cbase, HOST_WIDE_INT ratio,
4500 bool address_p)
4502 aff_tree ubase_aff, cbase_aff;
4503 tree expr, ub, cb;
4505 STRIP_NOPS (ubase);
4506 STRIP_NOPS (cbase);
4507 ub = ubase;
4508 cb = cbase;
4510 if ((TREE_CODE (ubase) == INTEGER_CST)
4511 && (TREE_CODE (cbase) == INTEGER_CST))
4512 return -1;
4514 /* Strips the constant part. */
4515 if (TREE_CODE (ubase) == PLUS_EXPR
4516 || TREE_CODE (ubase) == MINUS_EXPR
4517 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4519 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4520 ubase = TREE_OPERAND (ubase, 0);
4523 /* Strips the constant part. */
4524 if (TREE_CODE (cbase) == PLUS_EXPR
4525 || TREE_CODE (cbase) == MINUS_EXPR
4526 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4528 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4529 cbase = TREE_OPERAND (cbase, 0);
4532 if (address_p)
4534 if (((TREE_CODE (ubase) == SSA_NAME)
4535 || (TREE_CODE (ubase) == ADDR_EXPR
4536 && is_gimple_min_invariant (ubase)))
4537 && (TREE_CODE (cbase) == INTEGER_CST))
4538 return -1;
4540 if (((TREE_CODE (cbase) == SSA_NAME)
4541 || (TREE_CODE (cbase) == ADDR_EXPR
4542 && is_gimple_min_invariant (cbase)))
4543 && (TREE_CODE (ubase) == INTEGER_CST))
4544 return -1;
4547 if (ratio == 1)
4549 if (operand_equal_p (ubase, cbase, 0))
4550 return -1;
4552 if (TREE_CODE (ubase) == ADDR_EXPR
4553 && TREE_CODE (cbase) == ADDR_EXPR)
4555 tree usym, csym;
4557 usym = TREE_OPERAND (ubase, 0);
4558 csym = TREE_OPERAND (cbase, 0);
4559 if (TREE_CODE (usym) == ARRAY_REF)
4561 tree ind = TREE_OPERAND (usym, 1);
4562 if (TREE_CODE (ind) == INTEGER_CST
4563 && tree_fits_shwi_p (ind)
4564 && tree_to_shwi (ind) == 0)
4565 usym = TREE_OPERAND (usym, 0);
4567 if (TREE_CODE (csym) == ARRAY_REF)
4569 tree ind = TREE_OPERAND (csym, 1);
4570 if (TREE_CODE (ind) == INTEGER_CST
4571 && tree_fits_shwi_p (ind)
4572 && tree_to_shwi (ind) == 0)
4573 csym = TREE_OPERAND (csym, 0);
4575 if (operand_equal_p (usym, csym, 0))
4576 return -1;
4578 /* Now do a more complex comparison. */
4579 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4580 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4581 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4582 return -1;
4585 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4586 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4588 aff_combination_scale (&cbase_aff, -1 * ratio);
4589 aff_combination_add (&ubase_aff, &cbase_aff);
4590 expr = aff_combination_to_tree (&ubase_aff);
4591 return get_expr_id (data, expr);
4596 /* Determines the cost of the computation by which USE is expressed
4597 from induction variable CAND. If ADDRESS_P is true, we just need
4598 to create an address from it, otherwise we want to get it into a
4599 register. A set of invariants we depend on is stored in
4600 DEPENDS_ON. AT is the statement at which the value is computed.
4601 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4602 addressing is likely. */
4604 static comp_cost
4605 get_computation_cost_at (struct ivopts_data *data,
4606 struct iv_use *use, struct iv_cand *cand,
4607 bool address_p, bitmap *depends_on, gimple *at,
4608 bool *can_autoinc,
4609 int *inv_expr_id)
4611 tree ubase = use->iv->base, ustep = use->iv->step;
4612 tree cbase, cstep;
4613 tree utype = TREE_TYPE (ubase), ctype;
4614 unsigned HOST_WIDE_INT cstepi, offset = 0;
4615 HOST_WIDE_INT ratio, aratio;
4616 bool var_present, symbol_present, stmt_is_after_inc;
4617 comp_cost cost;
4618 widest_int rat;
4619 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4620 machine_mode mem_mode = (address_p
4621 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4622 : VOIDmode);
4624 if (depends_on)
4625 *depends_on = NULL;
4627 /* Only consider real candidates. */
4628 if (!cand->iv)
4629 return infinite_cost;
4631 cbase = cand->iv->base;
4632 cstep = cand->iv->step;
4633 ctype = TREE_TYPE (cbase);
4635 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4637 /* We do not have enough precision to express the values of the use. */
4638 return infinite_cost;
4641 if (address_p
4642 || (use->iv->base_object
4643 && cand->iv->base_object
4644 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4645 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4647 /* Do not try to express the address of an object with a computation
4648 based on the address of a different object. This may cause problems
4649 in rtl level alias analysis (which does not expect this to happen,
4650 as it is illegal in C), and would be unlikely to be useful
4651 anyway. */
4652 if (use->iv->base_object
4653 && cand->iv->base_object
4654 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4655 return infinite_cost;
4658 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4660 /* TODO -- add direct handling of this case. */
4661 goto fallback;
4664 /* CSTEPI is removed from the offset in case the statement is after the
4665 increment. If the step is not constant, we use zero instead.
4666 This is a bit imprecise (there is the extra addition), but
4667 redundancy elimination is likely to transform the code so that
4668 it uses the value of the variable before the increment anyway,
4669 so this is not too unrealistic. */
4670 if (cst_and_fits_in_hwi (cstep))
4671 cstepi = int_cst_value (cstep);
4672 else
4673 cstepi = 0;
4675 if (!constant_multiple_of (ustep, cstep, &rat))
4676 return infinite_cost;
4678 if (wi::fits_shwi_p (rat))
4679 ratio = rat.to_shwi ();
4680 else
4681 return infinite_cost;
4683 STRIP_NOPS (cbase);
4684 ctype = TREE_TYPE (cbase);
4686 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4688 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4689 or ratio == 1, it is better to handle this like
4691 ubase - ratio * cbase + ratio * var
4693 (this also holds in the case ratio == -1, TODO). */
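/* E.g., with ubase == 100, cbase == 4 and ratio == 4, the use
   100 + 4 * (var - 4) is rewritten as (100 - 4 * 4) + 4 * var,
   so the constant part folds into the offset handled below.  */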
4695 if (cst_and_fits_in_hwi (cbase))
4697 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4698 cost = difference_cost (data,
4699 ubase, build_int_cst (utype, 0),
4700 &symbol_present, &var_present, &offset,
4701 depends_on);
4702 cost.cost /= avg_loop_niter (data->current_loop);
4704 else if (ratio == 1)
4706 tree real_cbase = cbase;
4708 /* Check to see if any adjustment is needed. */
4709 if (cstepi == 0 && stmt_is_after_inc)
4711 aff_tree real_cbase_aff;
4712 aff_tree cstep_aff;
4714 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4715 &real_cbase_aff);
4716 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4718 aff_combination_add (&real_cbase_aff, &cstep_aff);
4719 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4722 cost = difference_cost (data,
4723 ubase, real_cbase,
4724 &symbol_present, &var_present, &offset,
4725 depends_on);
4726 cost.cost /= avg_loop_niter (data->current_loop);
4728 else if (address_p
4729 && !POINTER_TYPE_P (ctype)
4730 && multiplier_allowed_in_address_p
4731 (ratio, mem_mode,
4732 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4734 if (cstepi == 0 && stmt_is_after_inc)
4736 if (POINTER_TYPE_P (ctype))
4737 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4738 else
4739 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4741 cbase
4742 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4743 cost = difference_cost (data,
4744 ubase, cbase,
4745 &symbol_present, &var_present, &offset,
4746 depends_on);
4747 cost.cost /= avg_loop_niter (data->current_loop);
4749 else
4751 cost = force_var_cost (data, cbase, depends_on);
4752 cost = add_costs (cost,
4753 difference_cost (data,
4754 ubase, build_int_cst (utype, 0),
4755 &symbol_present, &var_present,
4756 &offset, depends_on));
4757 cost.cost /= avg_loop_niter (data->current_loop);
4758 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4761 /* The set of invariants depended on by a sub-use has already been
4762 computed for the first use in the group. */
4763 if (use->sub_id)
4765 cost.cost = 0;
4766 if (depends_on && *depends_on)
4767 bitmap_clear (*depends_on);
4769 else if (inv_expr_id)
4771 *inv_expr_id =
4772 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4773 /* Clear the depends_on set. */
4774 if (*inv_expr_id != -1 && depends_on && *depends_on)
4775 bitmap_clear (*depends_on);
4778 /* If we are after the increment, the value of the candidate is higher by
4779 one iteration. */
4780 if (stmt_is_after_inc)
4781 offset -= ratio * cstepi;
4783 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4784 (symbol/var1/const parts may be omitted). If we are looking for an
4785 address, find the cost of addressing this. */
4786 if (address_p)
4787 return add_costs (cost,
4788 get_address_cost (symbol_present, var_present,
4789 offset, ratio, cstepi,
4790 mem_mode,
4791 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4792 speed, stmt_is_after_inc,
4793 can_autoinc));
4795 /* Otherwise estimate the costs for computing the expression. */
4796 if (!symbol_present && !var_present && !offset)
4798 if (ratio != 1)
4799 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4800 return cost;
4803 /* Symbol + offset should be compile-time computable, so assume they
4804 are added to the variable once, if present. */
4805 if (var_present && (symbol_present || offset))
4806 cost.cost += adjust_setup_cost (data,
4807 add_cost (speed, TYPE_MODE (ctype)));
4809 /* Having an offset does not affect the runtime cost if it is added to
4810 the symbol, but it does increase complexity. */
4811 if (offset)
4812 cost.complexity++;
4814 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4816 aratio = ratio > 0 ? ratio : -ratio;
4817 if (aratio != 1)
4818 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4819 return cost;
4821 fallback:
4822 if (can_autoinc)
4823 *can_autoinc = false;
4826 /* Just get the expression, expand it and measure the cost. */
4827 tree comp = get_computation_at (data->current_loop, use, cand, at);
4829 if (!comp)
4830 return infinite_cost;
4832 if (address_p)
4833 comp = build_simple_mem_ref (comp);
4835 return new_cost (computation_cost (comp, speed), 0);
4839 /* Determines the cost of the computation by which USE is expressed
4840 from induction variable CAND. If ADDRESS_P is true, we just need
4841 to create an address from it, otherwise we want to get it into a
4842 register. A set of invariants we depend on is stored in
4843 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4844 autoinc addressing is likely. */
4846 static comp_cost
4847 get_computation_cost (struct ivopts_data *data,
4848 struct iv_use *use, struct iv_cand *cand,
4849 bool address_p, bitmap *depends_on,
4850 bool *can_autoinc, int *inv_expr_id)
4852 return get_computation_cost_at (data,
4853 use, cand, address_p, depends_on, use->stmt,
4854 can_autoinc, inv_expr_id);
4857 /* Determines cost of basing replacement of USE on CAND in a generic
4858 expression. */
4860 static bool
4861 determine_use_iv_cost_generic (struct ivopts_data *data,
4862 struct iv_use *use, struct iv_cand *cand)
4864 bitmap depends_on;
4865 comp_cost cost;
4866 int inv_expr_id = -1;
4868 /* The simple case first -- if we need to express the value of the preserved
4869 original biv, the cost is 0. This also prevents us from counting the
4870 cost of increment twice -- once at this use and once in the cost of
4871 the candidate. */
4872 if (cand->pos == IP_ORIGINAL
4873 && cand->incremented_at == use->stmt)
4875 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4876 ERROR_MARK, -1);
4877 return true;
4880 cost = get_computation_cost (data, use, cand, false, &depends_on,
4881 NULL, &inv_expr_id);
4883 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4884 inv_expr_id);
4886 return !infinite_cost_p (cost);
4889 /* Determines cost of basing replacement of USE on CAND in an address. */
4891 static bool
4892 determine_use_iv_cost_address (struct ivopts_data *data,
4893 struct iv_use *use, struct iv_cand *cand)
4895 bitmap depends_on;
4896 bool can_autoinc;
4897 int inv_expr_id = -1;
4898 struct iv_use *sub_use;
4899 comp_cost sub_cost;
4900 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4901 &can_autoinc, &inv_expr_id);
4903 if (cand->ainc_use == use)
4905 if (can_autoinc)
4906 cost.cost -= cand->cost_step;
4907 /* If we generated the candidate solely for exploiting autoincrement
4908 opportunities, and it turns out it can't be used, set the cost to
4909 infinity to make sure we ignore it. */
4910 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4911 cost = infinite_cost;
4913 for (sub_use = use->next;
4914 sub_use && !infinite_cost_p (cost);
4915 sub_use = sub_use->next)
4917 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
4918 &can_autoinc, NULL);
4919 cost = add_costs (cost, sub_cost);
4922 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4923 inv_expr_id);
4925 return !infinite_cost_p (cost);
4928 /* Computes value of candidate CAND at position AT in iteration NITER, and
4929 stores it to VAL. */
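/* That is, VAL = BASE + NITER * STEP, plus one extra STEP when AT is
   after the increment of CAND; the arithmetic is carried out in an
   unsigned type.  */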
4931 static void
4932 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4933 aff_tree *val)
4935 aff_tree step, delta, nit;
4936 struct iv *iv = cand->iv;
4937 tree type = TREE_TYPE (iv->base);
4938 /* Compute the value in an unsigned type, so that the arithmetic wraps
4939 rather than invoking undefined overflow. */
4941 tree steptype = unsigned_type_for (type);
4943 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4944 aff_combination_convert (&step, steptype);
4945 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4946 aff_combination_convert (&nit, steptype);
4947 aff_combination_mult (&nit, &step, &delta);
4948 if (stmt_after_increment (loop, cand, at))
4949 aff_combination_add (&delta, &step);
4951 tree_to_aff_combination (iv->base, type, val);
4952 if (!POINTER_TYPE_P (type))
4953 aff_combination_convert (val, steptype);
4954 aff_combination_add (val, &delta);
4957 /* Returns the period of induction variable IV. */
4959 static tree
4960 iv_period (struct iv *iv)
4962 tree step = iv->step, period, type;
4963 tree pow2div;
4965 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4967 type = unsigned_type_for (TREE_TYPE (step));
4968 /* The period of the iv is lcm (step, type_range)/step - 1,
4969 i.e., N*type_range/step - 1. Since the type range is a power
4970 of two, N == step >> num_of_ending_zeros_binary (step),
4971 so the final result is
4973 (type_range >> num_of_ending_zeros_binary (step)) - 1
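For example, a 32-bit unsigned iv with step 12 has two trailing
zero bits in its step, so its period is
(0xffffffff >> 2) == 0x3fffffff iterations. */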
4976 pow2div = num_ending_zeros (step);
4978 period = build_low_bits_mask (type,
4979 (TYPE_PRECISION (type)
4980 - tree_to_uhwi (pow2div)));
4982 return period;
4985 /* Returns the comparison operator used when eliminating the iv USE. */
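/* E.g., if the TRUE edge of the exit condition leaves the loop, the
   eliminated test becomes CAND == BOUND; if the FALSE edge leaves it,
   CAND != BOUND is used instead.  */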
4987 static enum tree_code
4988 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4990 struct loop *loop = data->current_loop;
4991 basic_block ex_bb;
4992 edge exit;
4994 ex_bb = gimple_bb (use->stmt);
4995 exit = EDGE_SUCC (ex_bb, 0);
4996 if (flow_bb_inside_loop_p (loop, exit->dest))
4997 exit = EDGE_SUCC (ex_bb, 1);
4999 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5002 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5003 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5004 calculation is performed in non-wrapping type.
5006 TODO: More generally, we could test for the situation that
5007 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5008 This would require knowing the sign of OFFSET. */
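/* E.g., for BASE == p + n and OFFSET == n, the affine expansions of n
   and OFFSET cancel, proving that BASE - OFFSET == p cannot
   overflow.  */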
5010 static bool
5011 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5013 enum tree_code code;
5014 tree e1, e2;
5015 aff_tree aff_e1, aff_e2, aff_offset;
5017 if (!nowrap_type_p (TREE_TYPE (base)))
5018 return false;
5020 base = expand_simple_operations (base);
5022 if (TREE_CODE (base) == SSA_NAME)
5024 gimple *stmt = SSA_NAME_DEF_STMT (base);
5026 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5027 return false;
5029 code = gimple_assign_rhs_code (stmt);
5030 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5031 return false;
5033 e1 = gimple_assign_rhs1 (stmt);
5034 e2 = gimple_assign_rhs2 (stmt);
5036 else
5038 code = TREE_CODE (base);
5039 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5040 return false;
5041 e1 = TREE_OPERAND (base, 0);
5042 e2 = TREE_OPERAND (base, 1);
5045 /* Use affine expansion as a deeper inspection to prove the equality. */
5046 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5047 &aff_e2, &data->name_expansion_cache);
5048 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5049 &aff_offset, &data->name_expansion_cache);
5050 aff_combination_scale (&aff_offset, -1);
5051 switch (code)
5053 case PLUS_EXPR:
5054 aff_combination_add (&aff_e2, &aff_offset);
5055 if (aff_combination_zero_p (&aff_e2))
5056 return true;
5058 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5059 &aff_e1, &data->name_expansion_cache);
5060 aff_combination_add (&aff_e1, &aff_offset);
5061 return aff_combination_zero_p (&aff_e1);
5063 case POINTER_PLUS_EXPR:
5064 aff_combination_add (&aff_e2, &aff_offset);
5065 return aff_combination_zero_p (&aff_e2);
5067 default:
5068 return false;
5072 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
5073 comparison with CAND. NITER describes the number of iterations of
5074 the loop. If successful, the comparison in COMP_P is altered accordingly.
5076 We aim to handle the following situation:
5078 sometype *base, *p;
5079 int a, b, i;
5081 i = a;
5082 p = p_0 = base + a;
5086 bla (*p);
5087 p++;
5088 i++;
5090 while (i < b);
5092 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5093 We aim to optimize this to
5095 p = p_0 = base + a;
5098 bla (*p);
5099 p++;
5101 while (p < p_0 - a + b);
5103 This preserves correctness, since the pointer arithmetic does not
5104 overflow. More precisely:
5106 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5107 overflow in computing it or the values of p.
5108 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5109 overflow. To prove this, we use the fact that p_0 = base + a. */
5111 static bool
5112 iv_elimination_compare_lt (struct ivopts_data *data,
5113 struct iv_cand *cand, enum tree_code *comp_p,
5114 struct tree_niter_desc *niter)
5116 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5117 struct aff_tree nit, tmpa, tmpb;
5118 enum tree_code comp;
5119 HOST_WIDE_INT step;
5121 /* We need to know that the candidate induction variable does not overflow.
5122 While more complex analysis may be used to prove this, for now just
5123 check that the variable appears in the original program and that it
5124 is computed in a type that guarantees no overflows. */
5125 cand_type = TREE_TYPE (cand->iv->base);
5126 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5127 return false;
5129 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5130 the calculation of the BOUND could overflow, making the comparison
5131 invalid. */
5132 if (!data->loop_single_exit_p)
5133 return false;
5135 /* We need to be able to decide whether candidate is increasing or decreasing
5136 in order to choose the right comparison operator. */
5137 if (!cst_and_fits_in_hwi (cand->iv->step))
5138 return false;
5139 step = int_cst_value (cand->iv->step);
5141 /* Check that the number of iterations matches the expected pattern:
5142 a + 1 > b ? 0 : b - a - 1. */
5143 mbz = niter->may_be_zero;
5144 if (TREE_CODE (mbz) == GT_EXPR)
5146 /* Handle a + 1 > b. */
5147 tree op0 = TREE_OPERAND (mbz, 0);
5148 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5150 a = TREE_OPERAND (op0, 0);
5151 b = TREE_OPERAND (mbz, 1);
5153 else
5154 return false;
5156 else if (TREE_CODE (mbz) == LT_EXPR)
5158 tree op1 = TREE_OPERAND (mbz, 1);
5160 /* Handle b < a + 1. */
5161 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5163 a = TREE_OPERAND (op1, 0);
5164 b = TREE_OPERAND (mbz, 0);
5166 else
5167 return false;
5169 else
5170 return false;
5172 /* Expected number of iterations is B - A - 1. Check that it matches
5173 the actual number, i.e., that B - A - NITER = 1. */
5174 tree_to_aff_combination (niter->niter, nit_type, &nit);
5175 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5176 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5177 aff_combination_scale (&nit, -1);
5178 aff_combination_scale (&tmpa, -1);
5179 aff_combination_add (&tmpb, &tmpa);
5180 aff_combination_add (&tmpb, &nit);
5181 if (tmpb.n != 0 || tmpb.offset != 1)
5182 return false;
5184 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5185 overflow. */
5186 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5187 cand->iv->step,
5188 fold_convert (TREE_TYPE (cand->iv->step), a));
5189 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5190 return false;
5192 /* Determine the new comparison operator. */
5193 comp = step < 0 ? GT_EXPR : LT_EXPR;
5194 if (*comp_p == NE_EXPR)
5195 *comp_p = comp;
5196 else if (*comp_p == EQ_EXPR)
5197 *comp_p = invert_tree_comparison (comp, false);
5198 else
5199 gcc_unreachable ();
5201 return true;
5204 /* Check whether it is possible to express the condition in USE by comparison
5205 of candidate CAND. If so, store the value compared with to BOUND, and the
5206 comparison operator to COMP. */
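/* For example, an 8-bit candidate with step 1 has period 255, so it can
   replace the exit test only when the loop is known to iterate at most
   255 times (one fewer if the use is after the increment).  */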
5208 static bool
5209 may_eliminate_iv (struct ivopts_data *data,
5210 struct iv_use *use, struct iv_cand *cand, tree *bound,
5211 enum tree_code *comp)
5213 basic_block ex_bb;
5214 edge exit;
5215 tree period;
5216 struct loop *loop = data->current_loop;
5217 aff_tree bnd;
5218 struct tree_niter_desc *desc = NULL;
5220 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5221 return false;
5223 /* For now this works only for exits that dominate the loop latch.
5224 TODO: extend to other conditions inside the loop body. */
5225 ex_bb = gimple_bb (use->stmt);
5226 if (use->stmt != last_stmt (ex_bb)
5227 || gimple_code (use->stmt) != GIMPLE_COND
5228 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5229 return false;
5231 exit = EDGE_SUCC (ex_bb, 0);
5232 if (flow_bb_inside_loop_p (loop, exit->dest))
5233 exit = EDGE_SUCC (ex_bb, 1);
5234 if (flow_bb_inside_loop_p (loop, exit->dest))
5235 return false;
5237 desc = niter_for_exit (data, exit);
5238 if (!desc)
5239 return false;
5241 /* Determine whether we can use the variable to test the exit condition.
5242 This is the case iff the period of the induction variable is greater
5243 than the number of iterations for which the exit condition is true. */
5244 period = iv_period (cand->iv);
5246 /* If the number of iterations is constant, compare against it directly. */
5247 if (TREE_CODE (desc->niter) == INTEGER_CST)
5249 /* See cand_value_at. */
5250 if (stmt_after_increment (loop, cand, use->stmt))
5252 if (!tree_int_cst_lt (desc->niter, period))
5253 return false;
5255 else
5257 if (tree_int_cst_lt (period, desc->niter))
5258 return false;
5262 /* If not, and if this is the only possible exit of the loop, see whether
5263 we can get a conservative estimate on the number of iterations of the
5264 entire loop and compare against that instead. */
5265 else
5267 widest_int period_value, max_niter;
5269 max_niter = desc->max;
5270 if (stmt_after_increment (loop, cand, use->stmt))
5271 max_niter += 1;
5272 period_value = wi::to_widest (period);
5273 if (wi::gtu_p (max_niter, period_value))
5275 /* See if we can take advantage of inferred loop bound information. */
5276 if (data->loop_single_exit_p)
5278 if (!max_loop_iterations (loop, &max_niter))
5279 return false;
5280 /* The loop bound is already adjusted by adding 1. */
5281 if (wi::gtu_p (max_niter, period_value))
5282 return false;
5284 else
5285 return false;
5289 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5291 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5292 aff_combination_to_tree (&bnd));
5293 *comp = iv_elimination_compare (data, use);
5295 /* It is unlikely that computing the number of iterations using division
5296 would be more profitable than keeping the original induction variable. */
5297 if (expression_expensive_p (*bound))
5298 return false;
5300 /* Sometimes, it is possible to handle the situation that the number of
5301 iterations may be zero unless additional assumptions hold, by using <
5302 instead of != in the exit condition.
5304 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5305 base the exit condition on it. However, that is often too
5306 expensive. */
5307 if (!integer_zerop (desc->may_be_zero))
5308 return iv_elimination_compare_lt (data, cand, comp, desc);
5310 return true;
5313 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5314 be copied if it is used in the loop body and DATA->body_includes_call. */
5316 static int
5317 parm_decl_cost (struct ivopts_data *data, tree bound)
5319 tree sbound = bound;
5320 STRIP_NOPS (sbound);
5322 if (TREE_CODE (sbound) == SSA_NAME
5323 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5324 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5325 && data->body_includes_call)
5326 return COSTS_N_INSNS (1);
5328 return 0;
5331 /* Determines cost of basing replacement of USE on CAND in a condition. */
5333 static bool
5334 determine_use_iv_cost_condition (struct ivopts_data *data,
5335 struct iv_use *use, struct iv_cand *cand)
5337 tree bound = NULL_TREE;
5338 struct iv *cmp_iv;
5339 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5340 comp_cost elim_cost, express_cost, cost, bound_cost;
5341 bool ok;
5342 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5343 tree *control_var, *bound_cst;
5344 enum tree_code comp = ERROR_MARK;
5346 /* Only consider real candidates. */
5347 if (!cand->iv)
5349 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5350 ERROR_MARK, -1);
5351 return false;
5354 /* Try iv elimination. */
5355 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5357 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5358 if (elim_cost.cost == 0)
5359 elim_cost.cost = parm_decl_cost (data, bound);
5360 else if (TREE_CODE (bound) == INTEGER_CST)
5361 elim_cost.cost = 0;
5362 /* If we replace a loop condition 'i < n' with 'p < base + n',
5363 depends_on_elim will have 'base' and 'n' set, which implies
5364 that both 'base' and 'n' will be live during the loop. More likely,
5365 'base + n' will be loop invariant, resulting in only one live value
5366 during the loop. So in that case we clear depends_on_elim and set
5367 elim_inv_expr_id instead. */
5368 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5370 elim_inv_expr_id = get_expr_id (data, bound);
5371 bitmap_clear (depends_on_elim);
5373 /* The bound is a loop invariant, so it will only be computed
5374 once. */
5375 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5377 else
5378 elim_cost = infinite_cost;
5380 /* Try expressing the original giv. If it is compared with an invariant,
5381 note that we cannot get rid of it. */
5382 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5383 NULL, &cmp_iv);
5384 gcc_assert (ok);
5386 /* When the condition is a comparison of the candidate IV against
5387 zero, prefer this IV.
5389 TODO: The constant that we're subtracting from the cost should
5390 be target-dependent. This information should be added to the
5391 target costs for each backend. */
5392 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5393 && integer_zerop (*bound_cst)
5394 && (operand_equal_p (*control_var, cand->var_after, 0)
5395 || operand_equal_p (*control_var, cand->var_before, 0)))
5396 elim_cost.cost -= 1;
5398 express_cost = get_computation_cost (data, use, cand, false,
5399 &depends_on_express, NULL,
5400 &express_inv_expr_id);
5401 fd_ivopts_data = data;
5402 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5404 /* Count the cost of the original bound as well. */
5405 bound_cost = force_var_cost (data, *bound_cst, NULL);
5406 if (bound_cost.cost == 0)
5407 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5408 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5409 bound_cost.cost = 0;
5410 express_cost.cost += bound_cost.cost;
5412 /* Choose the better approach, preferring the eliminated IV. */
5413 if (compare_costs (elim_cost, express_cost) <= 0)
5415 cost = elim_cost;
5416 depends_on = depends_on_elim;
5417 depends_on_elim = NULL;
5418 inv_expr_id = elim_inv_expr_id;
5420 else
5422 cost = express_cost;
5423 depends_on = depends_on_express;
5424 depends_on_express = NULL;
5425 bound = NULL_TREE;
5426 comp = ERROR_MARK;
5427 inv_expr_id = express_inv_expr_id;
5430 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5432 if (depends_on_elim)
5433 BITMAP_FREE (depends_on_elim);
5434 if (depends_on_express)
5435 BITMAP_FREE (depends_on_express);
5437 return !infinite_cost_p (cost);
5440 /* Determines cost of basing replacement of USE on CAND. Returns false
5441 if USE cannot be based on CAND. */
5443 static bool
5444 determine_use_iv_cost (struct ivopts_data *data,
5445 struct iv_use *use, struct iv_cand *cand)
5447 switch (use->type)
5449 case USE_NONLINEAR_EXPR:
5450 return determine_use_iv_cost_generic (data, use, cand);
5452 case USE_ADDRESS:
5453 return determine_use_iv_cost_address (data, use, cand);
5455 case USE_COMPARE:
5456 return determine_use_iv_cost_condition (data, use, cand);
5458 default:
5459 gcc_unreachable ();
5463 /* Return true if get_computation_cost indicates that autoincrement is
5464 a possibility for the pair of USE and CAND, false otherwise. */
5466 static bool
5467 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5468 struct iv_cand *cand)
5470 bitmap depends_on;
5471 bool can_autoinc;
5472 comp_cost cost;
5474 if (use->type != USE_ADDRESS)
5475 return false;
5477 cost = get_computation_cost (data, use, cand, true, &depends_on,
5478 &can_autoinc, NULL);
5480 BITMAP_FREE (depends_on);
5482 return !infinite_cost_p (cost) && can_autoinc;
5485 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5486 use that allows autoincrement, and set their AINC_USE if possible. */
5488 static void
5489 set_autoinc_for_original_candidates (struct ivopts_data *data)
5491 unsigned i, j;
5493 for (i = 0; i < n_iv_cands (data); i++)
5495 struct iv_cand *cand = iv_cand (data, i);
5496 struct iv_use *closest_before = NULL;
5497 struct iv_use *closest_after = NULL;
5498 if (cand->pos != IP_ORIGINAL)
5499 continue;
5501 for (j = 0; j < n_iv_uses (data); j++)
5503 struct iv_use *use = iv_use (data, j);
5504 unsigned uid = gimple_uid (use->stmt);
5506 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5507 continue;
5509 if (uid < gimple_uid (cand->incremented_at)
5510 && (closest_before == NULL
5511 || uid > gimple_uid (closest_before->stmt)))
5512 closest_before = use;
5514 if (uid > gimple_uid (cand->incremented_at)
5515 && (closest_after == NULL
5516 || uid < gimple_uid (closest_after->stmt)))
5517 closest_after = use;
5520 if (closest_before != NULL
5521 && autoinc_possible_for_pair (data, closest_before, cand))
5522 cand->ainc_use = closest_before;
5523 else if (closest_after != NULL
5524 && autoinc_possible_for_pair (data, closest_after, cand))
5525 cand->ainc_use = closest_after;
5529 /* Finds the candidates for the induction variables. */
5531 static void
5532 find_iv_candidates (struct ivopts_data *data)
5534 /* Add commonly used ivs. */
5535 add_standard_iv_candidates (data);
5537 /* Add old induction variables. */
5538 add_iv_candidate_for_bivs (data);
5540 /* Add induction variables derived from uses. */
5541 add_iv_candidate_for_uses (data);
5543 set_autoinc_for_original_candidates (data);
5545 /* Record the important candidates. */
5546 record_important_candidates (data);
5549 /* Determines costs of basing the use of the iv on an iv candidate. */
5551 static void
5552 determine_use_iv_costs (struct ivopts_data *data)
5554 unsigned i, j;
5555 struct iv_use *use;
5556 struct iv_cand *cand;
5557 bitmap to_clear = BITMAP_ALLOC (NULL);
5559 alloc_use_cost_map (data);
5561 for (i = 0; i < n_iv_uses (data); i++)
5563 use = iv_use (data, i);
5565 if (data->consider_all_candidates)
5567 for (j = 0; j < n_iv_cands (data); j++)
5569 cand = iv_cand (data, j);
5570 determine_use_iv_cost (data, use, cand);
5573 else
5575 bitmap_iterator bi;
5577 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5579 cand = iv_cand (data, j);
5580 if (!determine_use_iv_cost (data, use, cand))
5581 bitmap_set_bit (to_clear, j);
5584 /* Remove the candidates for which the cost is infinite from
5585 the list of related candidates. */
5586 bitmap_and_compl_into (use->related_cands, to_clear);
5587 bitmap_clear (to_clear);
5591 BITMAP_FREE (to_clear);
5593 if (dump_file && (dump_flags & TDF_DETAILS))
5595 fprintf (dump_file, "Use-candidate costs:\n");
5597 for (i = 0; i < n_iv_uses (data); i++)
5599 use = iv_use (data, i);
5601 fprintf (dump_file, "Use %d:\n", i);
5602 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5603 for (j = 0; j < use->n_map_members; j++)
5605 if (!use->cost_map[j].cand
5606 || infinite_cost_p (use->cost_map[j].cost))
5607 continue;
5609 fprintf (dump_file, " %d\t%d\t%d\t",
5610 use->cost_map[j].cand->id,
5611 use->cost_map[j].cost.cost,
5612 use->cost_map[j].cost.complexity);
5613 if (use->cost_map[j].depends_on)
5614 bitmap_print (dump_file,
5615 use->cost_map[j].depends_on, "","");
5616 if (use->cost_map[j].inv_expr_id != -1)
5617 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5618 fprintf (dump_file, "\n");
5621 fprintf (dump_file, "\n");
5623 fprintf (dump_file, "\n");
5627 /* Determines cost of the candidate CAND. */
5629 static void
5630 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5632 comp_cost cost_base;
5633 unsigned cost, cost_step;
5634 tree base;
5636 if (!cand->iv)
5638 cand->cost = 0;
5639 return;
5642 /* There are two costs associated with the candidate -- its increment
5643 and its initialization. The second is almost negligible for any loop
5644 that rolls enough, so we take it into account only very slightly. */
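/* E.g., "i = base" executes once while "i += step" executes on every
   iteration, which is why adjust_setup_cost is applied to the
   initialization cost but not to cost_step below.  */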
5646 base = cand->iv->base;
5647 cost_base = force_var_cost (data, base, NULL);
5648 /* It is exceptional for the iv register to happen to be initialized with
5649 the proper value at no cost. In general, there will be at least a regcopy
5650 or a constant set. */
5651 if (cost_base.cost == 0)
5652 cost_base.cost = COSTS_N_INSNS (1);
5653 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5655 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5657 /* Prefer the original ivs unless we may gain something by replacing them.
5658 The reason is to make debugging simpler; so this is not relevant for
5659 artificial ivs created by other optimization passes. */
5660 if (cand->pos != IP_ORIGINAL
5661 || !SSA_NAME_VAR (cand->var_before)
5662 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5663 cost++;
5665 /* Prefer not to insert statements into the latch unless there are some
5666 already (so that we do not create unnecessary jumps). */
5667 if (cand->pos == IP_END
5668 && empty_block_p (ip_end_pos (data->current_loop)))
5669 cost++;
5671 cand->cost = cost;
5672 cand->cost_step = cost_step;
5675 /* Determines costs of computation of the candidates. */
5677 static void
5678 determine_iv_costs (struct ivopts_data *data)
5680 unsigned i;
5682 if (dump_file && (dump_flags & TDF_DETAILS))
5684 fprintf (dump_file, "Candidate costs:\n");
5685 fprintf (dump_file, " cand\tcost\n");
5688 for (i = 0; i < n_iv_cands (data); i++)
5690 struct iv_cand *cand = iv_cand (data, i);
5692 determine_iv_cost (data, cand);
5694 if (dump_file && (dump_flags & TDF_DETAILS))
5695 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5698 if (dump_file && (dump_flags & TDF_DETAILS))
5699 fprintf (dump_file, "\n");
5702 /* Calculates cost for having SIZE induction variables. */
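/* The returned value is SIZE plus estimate_reg_pressure_cost's estimate,
   which rises once SIZE + DATA->regs_used no longer fit in the available
   registers and spilling starts; adding SIZE itself biases the search
   toward smaller iv sets.  */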
5704 static unsigned
5705 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5707 /* We add size to the cost, so that we prefer eliminating ivs
5708 if possible. */
5709 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5710 data->body_includes_call);
5713 /* For each size of the induction variable set determine the penalty. */
5715 static void
5716 determine_set_costs (struct ivopts_data *data)
5718 unsigned j, n;
5719 gphi *phi;
5720 gphi_iterator psi;
5721 tree op;
5722 struct loop *loop = data->current_loop;
5723 bitmap_iterator bi;
5725 if (dump_file && (dump_flags & TDF_DETAILS))
5727 fprintf (dump_file, "Global costs:\n");
5728 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5729 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5730 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5731 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5734 n = 0;
5735 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5737 phi = psi.phi ();
5738 op = PHI_RESULT (phi);
5740 if (virtual_operand_p (op))
5741 continue;
5743 if (get_iv (data, op))
5744 continue;
5746 n++;
5749 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5751 struct version_info *info = ver_info (data, j);
5753 if (info->inv_id && info->has_nonlin_use)
5754 n++;
5757 data->regs_used = n;
5758 if (dump_file && (dump_flags & TDF_DETAILS))
5759 fprintf (dump_file, " regs_used %d\n", n);
5761 if (dump_file && (dump_flags & TDF_DETAILS))
5763 fprintf (dump_file, " cost for size:\n");
5764 fprintf (dump_file, " ivs\tcost\n");
5765 for (j = 0; j <= 2 * target_avail_regs; j++)
5766 fprintf (dump_file, " %d\t%d\n", j,
5767 ivopts_global_cost_for_size (data, j));
5768 fprintf (dump_file, "\n");
5772 /* Returns true if A is a cheaper cost pair than B. */
5774 static bool
5775 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5777 int cmp;
5779 if (!a)
5780 return false;
5782 if (!b)
5783 return true;
5785 cmp = compare_costs (a->cost, b->cost);
5786 if (cmp < 0)
5787 return true;
5789 if (cmp > 0)
5790 return false;
5792 /* In case the costs are the same, prefer the cheaper candidate. */
5793 if (a->cand->cost < b->cand->cost)
5794 return true;
5796 return false;
5800 /* Returns the candidate by which USE is expressed in IVS. */
5802 static struct cost_pair *
5803 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5805 return ivs->cand_for_use[use->id];
5808 /* Computes the cost field of IVS structure. */
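/* I.e., cost = cand_use_cost + cand_cost
   + ivopts_global_cost_for_size (n_regs + num_used_inv_expr).  */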
5810 static void
5811 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5813 comp_cost cost = ivs->cand_use_cost;
5815 cost.cost += ivs->cand_cost;
5817 cost.cost += ivopts_global_cost_for_size (data,
5818 ivs->n_regs + ivs->num_used_inv_expr);
5820 ivs->cost = cost;
5823 /* Remove invariants in set INVS from set IVS. */
5825 static void
5826 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5828 bitmap_iterator bi;
5829 unsigned iid;
5831 if (!invs)
5832 return;
5834 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5836 ivs->n_invariant_uses[iid]--;
5837 if (ivs->n_invariant_uses[iid] == 0)
5838 ivs->n_regs--;
5842 /* Set USE not to be expressed by any candidate in IVS. */
5844 static void
5845 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5846 struct iv_use *use)
5848 unsigned uid = use->id, cid;
5849 struct cost_pair *cp;
5851 cp = ivs->cand_for_use[uid];
5852 if (!cp)
5853 return;
5854 cid = cp->cand->id;
5856 ivs->bad_uses++;
5857 ivs->cand_for_use[uid] = NULL;
5858 ivs->n_cand_uses[cid]--;
5860 if (ivs->n_cand_uses[cid] == 0)
5862 bitmap_clear_bit (ivs->cands, cid);
5863 /* Do not count the pseudocandidates. */
5864 if (cp->cand->iv)
5865 ivs->n_regs--;
5866 ivs->n_cands--;
5867 ivs->cand_cost -= cp->cand->cost;
5869 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5872 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5874 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5876 if (cp->inv_expr_id != -1)
5878 ivs->used_inv_expr[cp->inv_expr_id]--;
5879 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5880 ivs->num_used_inv_expr--;
5882 iv_ca_recount_cost (data, ivs);
5885 /* Add invariants in set INVS to set IVS. */
5887 static void
5888 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5890 bitmap_iterator bi;
5891 unsigned iid;
5893 if (!invs)
5894 return;
5896 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5898 ivs->n_invariant_uses[iid]++;
5899 if (ivs->n_invariant_uses[iid] == 1)
5900 ivs->n_regs++;
5904 /* Set cost pair for USE in set IVS to CP. */
5906 static void
5907 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5908 struct iv_use *use, struct cost_pair *cp)
5910 unsigned uid = use->id, cid;
5912 if (ivs->cand_for_use[uid] == cp)
5913 return;
5915 if (ivs->cand_for_use[uid])
5916 iv_ca_set_no_cp (data, ivs, use);
5918 if (cp)
5920 cid = cp->cand->id;
5922 ivs->bad_uses--;
5923 ivs->cand_for_use[uid] = cp;
5924 ivs->n_cand_uses[cid]++;
5925 if (ivs->n_cand_uses[cid] == 1)
5927 bitmap_set_bit (ivs->cands, cid);
5928 /* Do not count the pseudocandidates. */
5929 if (cp->cand->iv)
5930 ivs->n_regs++;
5931 ivs->n_cands++;
5932 ivs->cand_cost += cp->cand->cost;
5934 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5937 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5938 iv_ca_set_add_invariants (ivs, cp->depends_on);
5940 if (cp->inv_expr_id != -1)
5942 ivs->used_inv_expr[cp->inv_expr_id]++;
5943 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5944 ivs->num_used_inv_expr++;
5946 iv_ca_recount_cost (data, ivs);
5950 /* Extend set IVS by expressing USE by some of the candidates in it
5951 if possible. Consider all important candidates if candidates in
5952 set IVS don't give any result. */
5954 static void
5955 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5956 struct iv_use *use)
5958 struct cost_pair *best_cp = NULL, *cp;
5959 bitmap_iterator bi;
5960 unsigned i;
5961 struct iv_cand *cand;
5963 gcc_assert (ivs->upto >= use->id);
5964 ivs->upto++;
5965 ivs->bad_uses++;
5967 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5969 cand = iv_cand (data, i);
5970 cp = get_use_iv_cost (data, use, cand);
5971 if (cheaper_cost_pair (cp, best_cp))
5972 best_cp = cp;
5975 if (best_cp == NULL)
5977 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5979 cand = iv_cand (data, i);
5980 cp = get_use_iv_cost (data, use, cand);
5981 if (cheaper_cost_pair (cp, best_cp))
5982 best_cp = cp;
5986 iv_ca_set_cp (data, ivs, use, best_cp);
5989 /* Get cost for assignment IVS. */
5991 static comp_cost
5992 iv_ca_cost (struct iv_ca *ivs)
5994 /* This was a conditional expression but it triggered a bug in
5995 Sun C 5.5. */
5996 if (ivs->bad_uses)
5997 return infinite_cost;
5998 else
5999 return ivs->cost;
6002 /* Returns true if all dependences of CP are among invariants in IVS. */
6004 static bool
6005 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6007 unsigned i;
6008 bitmap_iterator bi;
6010 if (!cp->depends_on)
6011 return true;
6013 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6015 if (ivs->n_invariant_uses[i] == 0)
6016 return false;
6019 return true;
6022 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
6023 it before NEXT_CHANGE. */
6025 static struct iv_ca_delta *
6026 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6027 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6029 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6031 change->use = use;
6032 change->old_cp = old_cp;
6033 change->new_cp = new_cp;
6034 change->next_change = next_change;
6036 return change;
6039 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6040 are rewritten. */
6042 static struct iv_ca_delta *
6043 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6045 struct iv_ca_delta *last;
6047 if (!l2)
6048 return l1;
6050 if (!l1)
6051 return l2;
6053 for (last = l1; last->next_change; last = last->next_change)
6054 continue;
6055 last->next_change = l2;
6057 return l1;
6060 /* Reverse the list of changes DELTA, forming the inverse to it. */
6062 static struct iv_ca_delta *
6063 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6065 struct iv_ca_delta *act, *next, *prev = NULL;
6067 for (act = delta; act; act = next)
6069 next = act->next_change;
6070 act->next_change = prev;
6071 prev = act;
6073 std::swap (act->old_cp, act->new_cp);
6076 return prev;
6079 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6080 reverted instead. */
6082 static void
6083 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6084 struct iv_ca_delta *delta, bool forward)
6086 struct cost_pair *from, *to;
6087 struct iv_ca_delta *act;
6089 if (!forward)
6090 delta = iv_ca_delta_reverse (delta);
6092 for (act = delta; act; act = act->next_change)
6094 from = act->old_cp;
6095 to = act->new_cp;
6096 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6097 iv_ca_set_cp (data, ivs, act->use, to);
6100 if (!forward)
6101 iv_ca_delta_reverse (delta);
6104 /* Returns true if CAND is used in IVS. */
6106 static bool
6107 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6109 return ivs->n_cand_uses[cand->id] > 0;
6112 /* Returns number of induction variable candidates in the set IVS. */
6114 static unsigned
6115 iv_ca_n_cands (struct iv_ca *ivs)
6117 return ivs->n_cands;
6120 /* Free the list of changes DELTA. */
6122 static void
6123 iv_ca_delta_free (struct iv_ca_delta **delta)
6125 struct iv_ca_delta *act, *next;
6127 for (act = *delta; act; act = next)
6129 next = act->next_change;
6130 free (act);
6133 *delta = NULL;
6136 /* Allocates a new iv candidate assignment. */
6138 static struct iv_ca *
6139 iv_ca_new (struct ivopts_data *data)
6141 struct iv_ca *nw = XNEW (struct iv_ca);
6143 nw->upto = 0;
6144 nw->bad_uses = 0;
6145 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6146 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6147 nw->cands = BITMAP_ALLOC (NULL);
6148 nw->n_cands = 0;
6149 nw->n_regs = 0;
6150 nw->cand_use_cost = no_cost;
6151 nw->cand_cost = 0;
6152 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6153 nw->cost = no_cost;
6154 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6155 nw->num_used_inv_expr = 0;
6157 return nw;
6160 /* Free memory occupied by the set IVS. */
6162 static void
6163 iv_ca_free (struct iv_ca **ivs)
6165 free ((*ivs)->cand_for_use);
6166 free ((*ivs)->n_cand_uses);
6167 BITMAP_FREE ((*ivs)->cands);
6168 free ((*ivs)->n_invariant_uses);
6169 free ((*ivs)->used_inv_expr);
6170 free (*ivs);
6171 *ivs = NULL;
6174 /* Dumps IVS to FILE. */
6176 static void
6177 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6179 const char *pref = " invariants ";
6180 unsigned i;
6181 comp_cost cost = iv_ca_cost (ivs);
6183 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6184 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6185 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6186 bitmap_print (file, ivs->cands, " candidates: ","\n");
6188 for (i = 0; i < ivs->upto; i++)
6190 struct iv_use *use = iv_use (data, i);
6191 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6192 if (cp)
6193 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6194 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6195 else
6196 fprintf (file, " use:%d --> ??\n", use->id);
6199 for (i = 1; i <= data->max_inv_id; i++)
6200 if (ivs->n_invariant_uses[i])
6202 fprintf (file, "%s%d", pref, i);
6203 pref = ", ";
6205 fprintf (file, "\n\n");
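/* For illustration only, with made-up numbers, the dump produced by
   iv_ca_dump has roughly this shape (following the fprintf calls above):

     cost: 17 (complexity 2)
     cand_cost: 8
     cand_use_cost: 9 (complexity 2)
     candidates: 1, 3
     use:0 --> iv_cand:1, cost=(4,1)
     use:1 --> iv_cand:3, cost=(5,1)
      invariants 1, 2

   All numbers here are hypothetical.  */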
6208 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6209 of the new set, and store the differences in DELTA. The number of induction
6210 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
6211 is true, the function tries to find a solution with minimal iv candidates. */
6213 static comp_cost
6214 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6215 struct iv_cand *cand, struct iv_ca_delta **delta,
6216 unsigned *n_ivs, bool min_ncand)
6218 unsigned i;
6219 comp_cost cost;
6220 struct iv_use *use;
6221 struct cost_pair *old_cp, *new_cp;
6223 *delta = NULL;
6224 for (i = 0; i < ivs->upto; i++)
6226 use = iv_use (data, i);
6227 old_cp = iv_ca_cand_for_use (ivs, use);
6229 if (old_cp
6230 && old_cp->cand == cand)
6231 continue;
6233 new_cp = get_use_iv_cost (data, use, cand);
6234 if (!new_cp)
6235 continue;
6237 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6238 continue;
6240 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6241 continue;
6243 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6246 iv_ca_delta_commit (data, ivs, *delta, true);
6247 cost = iv_ca_cost (ivs);
6248 if (n_ivs)
6249 *n_ivs = iv_ca_n_cands (ivs);
6250 iv_ca_delta_commit (data, ivs, *delta, false);
6252 return cost;
6255 /* Try narrowing set IVS by removing CAND. Return the cost of
6256 the new set and store the differences in DELTA. START is
6257 the candidate with which we start narrowing. */
6259 static comp_cost
6260 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6261 struct iv_cand *cand, struct iv_cand *start,
6262 struct iv_ca_delta **delta)
6264 unsigned i, ci;
6265 struct iv_use *use;
6266 struct cost_pair *old_cp, *new_cp, *cp;
6267 bitmap_iterator bi;
6268 struct iv_cand *cnd;
6269 comp_cost cost, best_cost, acost;
6271 *delta = NULL;
6272 for (i = 0; i < n_iv_uses (data); i++)
6274 use = iv_use (data, i);
6276 old_cp = iv_ca_cand_for_use (ivs, use);
6277 if (old_cp->cand != cand)
6278 continue;
6280 best_cost = iv_ca_cost (ivs);
6281 /* Start narrowing with START. */
6282 new_cp = get_use_iv_cost (data, use, start);
6284 if (data->consider_all_candidates)
6286 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6288 if (ci == cand->id || (start && ci == start->id))
6289 continue;
6291 cnd = iv_cand (data, ci);
6293 cp = get_use_iv_cost (data, use, cnd);
6294 if (!cp)
6295 continue;
6297 iv_ca_set_cp (data, ivs, use, cp);
6298 acost = iv_ca_cost (ivs);
6300 if (compare_costs (acost, best_cost) < 0)
6302 best_cost = acost;
6303 new_cp = cp;
6307 else
6309 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6311 if (ci == cand->id || (start && ci == start->id))
6312 continue;
6314 cnd = iv_cand (data, ci);
6316 cp = get_use_iv_cost (data, use, cnd);
6317 if (!cp)
6318 continue;
6320 iv_ca_set_cp (data, ivs, use, cp);
6321 acost = iv_ca_cost (ivs);
6323 if (compare_costs (acost, best_cost) < 0)
6325 best_cost = acost;
6326 new_cp = cp;
6330 /* Restore the old cp for this use. */
6331 iv_ca_set_cp (data, ivs, use, old_cp);
6333 if (!new_cp)
6335 iv_ca_delta_free (delta);
6336 return infinite_cost;
6339 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6342 iv_ca_delta_commit (data, ivs, *delta, true);
6343 cost = iv_ca_cost (ivs);
6344 iv_ca_delta_commit (data, ivs, *delta, false);
6346 return cost;
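/* A note on the narrowing above: removing CAND means every use currently
   served by CAND must be re-assigned to the cheapest remaining candidate
   in IVS (or to START).  If some use has no alternative candidate at all,
   the narrowing cannot succeed, which is why infinite_cost is returned in
   that case.  */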
6349 /* Try optimizing the set of candidates IVS by removing candidates other
6350 than EXCEPT_CAND from it. Return the cost of the new set, and store
6351 differences in DELTA. */
6353 static comp_cost
6354 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6355 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6357 bitmap_iterator bi;
6358 struct iv_ca_delta *act_delta, *best_delta;
6359 unsigned i;
6360 comp_cost best_cost, acost;
6361 struct iv_cand *cand;
6363 best_delta = NULL;
6364 best_cost = iv_ca_cost (ivs);
6366 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6368 cand = iv_cand (data, i);
6370 if (cand == except_cand)
6371 continue;
6373 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6375 if (compare_costs (acost, best_cost) < 0)
6377 best_cost = acost;
6378 iv_ca_delta_free (&best_delta);
6379 best_delta = act_delta;
6381 else
6382 iv_ca_delta_free (&act_delta);
6385 if (!best_delta)
6387 *delta = NULL;
6388 return best_cost;
6391 /* Recurse to possibly remove other unnecessary ivs. */
6392 iv_ca_delta_commit (data, ivs, best_delta, true);
6393 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6394 iv_ca_delta_commit (data, ivs, best_delta, false);
6395 *delta = iv_ca_delta_join (best_delta, *delta);
6396 return best_cost;
6399 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a
6400 cheaper local cost for USE than BEST_CP. If so, return a pointer
6401 to the corresponding cost_pair; otherwise just return BEST_CP. */
6403 static struct cost_pair*
6404 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6405 unsigned int cand_idx, struct iv_cand *old_cand,
6406 struct cost_pair *best_cp)
6408 struct iv_cand *cand;
6409 struct cost_pair *cp;
6411 gcc_assert (old_cand != NULL && best_cp != NULL);
6412 if (cand_idx == old_cand->id)
6413 return best_cp;
6415 cand = iv_cand (data, cand_idx);
6416 cp = get_use_iv_cost (data, use, cand);
6417 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6418 return cp;
6420 return best_cp;
6423 /* Try breaking the locally optimal fixed point for IVS by replacing
6424 candidates that are used by more than one iv use. For each such
6425 candidate, this function tries to represent the iv uses under that
6426 candidate using other candidates with lower local cost, then tries to
6427 prune the new set. If the new set has a lower cost, it returns the new
6428 cost after recording the candidate replacements in the list DELTA. */
6430 static comp_cost
6431 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6432 struct iv_ca_delta **delta)
6434 bitmap_iterator bi, bj;
6435 unsigned int i, j, k;
6436 struct iv_use *use;
6437 struct iv_cand *cand;
6438 comp_cost orig_cost, acost;
6439 struct iv_ca_delta *act_delta, *tmp_delta;
6440 struct cost_pair *old_cp, *best_cp = NULL;
6442 *delta = NULL;
6443 orig_cost = iv_ca_cost (ivs);
6445 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6447 if (ivs->n_cand_uses[i] == 1
6448 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6449 continue;
6451 cand = iv_cand (data, i);
6453 act_delta = NULL;
6454 /* Represent uses under current candidate using other ones with
6455 lower local cost. */
6456 for (j = 0; j < ivs->upto; j++)
6458 use = iv_use (data, j);
6459 old_cp = iv_ca_cand_for_use (ivs, use);
6461 if (old_cp->cand != cand)
6462 continue;
6464 best_cp = old_cp;
6465 if (data->consider_all_candidates)
6466 for (k = 0; k < n_iv_cands (data); k++)
6467 best_cp = cheaper_cost_with_cand (data, use, k,
6468 old_cp->cand, best_cp);
6469 else
6470 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6471 best_cp = cheaper_cost_with_cand (data, use, k,
6472 old_cp->cand, best_cp);
6474 if (best_cp == old_cp)
6475 continue;
6477 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6479 /* No need for further pruning. */
6480 if (!act_delta)
6481 continue;
6483 /* Prune the new candidate set. */
6484 iv_ca_delta_commit (data, ivs, act_delta, true);
6485 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6486 iv_ca_delta_commit (data, ivs, act_delta, false);
6487 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6489 if (compare_costs (acost, orig_cost) < 0)
6491 *delta = act_delta;
6492 return acost;
6494 else
6495 iv_ca_delta_free (&act_delta);
6498 return orig_cost;
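/* A sketch of the replacement idea, with hypothetical candidates: if
   candidate C serves uses U1 and U2, but C1 is locally cheaper for U1 and
   C2 is locally cheaper for U2, the loop above re-expresses U1 and U2
   using C1 and C2 and then lets iv_ca_prune remove C.  The combined delta
   is kept only if the overall cost really drops.  */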
6501 /* Tries to extend the set IVS in the best possible way in order
6502 to express USE. If ORIGINALP is true, prefer candidates from
6503 the original set of IVs, otherwise favor important candidates not
6504 based on any memory object. */
6506 static bool
6507 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6508 struct iv_use *use, bool originalp)
6510 comp_cost best_cost, act_cost;
6511 unsigned i;
6512 bitmap_iterator bi;
6513 struct iv_cand *cand;
6514 struct iv_ca_delta *best_delta = NULL, *act_delta;
6515 struct cost_pair *cp;
6517 iv_ca_add_use (data, ivs, use);
6518 best_cost = iv_ca_cost (ivs);
6519 cp = iv_ca_cand_for_use (ivs, use);
6520 if (cp)
6522 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6523 iv_ca_set_no_cp (data, ivs, use);
6526 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6527 first try important candidates not based on any memory object. Only if
6528 this fails, try the specific ones. Rationale: in loops with many
6529 variables the best choice is often to use just one generic biv. If we
6530 added many use-specific ivs here, the optimization algorithm later
6531 would be likely to get stuck in a local minimum, thus causing us to create
6532 too many ivs. The approach from few ivs to more seems more likely to be
6533 successful -- starting from few ivs, replacing an expensive use by a
6534 specific iv should always be a win. */
6535 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6537 cand = iv_cand (data, i);
6539 if (originalp && cand->pos != IP_ORIGINAL)
6540 continue;
6542 if (!originalp && cand->iv->base_object != NULL_TREE)
6543 continue;
6545 if (iv_ca_cand_used_p (ivs, cand))
6546 continue;
6548 cp = get_use_iv_cost (data, use, cand);
6549 if (!cp)
6550 continue;
6552 iv_ca_set_cp (data, ivs, use, cp);
6553 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6554 true);
6555 iv_ca_set_no_cp (data, ivs, use);
6556 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6558 if (compare_costs (act_cost, best_cost) < 0)
6560 best_cost = act_cost;
6562 iv_ca_delta_free (&best_delta);
6563 best_delta = act_delta;
6565 else
6566 iv_ca_delta_free (&act_delta);
6569 if (infinite_cost_p (best_cost))
6571 for (i = 0; i < use->n_map_members; i++)
6573 cp = use->cost_map + i;
6574 cand = cp->cand;
6575 if (!cand)
6576 continue;
6578 /* Already tried this. */
6579 if (cand->important)
6581 if (originalp && cand->pos == IP_ORIGINAL)
6582 continue;
6583 if (!originalp && cand->iv->base_object == NULL_TREE)
6584 continue;
6587 if (iv_ca_cand_used_p (ivs, cand))
6588 continue;
6590 act_delta = NULL;
6591 iv_ca_set_cp (data, ivs, use, cp);
6592 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6593 iv_ca_set_no_cp (data, ivs, use);
6594 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6595 cp, act_delta);
6597 if (compare_costs (act_cost, best_cost) < 0)
6599 best_cost = act_cost;
6601 if (best_delta)
6602 iv_ca_delta_free (&best_delta);
6603 best_delta = act_delta;
6605 else
6606 iv_ca_delta_free (&act_delta);
6610 iv_ca_delta_commit (data, ivs, best_delta, true);
6611 iv_ca_delta_free (&best_delta);
6613 return !infinite_cost_p (best_cost);
6616 /* Finds an initial assignment of candidates to uses. */
6618 static struct iv_ca *
6619 get_initial_solution (struct ivopts_data *data, bool originalp)
6621 struct iv_ca *ivs = iv_ca_new (data);
6622 unsigned i;
6624 for (i = 0; i < n_iv_uses (data); i++)
6625 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6627 iv_ca_free (&ivs);
6628 return NULL;
6631 return ivs;
6634 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6635 points to a bool variable; if it is true, this function tries to break
6636 the locally optimal fixed point by replacing candidates in IVS. */
6638 static bool
6639 try_improve_iv_set (struct ivopts_data *data,
6640 struct iv_ca *ivs, bool *try_replace_p)
6642 unsigned i, n_ivs;
6643 comp_cost acost, best_cost = iv_ca_cost (ivs);
6644 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6645 struct iv_cand *cand;
6647 /* Try extending the set of induction variables by one. */
6648 for (i = 0; i < n_iv_cands (data); i++)
6650 cand = iv_cand (data, i);
6652 if (iv_ca_cand_used_p (ivs, cand))
6653 continue;
6655 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6656 if (!act_delta)
6657 continue;
6659 /* If we successfully added the candidate and the set is small enough,
6660 try optimizing it by removing other candidates. */
6661 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6663 iv_ca_delta_commit (data, ivs, act_delta, true);
6664 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6665 iv_ca_delta_commit (data, ivs, act_delta, false);
6666 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6669 if (compare_costs (acost, best_cost) < 0)
6671 best_cost = acost;
6672 iv_ca_delta_free (&best_delta);
6673 best_delta = act_delta;
6675 else
6676 iv_ca_delta_free (&act_delta);
6679 if (!best_delta)
6681 /* Try removing the candidates from the set instead. */
6682 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6684 if (!best_delta && *try_replace_p)
6686 *try_replace_p = false;
6687 /* So far the candidate selection algorithm tends to choose few IVs
6688 so that it can handle cases in which loops have many variables
6689 but the best choice is often to use only one general biv. One
6690 weakness is that it can't handle the opposite cases, in which
6691 different candidates should be chosen for different uses. To
6692 solve this problem, we replace candidates in the manner described
6693 in the comments of iv_ca_replace, thus giving the general
6694 algorithm a chance to break the locally optimal fixed point. */
6695 best_cost = iv_ca_replace (data, ivs, &best_delta);
6698 if (!best_delta)
6699 return false;
6702 iv_ca_delta_commit (data, ivs, best_delta, true);
6703 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6704 iv_ca_delta_free (&best_delta);
6705 return true;
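/* A sketch of one improvement step performed by try_improve_iv_set:

     for each candidate C not yet used in IVS
       cost = iv_ca_extend (..., C, &delta, ...);   try adding C
       keep the cheapest delta seen so far
     if no extension helped, fall back to iv_ca_prune, then iv_ca_replace
     commit the best delta, if any

   Each call thus performs at most one extend/prune/replace round; the
   driver below iterates the function until no step lowers the cost.  */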
6708 /* Attempts to find the optimal set of induction variables. We use a simple
6709 greedy heuristic: we try to replace at most one candidate in the selected
6710 solution and remove the unused ivs while this improves the cost. */
6712 static struct iv_ca *
6713 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6715 struct iv_ca *set;
6716 bool try_replace_p = true;
6718 /* Get the initial solution. */
6719 set = get_initial_solution (data, originalp);
6720 if (!set)
6722 if (dump_file && (dump_flags & TDF_DETAILS))
6723 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6724 return NULL;
6727 if (dump_file && (dump_flags & TDF_DETAILS))
6729 fprintf (dump_file, "Initial set of candidates:\n");
6730 iv_ca_dump (data, dump_file, set);
6733 while (try_improve_iv_set (data, set, &try_replace_p))
6735 if (dump_file && (dump_flags & TDF_DETAILS))
6737 fprintf (dump_file, "Improved to:\n");
6738 iv_ca_dump (data, dump_file, set);
6742 return set;
6745 static struct iv_ca *
6746 find_optimal_iv_set (struct ivopts_data *data)
6748 unsigned i;
6749 struct iv_ca *set, *origset;
6750 struct iv_use *use;
6751 comp_cost cost, origcost;
6753 /* Determine the cost based on a strategy that starts with the original
6754 IVs, then try again using a strategy that prefers candidates not based
6755 on any IVs. */
6756 origset = find_optimal_iv_set_1 (data, true);
6757 set = find_optimal_iv_set_1 (data, false);
6759 if (!origset && !set)
6760 return NULL;
6762 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6763 cost = set ? iv_ca_cost (set) : infinite_cost;
6765 if (dump_file && (dump_flags & TDF_DETAILS))
6767 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6768 origcost.cost, origcost.complexity);
6769 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6770 cost.cost, cost.complexity);
6773 /* Choose the one with the best cost. */
6774 if (compare_costs (origcost, cost) <= 0)
6776 if (set)
6777 iv_ca_free (&set);
6778 set = origset;
6780 else if (origset)
6781 iv_ca_free (&origset);
6783 for (i = 0; i < n_iv_uses (data); i++)
6785 use = iv_use (data, i);
6786 use->selected = iv_ca_cand_for_use (set, use)->cand;
6789 return set;
6792 /* Creates a new induction variable corresponding to CAND. */
6794 static void
6795 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6797 gimple_stmt_iterator incr_pos;
6798 tree base;
6799 bool after = false;
6801 if (!cand->iv)
6802 return;
6804 switch (cand->pos)
6806 case IP_NORMAL:
6807 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6808 break;
6810 case IP_END:
6811 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6812 after = true;
6813 break;
6815 case IP_AFTER_USE:
6816 after = true;
6817 /* fall through */
6818 case IP_BEFORE_USE:
6819 incr_pos = gsi_for_stmt (cand->incremented_at);
6820 break;
6822 case IP_ORIGINAL:
6823 /* Mark that the iv is preserved. */
6824 name_info (data, cand->var_before)->preserve_biv = true;
6825 name_info (data, cand->var_after)->preserve_biv = true;
6827 /* Rewrite the increment so that it uses var_before directly. */
6828 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6829 return;
6832 gimple_add_tmp_var (cand->var_before);
6834 base = unshare_expr (cand->iv->base);
6836 create_iv (base, unshare_expr (cand->iv->step),
6837 cand->var_before, data->current_loop,
6838 &incr_pos, after, &cand->var_before, &cand->var_after);
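/* For illustration (a sketch, not the exact statements create_iv emits):
   for a candidate with base B and step S placed at IP_NORMAL, the
   generated gimple is roughly

     # var_before = PHI <B (preheader), var_after (latch)>
     ...
     var_after = var_before + S;

   with the increment inserted just before the exit test of the loop.  */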
6841 /* Creates new induction variables described in SET. */
6843 static void
6844 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6846 unsigned i;
6847 struct iv_cand *cand;
6848 bitmap_iterator bi;
6850 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6852 cand = iv_cand (data, i);
6853 create_new_iv (data, cand);
6856 if (dump_file && (dump_flags & TDF_DETAILS))
6858 fprintf (dump_file, "Selected IV set for loop %d",
6859 data->current_loop->num);
6860 if (data->loop_loc != UNKNOWN_LOCATION)
6861 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6862 LOCATION_LINE (data->loop_loc));
6863 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6864 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6866 cand = iv_cand (data, i);
6867 dump_cand (dump_file, cand);
6869 fprintf (dump_file, "\n");
6873 /* Rewrites USE (definition of iv used in a nonlinear expression)
6874 using candidate CAND. */
6876 static void
6877 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6878 struct iv_use *use, struct iv_cand *cand)
6880 tree comp;
6881 tree op, tgt;
6882 gassign *ass;
6883 gimple_stmt_iterator bsi;
6885 /* An important special case -- if we are asked to express value of
6886 the original iv by itself, just exit; there is no need to
6887 introduce a new computation (that might also need casting the
6888 variable to unsigned and back). */
6889 if (cand->pos == IP_ORIGINAL
6890 && cand->incremented_at == use->stmt)
6892 enum tree_code stmt_code;
6894 gcc_assert (is_gimple_assign (use->stmt));
6895 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6897 /* Check whether we may leave the computation unchanged.
6898 This is the case only if it does not rely on other
6899 computations in the loop -- otherwise, the computation
6900 we rely upon may be removed in remove_unused_ivs,
6901 thus leading to an ICE. */
6902 stmt_code = gimple_assign_rhs_code (use->stmt);
6903 if (stmt_code == PLUS_EXPR
6904 || stmt_code == MINUS_EXPR
6905 || stmt_code == POINTER_PLUS_EXPR)
6907 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6908 op = gimple_assign_rhs2 (use->stmt);
6909 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6910 op = gimple_assign_rhs1 (use->stmt);
6911 else
6912 op = NULL_TREE;
6914 else
6915 op = NULL_TREE;
6917 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6918 return;
6921 comp = get_computation (data->current_loop, use, cand);
6922 gcc_assert (comp != NULL_TREE);
6924 switch (gimple_code (use->stmt))
6926 case GIMPLE_PHI:
6927 tgt = PHI_RESULT (use->stmt);
6929 /* If we should keep the biv, do not replace it. */
6930 if (name_info (data, tgt)->preserve_biv)
6931 return;
6933 bsi = gsi_after_labels (gimple_bb (use->stmt));
6934 break;
6936 case GIMPLE_ASSIGN:
6937 tgt = gimple_assign_lhs (use->stmt);
6938 bsi = gsi_for_stmt (use->stmt);
6939 break;
6941 default:
6942 gcc_unreachable ();
6945 if (!valid_gimple_rhs_p (comp)
6946 || (gimple_code (use->stmt) != GIMPLE_PHI
6947 /* We can't allow re-allocating the stmt as it might still be
6948 pointed to. */
6949 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6950 >= gimple_num_ops (gsi_stmt (bsi)))))
6952 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6953 true, GSI_SAME_STMT);
6954 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6956 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6957 /* As this isn't a plain copy, we have to reset alignment
6958 information. */
6959 if (SSA_NAME_PTR_INFO (comp))
6960 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6964 if (gimple_code (use->stmt) == GIMPLE_PHI)
6966 ass = gimple_build_assign (tgt, comp);
6967 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6969 bsi = gsi_for_stmt (use->stmt);
6970 remove_phi_node (&bsi, false);
6972 else
6974 gimple_assign_set_rhs_from_tree (&bsi, comp);
6975 use->stmt = gsi_stmt (bsi);
6979 /* Performs a peephole optimization to reorder the iv update statement with
6980 a mem ref to enable instruction combining in later phases. The mem ref uses
6981 the iv value before the update, so the reordering transformation requires
6982 adjustment of the offset. CAND is the selected IV_CAND.
6984 Example:
6986 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6987 iv2 = iv1 + 1;
6989 if (t < val) (1)
6990 goto L;
6991 goto Head;
6994 directly propagating t over to (1) will introduce an overlapping live
6995 range, thus increasing register pressure. This peephole transforms it into:
6998 iv2 = iv1 + 1;
6999 t = MEM_REF (base, iv2, 8, 8);
7000 if (t < val)
7001 goto L;
7002 goto Head;
7005 static void
7006 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7008 tree var_after;
7009 gimple *iv_update, *stmt;
7010 basic_block bb;
7011 gimple_stmt_iterator gsi, gsi_iv;
7013 if (cand->pos != IP_NORMAL)
7014 return;
7016 var_after = cand->var_after;
7017 iv_update = SSA_NAME_DEF_STMT (var_after);
7019 bb = gimple_bb (iv_update);
7020 gsi = gsi_last_nondebug_bb (bb);
7021 stmt = gsi_stmt (gsi);
7023 /* Only handle conditional statements for now. */
7024 if (gimple_code (stmt) != GIMPLE_COND)
7025 return;
7027 gsi_prev_nondebug (&gsi);
7028 stmt = gsi_stmt (gsi);
7029 if (stmt != iv_update)
7030 return;
7032 gsi_prev_nondebug (&gsi);
7033 if (gsi_end_p (gsi))
7034 return;
7036 stmt = gsi_stmt (gsi);
7037 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7038 return;
7040 if (stmt != use->stmt)
7041 return;
7043 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7044 return;
7046 if (dump_file && (dump_flags & TDF_DETAILS))
7048 fprintf (dump_file, "Reordering \n");
7049 print_gimple_stmt (dump_file, iv_update, 0, 0);
7050 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7051 fprintf (dump_file, "\n");
7054 gsi = gsi_for_stmt (use->stmt);
7055 gsi_iv = gsi_for_stmt (iv_update);
7056 gsi_move_before (&gsi_iv, &gsi);
7058 cand->pos = IP_BEFORE_USE;
7059 cand->incremented_at = use->stmt;
7062 /* Rewrites USE (address that is an iv) using candidate CAND. */
7064 static void
7065 rewrite_use_address_1 (struct ivopts_data *data,
7066 struct iv_use *use, struct iv_cand *cand)
7068 aff_tree aff;
7069 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7070 tree base_hint = NULL_TREE;
7071 tree ref, iv;
7072 bool ok;
7074 adjust_iv_update_pos (cand, use);
7075 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7076 gcc_assert (ok);
7077 unshare_aff_combination (&aff);
7079 /* To avoid undefined overflow problems, all IV candidates use unsigned
7080 integer types. The drawback is that this makes it impossible for
7081 create_mem_ref to distinguish an IV that is based on a memory object
7082 from one that represents simply an offset.
7084 To work around this problem, we pass a hint to create_mem_ref that
7085 indicates which variable (if any) in aff is an IV based on a memory
7086 object. Note that we only consider the candidate. If this is not
7087 based on an object, the base of the reference is in some subexpression
7088 of the use -- but these will use pointer types, so they are recognized
7089 by the create_mem_ref heuristics anyway. */
7090 if (cand->iv->base_object)
7091 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7093 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7094 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7095 reference_alias_ptr_type (*use->op_p),
7096 iv, base_hint, data->speed);
7097 copy_ref_info (ref, *use->op_p);
7098 *use->op_p = ref;
7101 /* Rewrites USE (address that is an iv) using candidate CAND. If it is the
7102 first use of a group, rewrites the sub-uses in the group too. */
7104 static void
7105 rewrite_use_address (struct ivopts_data *data,
7106 struct iv_use *use, struct iv_cand *cand)
7108 struct iv_use *next;
7110 gcc_assert (use->sub_id == 0);
7111 rewrite_use_address_1 (data, use, cand);
7112 update_stmt (use->stmt);
7114 for (next = use->next; next != NULL; next = next->next)
7116 rewrite_use_address_1 (data, next, cand);
7117 update_stmt (next->stmt);
7120 return;
7123 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7124 candidate CAND. */
7126 static void
7127 rewrite_use_compare (struct ivopts_data *data,
7128 struct iv_use *use, struct iv_cand *cand)
7130 tree comp, *var_p, op, bound;
7131 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7132 enum tree_code compare;
7133 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7134 bool ok;
7136 bound = cp->value;
7137 if (bound)
7139 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7140 tree var_type = TREE_TYPE (var);
7141 gimple_seq stmts;
7143 if (dump_file && (dump_flags & TDF_DETAILS))
7145 fprintf (dump_file, "Replacing exit test: ");
7146 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7148 compare = cp->comp;
7149 bound = unshare_expr (fold_convert (var_type, bound));
7150 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7151 if (stmts)
7152 gsi_insert_seq_on_edge_immediate (
7153 loop_preheader_edge (data->current_loop),
7154 stmts);
7156 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7157 gimple_cond_set_lhs (cond_stmt, var);
7158 gimple_cond_set_code (cond_stmt, compare);
7159 gimple_cond_set_rhs (cond_stmt, op);
7160 return;
7163 /* The induction variable elimination failed; just express the original
7164 giv. */
7165 comp = get_computation (data->current_loop, use, cand);
7166 gcc_assert (comp != NULL_TREE);
7168 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7169 gcc_assert (ok);
7171 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7172 true, GSI_SAME_STMT);
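/* For illustration only, with hypothetical SSA names: when iv elimination
   succeeds, an exit test such as

     if (i_1 < n_2)

   is rewritten above into a test against the precomputed bound, e.g.

     if (ivtmp_5 != _7)

   where _7 is gimplified on the preheader edge.  If elimination failed,
   the original compared value is simply re-expressed using CAND.  */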
7175 /* Rewrites USE using candidate CAND. */
7177 static void
7178 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7180 switch (use->type)
7182 case USE_NONLINEAR_EXPR:
7183 rewrite_use_nonlinear_expr (data, use, cand);
7184 break;
7186 case USE_ADDRESS:
7187 rewrite_use_address (data, use, cand);
7188 break;
7190 case USE_COMPARE:
7191 rewrite_use_compare (data, use, cand);
7192 break;
7194 default:
7195 gcc_unreachable ();
7198 update_stmt (use->stmt);
7201 /* Rewrite the uses using the selected induction variables. */
7203 static void
7204 rewrite_uses (struct ivopts_data *data)
7206 unsigned i;
7207 struct iv_cand *cand;
7208 struct iv_use *use;
7210 for (i = 0; i < n_iv_uses (data); i++)
7212 use = iv_use (data, i);
7213 cand = use->selected;
7214 gcc_assert (cand);
7216 rewrite_use (data, use, cand);
7220 /* Removes the ivs that are not used after rewriting. */
7222 static void
7223 remove_unused_ivs (struct ivopts_data *data)
7225 unsigned j;
7226 bitmap_iterator bi;
7227 bitmap toremove = BITMAP_ALLOC (NULL);
7229 /* Figure out an order in which to release SSA DEFs so that we don't
7230 release something that we'd have to propagate into a debug stmt
7231 afterwards. */
7232 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7234 struct version_info *info;
7236 info = ver_info (data, j);
7237 if (info->iv
7238 && !integer_zerop (info->iv->step)
7239 && !info->inv_id
7240 && !info->iv->have_use_for
7241 && !info->preserve_biv)
7243 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7245 tree def = info->iv->ssa_name;
7247 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7249 imm_use_iterator imm_iter;
7250 use_operand_p use_p;
7251 gimple *stmt;
7252 int count = 0;
7254 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7256 if (!gimple_debug_bind_p (stmt))
7257 continue;
7259 /* We just want to determine whether to do nothing
7260 (count == 0), to substitute the computed
7261 expression into a single use of the SSA DEF by
7262 itself (count == 1), or to use a debug temp
7263 because the SSA DEF is used multiple times or as
7264 part of a larger expression (count > 1). */
7265 count++;
7266 if (gimple_debug_bind_get_value (stmt) != def)
7267 count++;
7269 if (count > 1)
7270 BREAK_FROM_IMM_USE_STMT (imm_iter);
7273 if (!count)
7274 continue;
7276 struct iv_use dummy_use;
7277 struct iv_cand *best_cand = NULL, *cand;
7278 unsigned i, best_pref = 0, cand_pref;
7280 memset (&dummy_use, 0, sizeof (dummy_use));
7281 dummy_use.iv = info->iv;
7282 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7284 cand = iv_use (data, i)->selected;
7285 if (cand == best_cand)
7286 continue;
7287 cand_pref = operand_equal_p (cand->iv->step,
7288 info->iv->step, 0)
7289 ? 4 : 0;
7290 cand_pref
7291 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7292 == TYPE_MODE (TREE_TYPE (info->iv->base))
7293 ? 2 : 0;
7294 cand_pref
7295 += TREE_CODE (cand->iv->base) == INTEGER_CST
7296 ? 1 : 0;
7297 if (best_cand == NULL || best_pref < cand_pref)
7299 best_cand = cand;
7300 best_pref = cand_pref;
7304 if (!best_cand)
7305 continue;
7307 tree comp = get_computation_at (data->current_loop,
7308 &dummy_use, best_cand,
7309 SSA_NAME_DEF_STMT (def));
7310 if (!comp)
7311 continue;
7313 if (count > 1)
7315 tree vexpr = make_node (DEBUG_EXPR_DECL);
7316 DECL_ARTIFICIAL (vexpr) = 1;
7317 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7318 if (SSA_NAME_VAR (def))
7319 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7320 else
7321 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7322 gdebug *def_temp
7323 = gimple_build_debug_bind (vexpr, comp, NULL);
7324 gimple_stmt_iterator gsi;
7326 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7327 gsi = gsi_after_labels (gimple_bb
7328 (SSA_NAME_DEF_STMT (def)));
7329 else
7330 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7332 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7333 comp = vexpr;
7336 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7338 if (!gimple_debug_bind_p (stmt))
7339 continue;
7341 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7342 SET_USE (use_p, comp);
7344 update_stmt (stmt);
7350 release_defs_bitset (toremove);
7352 BITMAP_FREE (toremove);
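/* For illustration, assuming a removed iv I that several debug binds
   reference: the code above first emits a debug temporary

     # DEBUG D#1 => <I expressed in terms of the selected candidate>

   and then rewrites each bind to

     # DEBUG I => D#1

   so the user variable stays inspectable after I itself is released
   (a sketch; the exact dump syntax may differ).  */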
7355 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7356 for hash_map::traverse. */
7358 bool
7359 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7361 free (value);
7362 return true;
7365 /* Frees data allocated by the optimization of a single loop. */
7367 static void
7368 free_loop_data (struct ivopts_data *data)
7370 unsigned i, j;
7371 bitmap_iterator bi;
7372 tree obj;
7374 if (data->niters)
7376 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7377 delete data->niters;
7378 data->niters = NULL;
7381 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7383 struct version_info *info;
7385 info = ver_info (data, i);
7386 info->iv = NULL;
7387 info->has_nonlin_use = false;
7388 info->preserve_biv = false;
7389 info->inv_id = 0;
7391 bitmap_clear (data->relevant);
7392 bitmap_clear (data->important_candidates);
7394 for (i = 0; i < n_iv_uses (data); i++)
7396 struct iv_use *use = iv_use (data, i);
7397 struct iv_use *pre = use, *sub = use->next;
7399 while (sub)
7401 gcc_assert (sub->related_cands == NULL);
7402 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7404 pre = sub;
7405 sub = sub->next;
7406 free (pre);
7409 BITMAP_FREE (use->related_cands);
7410 for (j = 0; j < use->n_map_members; j++)
7411 if (use->cost_map[j].depends_on)
7412 BITMAP_FREE (use->cost_map[j].depends_on);
7413 free (use->cost_map);
7414 free (use);
7416 data->iv_uses.truncate (0);
7418 for (i = 0; i < n_iv_cands (data); i++)
7420 struct iv_cand *cand = iv_cand (data, i);
7422 if (cand->depends_on)
7423 BITMAP_FREE (cand->depends_on);
7424 free (cand);
7426 data->iv_candidates.truncate (0);
7428 if (data->version_info_size < num_ssa_names)
7430 data->version_info_size = 2 * num_ssa_names;
7431 free (data->version_info);
7432 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7435 data->max_inv_id = 0;
7437 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7438 SET_DECL_RTL (obj, NULL_RTX);
7440 decl_rtl_to_reset.truncate (0);
7442 data->inv_expr_tab->empty ();
7443 data->inv_expr_id = 0;
7446 /* Finalizes the data structures used by the iv optimization pass and
7447 stored in DATA. */
7449 static void
7450 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7452 free_loop_data (data);
7453 free (data->version_info);
7454 BITMAP_FREE (data->relevant);
7455 BITMAP_FREE (data->important_candidates);
7457 decl_rtl_to_reset.release ();
7458 data->iv_uses.release ();
7459 data->iv_candidates.release ();
7460 delete data->inv_expr_tab;
7461 data->inv_expr_tab = NULL;
7462 free_affine_expand_cache (&data->name_expansion_cache);
7463 obstack_free (&data->iv_obstack, NULL);
7466 /* Returns true if the loop body BODY includes any function calls. */
7468 static bool
7469 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7471 gimple_stmt_iterator gsi;
7472 unsigned i;
7474 for (i = 0; i < num_nodes; i++)
7475 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7477 gimple *stmt = gsi_stmt (gsi);
7478 if (is_gimple_call (stmt)
7479 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7480 return true;
7482 return false;
7485 /* Optimizes the LOOP. Returns true if anything changed. */
7487 static bool
7488 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7490 bool changed = false;
7491 struct iv_ca *iv_ca;
7492 edge exit = single_dom_exit (loop);
7493 basic_block *body;
7495 gcc_assert (!data->niters);
7496 data->current_loop = loop;
7497 data->loop_loc = find_loop_location (loop);
7498 data->speed = optimize_loop_for_speed_p (loop);
7500 if (dump_file && (dump_flags & TDF_DETAILS))
7502 fprintf (dump_file, "Processing loop %d", loop->num);
7503 if (data->loop_loc != UNKNOWN_LOCATION)
7504 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7505 LOCATION_LINE (data->loop_loc));
7506 fprintf (dump_file, "\n");
7508 if (exit)
7510 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7511 exit->src->index, exit->dest->index);
7512 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7513 fprintf (dump_file, "\n");
7516 fprintf (dump_file, "\n");
7519 body = get_loop_body (loop);
7520 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7521 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7522 free (body);
7524 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7526 /* For each ssa name, determine whether it behaves as an induction variable
7527 in some loop. */
7528 if (!find_induction_variables (data))
7529 goto finish;
7531 /* Finds interesting uses (item 1). */
7532 find_interesting_uses (data);
7533 group_address_uses (data);
7534 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7535 goto finish;
7537 /* Finds candidates for the induction variables (item 2). */
7538 find_iv_candidates (data);
7540 /* Calculates the costs (item 3, part 1). */
7541 determine_iv_costs (data);
7542 determine_use_iv_costs (data);
7543 determine_set_costs (data);
7545 /* Find the optimal set of induction variables (item 3, part 2). */
7546 iv_ca = find_optimal_iv_set (data);
7547 if (!iv_ca)
7548 goto finish;
7549 changed = true;
7551 /* Create the new induction variables (item 4, part 1). */
7552 create_new_ivs (data, iv_ca);
7553 iv_ca_free (&iv_ca);
7555 /* Rewrite the uses (item 4, part 2). */
7556 rewrite_uses (data);
7558 /* Remove the ivs that are unused after rewriting. */
7559 remove_unused_ivs (data);
7561 /* We have changed the structure of induction variables; it might happen
7562 that definitions in the scev database refer to some of them that were
7563 eliminated. */
7564 scev_reset ();
7566 finish:
7567 free_loop_data (data);
7569 return changed;
7572 /* Main entry point. Optimizes induction variables in loops. */
7574 void
7575 tree_ssa_iv_optimize (void)
7577 struct loop *loop;
7578 struct ivopts_data data;
7580 tree_ssa_iv_optimize_init (&data);
7582 /* Optimize the loops starting with the innermost ones. */
7583 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7585 if (dump_file && (dump_flags & TDF_DETAILS))
7586 flow_loop_dump (loop, dump_file, NULL, 1);
7588 tree_ssa_iv_optimize_loop (&data, loop);
7591 tree_ssa_iv_optimize_finalize (&data);