1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
71 #include "double-int.h"
78 #include "fold-const.h"
79 #include "stor-layout.h"
82 #include "hard-reg-set.h"
84 #include "dominance.h"
86 #include "basic-block.h"
87 #include "gimple-pretty-print.h"
89 #include "hash-table.h"
90 #include "tree-ssa-alias.h"
91 #include "internal-fn.h"
93 #include "gimple-expr.h"
97 #include "gimple-iterator.h"
98 #include "gimplify-me.h"
99 #include "gimple-ssa.h"
100 #include "plugin-api.h"
103 #include "tree-cfg.h"
104 #include "tree-phinodes.h"
105 #include "ssa-iterators.h"
106 #include "stringpool.h"
107 #include "tree-ssanames.h"
108 #include "tree-ssa-loop-ivopts.h"
109 #include "tree-ssa-loop-manip.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop.h"
115 #include "statistics.h"
117 #include "fixed-value.h"
118 #include "insn-config.h"
123 #include "emit-rtl.h"
127 #include "tree-dfa.h"
128 #include "tree-ssa.h"
130 #include "tree-pass.h"
131 #include "tree-chrec.h"
132 #include "tree-scalar-evolution.h"
134 #include "langhooks.h"
135 #include "tree-affine.h"
137 #include "tree-inline.h"
138 #include "tree-ssa-propagate.h"
139 #include "tree-ssa-address.h"
140 #include "builtins.h"
142 /* FIXME: Expressions are expanded to RTL in this pass to determine the
143 cost of different addressing modes. This should be moved to a TBD
144 interface between the GIMPLE and RTL worlds. */
147 /* The infinite cost. */
148 #define INFTY 10000000
150 #define AVG_LOOP_NITER(LOOP) 5
152 /* Returns the expected number of loop iterations for LOOP.
153 The average trip count is computed from profile data if it
156 static inline HOST_WIDE_INT
157 avg_loop_niter (struct loop
*loop
)
159 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
161 return AVG_LOOP_NITER (loop
);
166 /* Representation of the induction variable. */
169 tree base
; /* Initial value of the iv. */
170 tree base_object
; /* A memory object to that the induction variable points. */
171 tree step
; /* Step of the iv (constant only). */
172 tree ssa_name
; /* The ssa name with the value. */
173 bool biv_p
; /* Is it a biv? */
174 bool have_use_for
; /* Do we already have a use for it? */
175 unsigned use_id
; /* The identifier in the use if it is the case. */
178 /* Per-ssa version information (induction variable descriptions, etc.). */
181 tree name
; /* The ssa name. */
182 struct iv
*iv
; /* Induction variable description. */
183 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
184 an expression that is not an induction variable. */
185 bool preserve_biv
; /* For the original biv, whether to preserve it. */
186 unsigned inv_id
; /* Id of an invariant. */
192 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
193 USE_ADDRESS
, /* Use in an address. */
194 USE_COMPARE
/* Use is a compare. */
197 /* Cost of a computation. */
200 int cost
; /* The runtime cost. */
201 unsigned complexity
; /* The estimate of the complexity of the code for
202 the computation (in no concrete units --
203 complexity field should be larger for more
204 complex expressions and addressing modes). */
207 static const comp_cost no_cost
= {0, 0};
208 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
210 /* The candidate - cost pair. */
213 struct iv_cand
*cand
; /* The candidate. */
214 comp_cost cost
; /* The cost. */
215 bitmap depends_on
; /* The list of invariants that have to be
217 tree value
; /* For final value elimination, the expression for
218 the final value of the iv. For iv elimination,
219 the new bound to compare with. */
220 enum tree_code comp
; /* For iv elimination, the comparison. */
221 int inv_expr_id
; /* Loop invariant expression id. */
227 unsigned id
; /* The id of the use. */
228 enum use_type type
; /* Type of the use. */
229 struct iv
*iv
; /* The induction variable it is based on. */
230 gimple stmt
; /* Statement in that it occurs. */
231 tree
*op_p
; /* The place where it occurs. */
232 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
235 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
236 struct cost_pair
*cost_map
;
237 /* The costs wrto the iv candidates. */
239 struct iv_cand
*selected
;
240 /* The selected candidate. */
243 /* The position where the iv is computed. */
246 IP_NORMAL
, /* At the end, just before the exit condition. */
247 IP_END
, /* At the end of the latch block. */
248 IP_BEFORE_USE
, /* Immediately before a specific use. */
249 IP_AFTER_USE
, /* Immediately after a specific use. */
250 IP_ORIGINAL
/* The original biv. */
253 /* The induction variable candidate. */
256 unsigned id
; /* The number of the candidate. */
257 bool important
; /* Whether this is an "important" candidate, i.e. such
258 that it should be considered by all uses. */
259 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
260 gimple incremented_at
;/* For original biv, the statement where it is
262 tree var_before
; /* The variable used for it before increment. */
263 tree var_after
; /* The variable used for it after increment. */
264 struct iv
*iv
; /* The value of the candidate. NULL for
265 "pseudocandidate" used to indicate the possibility
266 to replace the final value of an iv by direct
267 computation of the value. */
268 unsigned cost
; /* Cost of the candidate. */
269 unsigned cost_step
; /* Cost of the candidate's increment operation. */
270 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
271 where it is incremented. */
272 bitmap depends_on
; /* The list of invariants that are used in step of the
276 /* Loop invariant expression hashtable entry. */
277 struct iv_inv_expr_ent
284 /* The data used by the induction variable optimizations. */
286 typedef struct iv_use
*iv_use_p
;
288 typedef struct iv_cand
*iv_cand_p
;
290 /* Hashtable helpers. */
292 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
294 typedef iv_inv_expr_ent value_type
;
295 typedef iv_inv_expr_ent compare_type
;
296 static inline hashval_t
hash (const value_type
*);
297 static inline bool equal (const value_type
*, const compare_type
*);
300 /* Hash function for loop invariant expressions. */
303 iv_inv_expr_hasher::hash (const value_type
*expr
)
308 /* Hash table equality function for expressions. */
311 iv_inv_expr_hasher::equal (const value_type
*expr1
, const compare_type
*expr2
)
313 return expr1
->hash
== expr2
->hash
314 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
319 /* The currently optimized loop. */
320 struct loop
*current_loop
;
322 /* Numbers of iterations for all exits of the current loop. */
323 hash_map
<edge
, tree_niter_desc
*> *niters
;
325 /* Number of registers used in it. */
328 /* The size of version_info array allocated. */
329 unsigned version_info_size
;
331 /* The array of information for the ssa names. */
332 struct version_info
*version_info
;
334 /* The hashtable of loop invariant expressions created
336 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
338 /* Loop invariant expression id. */
341 /* The bitmap of indices in version_info whose value was changed. */
344 /* The uses of induction variables. */
345 vec
<iv_use_p
> iv_uses
;
347 /* The candidates. */
348 vec
<iv_cand_p
> iv_candidates
;
350 /* A bitmap of important candidates. */
351 bitmap important_candidates
;
353 /* Cache used by tree_to_aff_combination_expand. */
354 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
356 /* The maximum invariant id. */
359 /* Whether to consider just related and important candidates when replacing a
361 bool consider_all_candidates
;
363 /* Are we optimizing for speed? */
366 /* Whether the loop body includes any function calls. */
367 bool body_includes_call
;
369 /* Whether the loop body can only be exited via single exit. */
370 bool loop_single_exit_p
;
373 /* An assignment of iv candidates to uses. */
377 /* The number of uses covered by the assignment. */
380 /* Number of uses that cannot be expressed by the candidates in the set. */
383 /* Candidate assigned to a use, together with the related costs. */
384 struct cost_pair
**cand_for_use
;
386 /* Number of times each candidate is used. */
387 unsigned *n_cand_uses
;
389 /* The candidates used. */
392 /* The number of candidates in the set. */
395 /* Total number of registers needed. */
398 /* Total cost of expressing uses. */
399 comp_cost cand_use_cost
;
401 /* Total cost of candidates. */
404 /* Number of times each invariant is used. */
405 unsigned *n_invariant_uses
;
407 /* The array holding the number of uses of each loop
408 invariant expressions created by ivopt. */
409 unsigned *used_inv_expr
;
411 /* The number of created loop invariants. */
412 unsigned num_used_inv_expr
;
414 /* Total cost of the assignment. */
418 /* Difference of two iv candidate assignments. */
425 /* An old assignment (for rollback purposes). */
426 struct cost_pair
*old_cp
;
428 /* A new assignment. */
429 struct cost_pair
*new_cp
;
431 /* Next change in the list. */
432 struct iv_ca_delta
*next_change
;
435 /* Bound on number of candidates below that all candidates are considered. */
437 #define CONSIDER_ALL_CANDIDATES_BOUND \
438 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
440 /* If there are more iv occurrences, we just give up (it is quite unlikely that
441 optimizing such a loop would help, and it would take ages). */
443 #define MAX_CONSIDERED_USES \
444 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
446 /* If there are at most this number of ivs in the set, try removing unnecessary
447 ivs from the set always. */
449 #define ALWAYS_PRUNE_CAND_SET_BOUND \
450 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
452 /* The list of trees for that the decl_rtl field must be reset is stored
455 static vec
<tree
> decl_rtl_to_reset
;
457 static comp_cost
force_expr_to_var_cost (tree
, bool);
459 /* Number of uses recorded in DATA. */
461 static inline unsigned
462 n_iv_uses (struct ivopts_data
*data
)
464 return data
->iv_uses
.length ();
467 /* Ith use recorded in DATA. */
469 static inline struct iv_use
*
470 iv_use (struct ivopts_data
*data
, unsigned i
)
472 return data
->iv_uses
[i
];
475 /* Number of candidates recorded in DATA. */
477 static inline unsigned
478 n_iv_cands (struct ivopts_data
*data
)
480 return data
->iv_candidates
.length ();
483 /* Ith candidate recorded in DATA. */
485 static inline struct iv_cand
*
486 iv_cand (struct ivopts_data
*data
, unsigned i
)
488 return data
->iv_candidates
[i
];
491 /* The single loop exit if it dominates the latch, NULL otherwise. */
494 single_dom_exit (struct loop
*loop
)
496 edge exit
= single_exit (loop
);
501 if (!just_once_each_iteration_p (loop
, exit
->src
))
507 /* Dumps information about the induction variable IV to FILE. */
510 dump_iv (FILE *file
, struct iv
*iv
)
514 fprintf (file
, "ssa name ");
515 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
516 fprintf (file
, "\n");
519 fprintf (file
, " type ");
520 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
521 fprintf (file
, "\n");
525 fprintf (file
, " base ");
526 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
527 fprintf (file
, "\n");
529 fprintf (file
, " step ");
530 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
531 fprintf (file
, "\n");
535 fprintf (file
, " invariant ");
536 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
537 fprintf (file
, "\n");
542 fprintf (file
, " base object ");
543 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
544 fprintf (file
, "\n");
548 fprintf (file
, " is a biv\n");
551 /* Dumps information about the USE to FILE. */
554 dump_use (FILE *file
, struct iv_use
*use
)
556 fprintf (file
, "use %d\n", use
->id
);
560 case USE_NONLINEAR_EXPR
:
561 fprintf (file
, " generic\n");
565 fprintf (file
, " address\n");
569 fprintf (file
, " compare\n");
576 fprintf (file
, " in statement ");
577 print_gimple_stmt (file
, use
->stmt
, 0, 0);
578 fprintf (file
, "\n");
580 fprintf (file
, " at position ");
582 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
583 fprintf (file
, "\n");
585 dump_iv (file
, use
->iv
);
587 if (use
->related_cands
)
589 fprintf (file
, " related candidates ");
590 dump_bitmap (file
, use
->related_cands
);
594 /* Dumps information about the uses to FILE. */
597 dump_uses (FILE *file
, struct ivopts_data
*data
)
602 for (i
= 0; i
< n_iv_uses (data
); i
++)
604 use
= iv_use (data
, i
);
606 dump_use (file
, use
);
607 fprintf (file
, "\n");
611 /* Dumps information about induction variable candidate CAND to FILE. */
614 dump_cand (FILE *file
, struct iv_cand
*cand
)
616 struct iv
*iv
= cand
->iv
;
618 fprintf (file
, "candidate %d%s\n",
619 cand
->id
, cand
->important
? " (important)" : "");
621 if (cand
->depends_on
)
623 fprintf (file
, " depends on ");
624 dump_bitmap (file
, cand
->depends_on
);
629 fprintf (file
, " final value replacement\n");
633 if (cand
->var_before
)
635 fprintf (file
, " var_before ");
636 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
637 fprintf (file
, "\n");
641 fprintf (file
, " var_after ");
642 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
643 fprintf (file
, "\n");
649 fprintf (file
, " incremented before exit test\n");
653 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
657 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
661 fprintf (file
, " incremented at end\n");
665 fprintf (file
, " original biv\n");
672 /* Returns the info for ssa version VER. */
674 static inline struct version_info
*
675 ver_info (struct ivopts_data
*data
, unsigned ver
)
677 return data
->version_info
+ ver
;
680 /* Returns the info for ssa name NAME. */
682 static inline struct version_info
*
683 name_info (struct ivopts_data
*data
, tree name
)
685 return ver_info (data
, SSA_NAME_VERSION (name
));
688 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
692 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
694 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
698 if (sbb
== loop
->latch
)
704 return stmt
== last_stmt (bb
);
707 /* Returns true if STMT if after the place where the original induction
708 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
709 if the positions are identical. */
712 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
714 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
715 basic_block stmt_bb
= gimple_bb (stmt
);
717 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
720 if (stmt_bb
!= cand_bb
)
724 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
726 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
729 /* Returns true if STMT if after the place where the induction variable
730 CAND is incremented in LOOP. */
733 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
741 return stmt_after_ip_normal_pos (loop
, stmt
);
745 return stmt_after_inc_pos (cand
, stmt
, false);
748 return stmt_after_inc_pos (cand
, stmt
, true);
755 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
758 abnormal_ssa_name_p (tree exp
)
763 if (TREE_CODE (exp
) != SSA_NAME
)
766 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
769 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
770 abnormal phi node. Callback for for_each_index. */
773 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
774 void *data ATTRIBUTE_UNUSED
)
776 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
778 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
780 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
784 return !abnormal_ssa_name_p (*index
);
787 /* Returns true if EXPR contains a ssa name that occurs in an
788 abnormal phi node. */
791 contains_abnormal_ssa_name_p (tree expr
)
794 enum tree_code_class codeclass
;
799 code
= TREE_CODE (expr
);
800 codeclass
= TREE_CODE_CLASS (code
);
802 if (code
== SSA_NAME
)
803 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
805 if (code
== INTEGER_CST
806 || is_gimple_min_invariant (expr
))
809 if (code
== ADDR_EXPR
)
810 return !for_each_index (&TREE_OPERAND (expr
, 0),
811 idx_contains_abnormal_ssa_name_p
,
814 if (code
== COND_EXPR
)
815 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
816 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
817 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
823 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
828 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
840 /* Returns the structure describing number of iterations determined from
841 EXIT of DATA->current_loop, or NULL if something goes wrong. */
843 static struct tree_niter_desc
*
844 niter_for_exit (struct ivopts_data
*data
, edge exit
)
846 struct tree_niter_desc
*desc
;
847 tree_niter_desc
**slot
;
851 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
855 slot
= data
->niters
->get (exit
);
859 /* Try to determine number of iterations. We cannot safely work with ssa
860 names that appear in phi nodes on abnormal edges, so that we do not
861 create overlapping life ranges for them (PR 27283). */
862 desc
= XNEW (struct tree_niter_desc
);
863 if (!number_of_iterations_exit (data
->current_loop
,
865 || contains_abnormal_ssa_name_p (desc
->niter
))
870 data
->niters
->put (exit
, desc
);
878 /* Returns the structure describing number of iterations determined from
879 single dominating exit of DATA->current_loop, or NULL if something
882 static struct tree_niter_desc
*
883 niter_for_single_dom_exit (struct ivopts_data
*data
)
885 edge exit
= single_dom_exit (data
->current_loop
);
890 return niter_for_exit (data
, exit
);
893 /* Initializes data structures used by the iv optimization pass, stored
897 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
899 data
->version_info_size
= 2 * num_ssa_names
;
900 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
901 data
->relevant
= BITMAP_ALLOC (NULL
);
902 data
->important_candidates
= BITMAP_ALLOC (NULL
);
903 data
->max_inv_id
= 0;
905 data
->iv_uses
.create (20);
906 data
->iv_candidates
.create (20);
907 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
908 data
->inv_expr_id
= 0;
909 data
->name_expansion_cache
= NULL
;
910 decl_rtl_to_reset
.create (20);
913 /* Returns a memory object to that EXPR points. In case we are able to
914 determine that it does not point to any such object, NULL is returned. */
917 determine_base_object (tree expr
)
919 enum tree_code code
= TREE_CODE (expr
);
922 /* If this is a pointer casted to any type, we need to determine
923 the base object for the pointer; so handle conversions before
924 throwing away non-pointer expressions. */
925 if (CONVERT_EXPR_P (expr
))
926 return determine_base_object (TREE_OPERAND (expr
, 0));
928 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
937 obj
= TREE_OPERAND (expr
, 0);
938 base
= get_base_address (obj
);
943 if (TREE_CODE (base
) == MEM_REF
)
944 return determine_base_object (TREE_OPERAND (base
, 0));
946 return fold_convert (ptr_type_node
,
947 build_fold_addr_expr (base
));
949 case POINTER_PLUS_EXPR
:
950 return determine_base_object (TREE_OPERAND (expr
, 0));
954 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
958 return fold_convert (ptr_type_node
, expr
);
962 /* Return true if address expression with non-DECL_P operand appears
966 contain_complex_addr_expr (tree expr
)
971 switch (TREE_CODE (expr
))
973 case POINTER_PLUS_EXPR
:
976 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
977 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
981 return (!DECL_P (TREE_OPERAND (expr
, 0)));
990 /* Allocates an induction variable with given initial value BASE and step STEP
994 alloc_iv (tree base
, tree step
)
997 struct iv
*iv
= XCNEW (struct iv
);
998 gcc_assert (step
!= NULL_TREE
);
1000 /* Lower address expression in base except ones with DECL_P as operand.
1002 1) More accurate cost can be computed for address expressions;
1003 2) Duplicate candidates won't be created for bases in different
1004 forms, like &a[0] and &a. */
1006 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1007 || contain_complex_addr_expr (expr
))
1010 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1011 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1015 iv
->base_object
= determine_base_object (base
);
1018 iv
->have_use_for
= false;
1020 iv
->ssa_name
= NULL_TREE
;
1025 /* Sets STEP and BASE for induction variable IV. */
1028 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
1030 struct version_info
*info
= name_info (data
, iv
);
1032 gcc_assert (!info
->iv
);
1034 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1035 info
->iv
= alloc_iv (base
, step
);
1036 info
->iv
->ssa_name
= iv
;
1039 /* Finds induction variable declaration for VAR. */
1042 get_iv (struct ivopts_data
*data
, tree var
)
1045 tree type
= TREE_TYPE (var
);
1047 if (!POINTER_TYPE_P (type
)
1048 && !INTEGRAL_TYPE_P (type
))
1051 if (!name_info (data
, var
)->iv
)
1053 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1056 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1057 set_iv (data
, var
, var
, build_int_cst (type
, 0));
1060 return name_info (data
, var
)->iv
;
1063 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1064 not define a simple affine biv with nonzero step. */
1067 determine_biv_step (gphi
*phi
)
1069 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1070 tree name
= PHI_RESULT (phi
);
1073 if (virtual_operand_p (name
))
1076 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1079 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1082 /* Finds basic ivs. */
1085 find_bivs (struct ivopts_data
*data
)
1088 tree step
, type
, base
;
1090 struct loop
*loop
= data
->current_loop
;
1093 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1097 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1100 step
= determine_biv_step (phi
);
1104 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1105 base
= expand_simple_operations (base
);
1106 if (contains_abnormal_ssa_name_p (base
)
1107 || contains_abnormal_ssa_name_p (step
))
1110 type
= TREE_TYPE (PHI_RESULT (phi
));
1111 base
= fold_convert (type
, base
);
1114 if (POINTER_TYPE_P (type
))
1115 step
= convert_to_ptrofftype (step
);
1117 step
= fold_convert (type
, step
);
1120 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1127 /* Marks basic ivs. */
1130 mark_bivs (struct ivopts_data
*data
)
1135 struct iv
*iv
, *incr_iv
;
1136 struct loop
*loop
= data
->current_loop
;
1137 basic_block incr_bb
;
1140 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1144 iv
= get_iv (data
, PHI_RESULT (phi
));
1148 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1149 def
= SSA_NAME_DEF_STMT (var
);
1150 /* Don't mark iv peeled from other one as biv. */
1152 && gimple_code (def
) == GIMPLE_PHI
1153 && gimple_bb (def
) == loop
->header
)
1156 incr_iv
= get_iv (data
, var
);
1160 /* If the increment is in the subloop, ignore it. */
1161 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1162 if (incr_bb
->loop_father
!= data
->current_loop
1163 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1167 incr_iv
->biv_p
= true;
1171 /* Checks whether STMT defines a linear induction variable and stores its
1172 parameters to IV. */
1175 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1178 struct loop
*loop
= data
->current_loop
;
1180 iv
->base
= NULL_TREE
;
1181 iv
->step
= NULL_TREE
;
1183 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1186 lhs
= gimple_assign_lhs (stmt
);
1187 if (TREE_CODE (lhs
) != SSA_NAME
)
1190 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1192 iv
->base
= expand_simple_operations (iv
->base
);
1194 if (contains_abnormal_ssa_name_p (iv
->base
)
1195 || contains_abnormal_ssa_name_p (iv
->step
))
1198 /* If STMT could throw, then do not consider STMT as defining a GIV.
1199 While this will suppress optimizations, we can not safely delete this
1200 GIV and associated statements, even if it appears it is not used. */
1201 if (stmt_could_throw_p (stmt
))
1207 /* Finds general ivs in statement STMT. */
1210 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1214 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1217 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1220 /* Finds general ivs in basic block BB. */
1223 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1225 gimple_stmt_iterator bsi
;
1227 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1228 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1231 /* Finds general ivs. */
1234 find_givs (struct ivopts_data
*data
)
1236 struct loop
*loop
= data
->current_loop
;
1237 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1240 for (i
= 0; i
< loop
->num_nodes
; i
++)
1241 find_givs_in_bb (data
, body
[i
]);
1245 /* For each ssa name defined in LOOP determines whether it is an induction
1246 variable and if so, its initial value and step. */
1249 find_induction_variables (struct ivopts_data
*data
)
1254 if (!find_bivs (data
))
1260 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1262 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1266 fprintf (dump_file
, " number of iterations ");
1267 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1268 if (!integer_zerop (niter
->may_be_zero
))
1270 fprintf (dump_file
, "; zero if ");
1271 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1273 fprintf (dump_file
, "\n\n");
1276 fprintf (dump_file
, "Induction variables:\n\n");
1278 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1280 if (ver_info (data
, i
)->iv
)
1281 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1288 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1290 static struct iv_use
*
1291 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1292 gimple stmt
, enum use_type use_type
)
1294 struct iv_use
*use
= XCNEW (struct iv_use
);
1296 use
->id
= n_iv_uses (data
);
1297 use
->type
= use_type
;
1301 use
->related_cands
= BITMAP_ALLOC (NULL
);
1303 /* To avoid showing ssa name in the dumps, if it was not reset by the
1305 iv
->ssa_name
= NULL_TREE
;
1307 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1308 dump_use (dump_file
, use
);
1310 data
->iv_uses
.safe_push (use
);
1315 /* Checks whether OP is a loop-level invariant and if so, records it.
1316 NONLINEAR_USE is true if the invariant is used in a way we do not
1317 handle specially. */
1320 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1323 struct version_info
*info
;
1325 if (TREE_CODE (op
) != SSA_NAME
1326 || virtual_operand_p (op
))
1329 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1331 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1334 info
= name_info (data
, op
);
1336 info
->has_nonlin_use
|= nonlinear_use
;
1338 info
->inv_id
= ++data
->max_inv_id
;
1339 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1342 /* Checks whether the use OP is interesting and if so, records it. */
1344 static struct iv_use
*
1345 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1352 if (TREE_CODE (op
) != SSA_NAME
)
1355 iv
= get_iv (data
, op
);
1359 if (iv
->have_use_for
)
1361 use
= iv_use (data
, iv
->use_id
);
1363 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1367 if (integer_zerop (iv
->step
))
1369 record_invariant (data
, op
, true);
1372 iv
->have_use_for
= true;
1374 civ
= XNEW (struct iv
);
1377 stmt
= SSA_NAME_DEF_STMT (op
);
1378 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1379 || is_gimple_assign (stmt
));
1381 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1382 iv
->use_id
= use
->id
;
1387 /* Given a condition in statement STMT, checks whether it is a compare
1388 of an induction variable and an invariant. If this is the case,
1389 CONTROL_VAR is set to location of the iv, BOUND to the location of
1390 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1391 induction variable descriptions, and true is returned. If this is not
1392 the case, CONTROL_VAR and BOUND are set to the arguments of the
1393 condition and false is returned. */
1396 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1397 tree
**control_var
, tree
**bound
,
1398 struct iv
**iv_var
, struct iv
**iv_bound
)
1400 /* The objects returned when COND has constant operands. */
1401 static struct iv const_iv
;
1403 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1404 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1407 if (gimple_code (stmt
) == GIMPLE_COND
)
1409 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1410 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1411 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1415 op0
= gimple_assign_rhs1_ptr (stmt
);
1416 op1
= gimple_assign_rhs2_ptr (stmt
);
1419 zero
= integer_zero_node
;
1420 const_iv
.step
= integer_zero_node
;
1422 if (TREE_CODE (*op0
) == SSA_NAME
)
1423 iv0
= get_iv (data
, *op0
);
1424 if (TREE_CODE (*op1
) == SSA_NAME
)
1425 iv1
= get_iv (data
, *op1
);
1427 /* Exactly one of the compared values must be an iv, and the other one must
1432 if (integer_zerop (iv0
->step
))
1434 /* Control variable may be on the other side. */
1435 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1436 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1438 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1442 *control_var
= op0
;;
1453 /* Checks whether the condition in STMT is interesting and if so,
1457 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1459 tree
*var_p
, *bound_p
;
1460 struct iv
*var_iv
, *civ
;
1462 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1464 find_interesting_uses_op (data
, *var_p
);
1465 find_interesting_uses_op (data
, *bound_p
);
1469 civ
= XNEW (struct iv
);
1471 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1474 /* Returns the outermost loop EXPR is obviously invariant in
1475 relative to the loop LOOP, i.e. if all its operands are defined
1476 outside of the returned loop. Returns NULL if EXPR is not
1477 even obviously invariant in LOOP. */
1480 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1485 if (is_gimple_min_invariant (expr
))
1486 return current_loops
->tree_root
;
1488 if (TREE_CODE (expr
) == SSA_NAME
)
1490 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1493 if (flow_bb_inside_loop_p (loop
, def_bb
))
1495 return superloop_at_depth (loop
,
1496 loop_depth (def_bb
->loop_father
) + 1);
1499 return current_loops
->tree_root
;
1505 unsigned maxdepth
= 0;
1506 len
= TREE_OPERAND_LENGTH (expr
);
1507 for (i
= 0; i
< len
; i
++)
1509 struct loop
*ivloop
;
1510 if (!TREE_OPERAND (expr
, i
))
1513 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1516 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1519 return superloop_at_depth (loop
, maxdepth
);
1522 /* Returns true if expression EXPR is obviously invariant in LOOP,
1523 i.e. if all its operands are defined outside of the LOOP. LOOP
1524 should not be the function body. */
1527 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1532 gcc_assert (loop_depth (loop
) > 0);
1534 if (is_gimple_min_invariant (expr
))
1537 if (TREE_CODE (expr
) == SSA_NAME
)
1539 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1541 && flow_bb_inside_loop_p (loop
, def_bb
))
1550 len
= TREE_OPERAND_LENGTH (expr
);
1551 for (i
= 0; i
< len
; i
++)
1552 if (TREE_OPERAND (expr
, i
)
1553 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1559 /* Cumulates the steps of indices into DATA and replaces their values with the
1560 initial ones. Returns false when the value of the index cannot be determined.
1561 Callback for for_each_index. */
1563 struct ifs_ivopts_data
1565 struct ivopts_data
*ivopts_data
;
1571 idx_find_step (tree base
, tree
*idx
, void *data
)
1573 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1575 tree step
, iv_base
, iv_step
, lbound
, off
;
1576 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1578 /* If base is a component ref, require that the offset of the reference
1580 if (TREE_CODE (base
) == COMPONENT_REF
)
1582 off
= component_ref_field_offset (base
);
1583 return expr_invariant_in_loop_p (loop
, off
);
1586 /* If base is array, first check whether we will be able to move the
1587 reference out of the loop (in order to take its address in strength
1588 reduction). In order for this to work we need both lower bound
1589 and step to be loop invariants. */
1590 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1592 /* Moreover, for a range, the size needs to be invariant as well. */
1593 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1594 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1597 step
= array_ref_element_size (base
);
1598 lbound
= array_ref_low_bound (base
);
1600 if (!expr_invariant_in_loop_p (loop
, step
)
1601 || !expr_invariant_in_loop_p (loop
, lbound
))
1605 if (TREE_CODE (*idx
) != SSA_NAME
)
1608 iv
= get_iv (dta
->ivopts_data
, *idx
);
1612 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1613 *&x[0], which is not folded and does not trigger the
1614 ARRAY_REF path below. */
1617 if (integer_zerop (iv
->step
))
1620 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1622 step
= array_ref_element_size (base
);
1624 /* We only handle addresses whose step is an integer constant. */
1625 if (TREE_CODE (step
) != INTEGER_CST
)
1629 /* The step for pointer arithmetics already is 1 byte. */
1630 step
= size_one_node
;
1634 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1635 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1638 /* The index might wrap. */
1642 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1643 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1648 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1649 object is passed to it in DATA. */
1652 idx_record_use (tree base
, tree
*idx
,
1655 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1656 find_interesting_uses_op (data
, *idx
);
1657 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1659 find_interesting_uses_op (data
, array_ref_element_size (base
));
1660 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1665 /* If we can prove that TOP = cst * BOT for some constant cst,
1666 store cst to MUL and return true. Otherwise return false.
1667 The returned value is always sign-extended, regardless of the
1668 signedness of TOP and BOT. */
1671 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1674 enum tree_code code
;
1675 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1676 widest_int res
, p0
, p1
;
1681 if (operand_equal_p (top
, bot
, 0))
1687 code
= TREE_CODE (top
);
1691 mby
= TREE_OPERAND (top
, 1);
1692 if (TREE_CODE (mby
) != INTEGER_CST
)
1695 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1698 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1703 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1704 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1707 if (code
== MINUS_EXPR
)
1709 *mul
= wi::sext (p0
+ p1
, precision
);
1713 if (TREE_CODE (bot
) != INTEGER_CST
)
1716 p0
= widest_int::from (top
, SIGNED
);
1717 p1
= widest_int::from (bot
, SIGNED
);
1720 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1728 /* Return true if memory reference REF with step STEP may be unaligned. */
1731 may_be_unaligned_p (tree ref
, tree step
)
1733 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1734 thus they are not misaligned. */
1735 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1738 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1739 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1740 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1742 unsigned HOST_WIDE_INT bitpos
;
1743 unsigned int ref_align
;
1744 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1745 if (ref_align
< align
1746 || (bitpos
% align
) != 0
1747 || (bitpos
% BITS_PER_UNIT
) != 0)
1750 unsigned int trailing_zeros
= tree_ctz (step
);
1751 if (trailing_zeros
< HOST_BITS_PER_INT
1752 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1758 /* Return true if EXPR may be non-addressable. */
1761 may_be_nonaddressable_p (tree expr
)
1763 switch (TREE_CODE (expr
))
1765 case TARGET_MEM_REF
:
1766 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1767 target, thus they are always addressable. */
1771 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1772 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1774 case VIEW_CONVERT_EXPR
:
1775 /* This kind of view-conversions may wrap non-addressable objects
1776 and make them look addressable. After some processing the
1777 non-addressability may be uncovered again, causing ADDR_EXPRs
1778 of inappropriate objects to be built. */
1779 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1780 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1783 /* ... fall through ... */
1786 case ARRAY_RANGE_REF
:
1787 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1799 /* Finds addresses in *OP_P inside STMT. */
1802 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1804 tree base
= *op_p
, step
= size_zero_node
;
1806 struct ifs_ivopts_data ifs_ivopts_data
;
1808 /* Do not play with volatile memory references. A bit too conservative,
1809 perhaps, but safe. */
1810 if (gimple_has_volatile_ops (stmt
))
1813 /* Ignore bitfields for now. Not really something terribly complicated
1815 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1818 base
= unshare_expr (base
);
1820 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1822 tree type
= build_pointer_type (TREE_TYPE (base
));
1826 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1828 civ
= get_iv (data
, TMR_BASE (base
));
1832 TMR_BASE (base
) = civ
->base
;
1835 if (TMR_INDEX2 (base
)
1836 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1838 civ
= get_iv (data
, TMR_INDEX2 (base
));
1842 TMR_INDEX2 (base
) = civ
->base
;
1845 if (TMR_INDEX (base
)
1846 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1848 civ
= get_iv (data
, TMR_INDEX (base
));
1852 TMR_INDEX (base
) = civ
->base
;
1857 if (TMR_STEP (base
))
1858 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1860 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1864 if (integer_zerop (step
))
1866 base
= tree_mem_ref_addr (type
, base
);
1870 ifs_ivopts_data
.ivopts_data
= data
;
1871 ifs_ivopts_data
.stmt
= stmt
;
1872 ifs_ivopts_data
.step
= size_zero_node
;
1873 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1874 || integer_zerop (ifs_ivopts_data
.step
))
1876 step
= ifs_ivopts_data
.step
;
1878 /* Check that the base expression is addressable. This needs
1879 to be done after substituting bases of IVs into it. */
1880 if (may_be_nonaddressable_p (base
))
1883 /* Moreover, on strict alignment platforms, check that it is
1884 sufficiently aligned. */
1885 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1888 base
= build_fold_addr_expr (base
);
1890 /* Substituting bases of IVs into the base expression might
1891 have caused folding opportunities. */
1892 if (TREE_CODE (base
) == ADDR_EXPR
)
1894 tree
*ref
= &TREE_OPERAND (base
, 0);
1895 while (handled_component_p (*ref
))
1896 ref
= &TREE_OPERAND (*ref
, 0);
1897 if (TREE_CODE (*ref
) == MEM_REF
)
1899 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1900 TREE_OPERAND (*ref
, 0),
1901 TREE_OPERAND (*ref
, 1));
1908 civ
= alloc_iv (base
, step
);
1909 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1913 for_each_index (op_p
, idx_record_use
, data
);
1916 /* Finds and records invariants used in STMT. */
1919 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1922 use_operand_p use_p
;
1925 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1927 op
= USE_FROM_PTR (use_p
);
1928 record_invariant (data
, op
, false);
1932 /* Finds interesting uses of induction variables in the statement STMT. */
1935 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1938 tree op
, *lhs
, *rhs
;
1940 use_operand_p use_p
;
1941 enum tree_code code
;
1943 find_invariants_stmt (data
, stmt
);
1945 if (gimple_code (stmt
) == GIMPLE_COND
)
1947 find_interesting_uses_cond (data
, stmt
);
1951 if (is_gimple_assign (stmt
))
1953 lhs
= gimple_assign_lhs_ptr (stmt
);
1954 rhs
= gimple_assign_rhs1_ptr (stmt
);
1956 if (TREE_CODE (*lhs
) == SSA_NAME
)
1958 /* If the statement defines an induction variable, the uses are not
1959 interesting by themselves. */
1961 iv
= get_iv (data
, *lhs
);
1963 if (iv
&& !integer_zerop (iv
->step
))
1967 code
= gimple_assign_rhs_code (stmt
);
1968 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1969 && (REFERENCE_CLASS_P (*rhs
)
1970 || is_gimple_val (*rhs
)))
1972 if (REFERENCE_CLASS_P (*rhs
))
1973 find_interesting_uses_address (data
, stmt
, rhs
);
1975 find_interesting_uses_op (data
, *rhs
);
1977 if (REFERENCE_CLASS_P (*lhs
))
1978 find_interesting_uses_address (data
, stmt
, lhs
);
1981 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1983 find_interesting_uses_cond (data
, stmt
);
1987 /* TODO -- we should also handle address uses of type
1989 memory = call (whatever);
1996 if (gimple_code (stmt
) == GIMPLE_PHI
1997 && gimple_bb (stmt
) == data
->current_loop
->header
)
1999 iv
= get_iv (data
, PHI_RESULT (stmt
));
2001 if (iv
&& !integer_zerop (iv
->step
))
2005 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2007 op
= USE_FROM_PTR (use_p
);
2009 if (TREE_CODE (op
) != SSA_NAME
)
2012 iv
= get_iv (data
, op
);
2016 find_interesting_uses_op (data
, op
);
2020 /* Finds interesting uses of induction variables outside of loops
2021 on loop exit edge EXIT. */
2024 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2030 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2033 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2034 if (!virtual_operand_p (def
))
2035 find_interesting_uses_op (data
, def
);
2039 /* Finds uses of the induction variables that are interesting. */
2042 find_interesting_uses (struct ivopts_data
*data
)
2045 gimple_stmt_iterator bsi
;
2046 basic_block
*body
= get_loop_body (data
->current_loop
);
2048 struct version_info
*info
;
2051 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2052 fprintf (dump_file
, "Uses:\n\n");
2054 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2059 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2060 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2061 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2062 find_interesting_uses_outside (data
, e
);
2064 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2065 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2066 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2067 if (!is_gimple_debug (gsi_stmt (bsi
)))
2068 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2075 fprintf (dump_file
, "\n");
2077 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2079 info
= ver_info (data
, i
);
2082 fprintf (dump_file
, " ");
2083 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2084 fprintf (dump_file
, " is invariant (%d)%s\n",
2085 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2089 fprintf (dump_file
, "\n");
2095 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2096 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2097 we are at the top-level of the processed address. */
2100 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2101 HOST_WIDE_INT
*offset
)
2103 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2104 enum tree_code code
;
2105 tree type
, orig_type
= TREE_TYPE (expr
);
2106 HOST_WIDE_INT off0
, off1
, st
;
2107 tree orig_expr
= expr
;
2111 type
= TREE_TYPE (expr
);
2112 code
= TREE_CODE (expr
);
2118 if (!cst_and_fits_in_hwi (expr
)
2119 || integer_zerop (expr
))
2122 *offset
= int_cst_value (expr
);
2123 return build_int_cst (orig_type
, 0);
2125 case POINTER_PLUS_EXPR
:
2128 op0
= TREE_OPERAND (expr
, 0);
2129 op1
= TREE_OPERAND (expr
, 1);
2131 op0
= strip_offset_1 (op0
, false, false, &off0
);
2132 op1
= strip_offset_1 (op1
, false, false, &off1
);
2134 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2135 if (op0
== TREE_OPERAND (expr
, 0)
2136 && op1
== TREE_OPERAND (expr
, 1))
2139 if (integer_zerop (op1
))
2141 else if (integer_zerop (op0
))
2143 if (code
== MINUS_EXPR
)
2144 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2149 expr
= fold_build2 (code
, type
, op0
, op1
);
2151 return fold_convert (orig_type
, expr
);
2154 op1
= TREE_OPERAND (expr
, 1);
2155 if (!cst_and_fits_in_hwi (op1
))
2158 op0
= TREE_OPERAND (expr
, 0);
2159 op0
= strip_offset_1 (op0
, false, false, &off0
);
2160 if (op0
== TREE_OPERAND (expr
, 0))
2163 *offset
= off0
* int_cst_value (op1
);
2164 if (integer_zerop (op0
))
2167 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2169 return fold_convert (orig_type
, expr
);
2172 case ARRAY_RANGE_REF
:
2176 step
= array_ref_element_size (expr
);
2177 if (!cst_and_fits_in_hwi (step
))
2180 st
= int_cst_value (step
);
2181 op1
= TREE_OPERAND (expr
, 1);
2182 op1
= strip_offset_1 (op1
, false, false, &off1
);
2183 *offset
= off1
* st
;
2186 && integer_zerop (op1
))
2188 /* Strip the component reference completely. */
2189 op0
= TREE_OPERAND (expr
, 0);
2190 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2203 tmp
= component_ref_field_offset (expr
);
2204 field
= TREE_OPERAND (expr
, 1);
2206 && cst_and_fits_in_hwi (tmp
)
2207 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2209 HOST_WIDE_INT boffset
, abs_off
;
2211 /* Strip the component reference completely. */
2212 op0
= TREE_OPERAND (expr
, 0);
2213 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2214 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2215 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2219 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2226 op0
= TREE_OPERAND (expr
, 0);
2227 op0
= strip_offset_1 (op0
, true, true, &off0
);
2230 if (op0
== TREE_OPERAND (expr
, 0))
2233 expr
= build_fold_addr_expr (op0
);
2234 return fold_convert (orig_type
, expr
);
2237 /* ??? Offset operand? */
2238 inside_addr
= false;
2245 /* Default handling of expressions for that we want to recurse into
2246 the first operand. */
2247 op0
= TREE_OPERAND (expr
, 0);
2248 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2251 if (op0
== TREE_OPERAND (expr
, 0)
2252 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2255 expr
= copy_node (expr
);
2256 TREE_OPERAND (expr
, 0) = op0
;
2258 TREE_OPERAND (expr
, 1) = op1
;
2260 /* Inside address, we might strip the top level component references,
2261 thus changing type of the expression. Handling of ADDR_EXPR
2263 expr
= fold_convert (orig_type
, expr
);
2268 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2271 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2274 tree core
= strip_offset_1 (expr
, false, false, &off
);
2279 /* Returns variant of TYPE that can be used as base for different uses.
2280 We return unsigned type with the same precision, which avoids problems
2284 generic_type_for (tree type
)
2286 if (POINTER_TYPE_P (type
))
2287 return unsigned_type_for (type
);
2289 if (TYPE_UNSIGNED (type
))
2292 return unsigned_type_for (type
);
2295 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2296 the bitmap to that we should store it. */
2298 static struct ivopts_data
*fd_ivopts_data
;
2300 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2302 bitmap
*depends_on
= (bitmap
*) data
;
2303 struct version_info
*info
;
2305 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2307 info
= name_info (fd_ivopts_data
, *expr_p
);
2309 if (!info
->inv_id
|| info
->has_nonlin_use
)
2313 *depends_on
= BITMAP_ALLOC (NULL
);
2314 bitmap_set_bit (*depends_on
, info
->inv_id
);
2319 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2320 position to POS. If USE is not NULL, the candidate is set as related to
2321 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2322 replacement of the final value of the iv by a direct computation. */
2324 static struct iv_cand
*
2325 add_candidate_1 (struct ivopts_data
*data
,
2326 tree base
, tree step
, bool important
, enum iv_position pos
,
2327 struct iv_use
*use
, gimple incremented_at
)
2330 struct iv_cand
*cand
= NULL
;
2331 tree type
, orig_type
;
2333 /* For non-original variables, make sure their values are computed in a type
2334 that does not invoke undefined behavior on overflows (since in general,
2335 we cannot prove that these induction variables are non-wrapping). */
2336 if (pos
!= IP_ORIGINAL
)
2338 orig_type
= TREE_TYPE (base
);
2339 type
= generic_type_for (orig_type
);
2340 if (type
!= orig_type
)
2342 base
= fold_convert (type
, base
);
2343 step
= fold_convert (type
, step
);
2347 for (i
= 0; i
< n_iv_cands (data
); i
++)
2349 cand
= iv_cand (data
, i
);
2351 if (cand
->pos
!= pos
)
2354 if (cand
->incremented_at
!= incremented_at
2355 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2356 && cand
->ainc_use
!= use
))
2370 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2371 && operand_equal_p (step
, cand
->iv
->step
, 0)
2372 && (TYPE_PRECISION (TREE_TYPE (base
))
2373 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2377 if (i
== n_iv_cands (data
))
2379 cand
= XCNEW (struct iv_cand
);
2385 cand
->iv
= alloc_iv (base
, step
);
2388 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2390 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2391 cand
->var_after
= cand
->var_before
;
2393 cand
->important
= important
;
2394 cand
->incremented_at
= incremented_at
;
2395 data
->iv_candidates
.safe_push (cand
);
2398 && TREE_CODE (step
) != INTEGER_CST
)
2400 fd_ivopts_data
= data
;
2401 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2404 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2405 cand
->ainc_use
= use
;
2407 cand
->ainc_use
= NULL
;
2409 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2410 dump_cand (dump_file
, cand
);
2413 if (important
&& !cand
->important
)
2415 cand
->important
= true;
2416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2417 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2422 bitmap_set_bit (use
->related_cands
, i
);
2423 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2424 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2431 /* Returns true if incrementing the induction variable at the end of the LOOP
2434 The purpose is to avoid splitting latch edge with a biv increment, thus
2435 creating a jump, possibly confusing other optimization passes and leaving
2436 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2437 is not available (so we do not have a better alternative), or if the latch
2438 edge is already nonempty. */
2441 allow_ip_end_pos_p (struct loop
*loop
)
2443 if (!ip_normal_pos (loop
))
2446 if (!empty_block_p (ip_end_pos (loop
)))
2452 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2453 Important field is set to IMPORTANT. */
2456 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2457 bool important
, struct iv_use
*use
)
2459 basic_block use_bb
= gimple_bb (use
->stmt
);
2460 machine_mode mem_mode
;
2461 unsigned HOST_WIDE_INT cstepi
;
2463 /* If we insert the increment in any position other than the standard
2464 ones, we must ensure that it is incremented once per iteration.
2465 It must not be in an inner nested loop, or one side of an if
2467 if (use_bb
->loop_father
!= data
->current_loop
2468 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2469 || stmt_could_throw_p (use
->stmt
)
2470 || !cst_and_fits_in_hwi (step
))
2473 cstepi
= int_cst_value (step
);
2475 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2476 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2477 || USE_STORE_PRE_INCREMENT (mem_mode
))
2478 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2479 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2480 || USE_STORE_PRE_DECREMENT (mem_mode
))
2481 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2483 enum tree_code code
= MINUS_EXPR
;
2485 tree new_step
= step
;
2487 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2489 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2490 code
= POINTER_PLUS_EXPR
;
2493 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2494 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2495 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2498 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2499 || USE_STORE_POST_INCREMENT (mem_mode
))
2500 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2501 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2502 || USE_STORE_POST_DECREMENT (mem_mode
))
2503 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2505 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2510 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2511 position to POS. If USE is not NULL, the candidate is set as related to
2512 it. The candidate computation is scheduled on all available positions. */
2515 add_candidate (struct ivopts_data
*data
,
2516 tree base
, tree step
, bool important
, struct iv_use
*use
)
2518 if (ip_normal_pos (data
->current_loop
))
2519 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2520 if (ip_end_pos (data
->current_loop
)
2521 && allow_ip_end_pos_p (data
->current_loop
))
2522 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2524 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2525 add_autoinc_candidates (data
, base
, step
, important
, use
);
2528 /* Adds standard iv candidates. */
2531 add_standard_iv_candidates (struct ivopts_data
*data
)
2533 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2535 /* The same for a double-integer type if it is still fast enough. */
2537 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2538 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2539 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2540 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2542 /* The same for a double-integer type if it is still fast enough. */
2544 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2545 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2546 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2547 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2551 /* Adds candidates bases on the old induction variable IV. */
2554 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2558 struct iv_cand
*cand
;
2560 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2562 /* The same, but with initial value zero. */
2563 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2564 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2566 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2567 iv
->step
, true, NULL
);
2569 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2570 if (gimple_code (phi
) == GIMPLE_PHI
)
2572 /* Additionally record the possibility of leaving the original iv
2574 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2575 /* Don't add candidate if it's from another PHI node because
2576 it's an affine iv appearing in the form of PEELED_CHREC. */
2577 phi
= SSA_NAME_DEF_STMT (def
);
2578 if (gimple_code (phi
) != GIMPLE_PHI
)
2580 cand
= add_candidate_1 (data
,
2581 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2582 SSA_NAME_DEF_STMT (def
));
2583 cand
->var_before
= iv
->ssa_name
;
2584 cand
->var_after
= def
;
2587 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2591 /* Adds candidates based on the old induction variables. */
2594 add_old_ivs_candidates (struct ivopts_data
*data
)
2600 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2602 iv
= ver_info (data
, i
)->iv
;
2603 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2604 add_old_iv_candidates (data
, iv
);
2608 /* Adds candidates based on the value of the induction variable IV and USE. */
2611 add_iv_value_candidates (struct ivopts_data
*data
,
2612 struct iv
*iv
, struct iv_use
*use
)
2614 unsigned HOST_WIDE_INT offset
;
2618 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2620 /* The same, but with initial value zero. Make such variable important,
2621 since it is generic enough so that possibly many uses may be based
2623 basetype
= TREE_TYPE (iv
->base
);
2624 if (POINTER_TYPE_P (basetype
))
2625 basetype
= sizetype
;
2626 add_candidate (data
, build_int_cst (basetype
, 0),
2627 iv
->step
, true, use
);
2629 /* Third, try removing the constant offset. Make sure to even
2630 add a candidate for &a[0] vs. (T *)&a. */
2631 base
= strip_offset (iv
->base
, &offset
);
2633 || base
!= iv
->base
)
2634 add_candidate (data
, base
, iv
->step
, false, use
);
2637 /* Adds candidates based on the uses. */
2640 add_derived_ivs_candidates (struct ivopts_data
*data
)
2644 for (i
= 0; i
< n_iv_uses (data
); i
++)
2646 struct iv_use
*use
= iv_use (data
, i
);
2653 case USE_NONLINEAR_EXPR
:
2656 /* Just add the ivs based on the value of the iv used here. */
2657 add_iv_value_candidates (data
, use
->iv
, use
);
2666 /* Record important candidates and add them to related_cands bitmaps
2670 record_important_candidates (struct ivopts_data
*data
)
2675 for (i
= 0; i
< n_iv_cands (data
); i
++)
2677 struct iv_cand
*cand
= iv_cand (data
, i
);
2679 if (cand
->important
)
2680 bitmap_set_bit (data
->important_candidates
, i
);
2683 data
->consider_all_candidates
= (n_iv_cands (data
)
2684 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2686 if (data
->consider_all_candidates
)
2688 /* We will not need "related_cands" bitmaps in this case,
2689 so release them to decrease peak memory consumption. */
2690 for (i
= 0; i
< n_iv_uses (data
); i
++)
2692 use
= iv_use (data
, i
);
2693 BITMAP_FREE (use
->related_cands
);
2698 /* Add important candidates to the related_cands bitmaps. */
2699 for (i
= 0; i
< n_iv_uses (data
); i
++)
2700 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2701 data
->important_candidates
);
2705 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2706 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2707 we allocate a simple list to every use. */
2710 alloc_use_cost_map (struct ivopts_data
*data
)
2712 unsigned i
, size
, s
;
2714 for (i
= 0; i
< n_iv_uses (data
); i
++)
2716 struct iv_use
*use
= iv_use (data
, i
);
2718 if (data
->consider_all_candidates
)
2719 size
= n_iv_cands (data
);
2722 s
= bitmap_count_bits (use
->related_cands
);
2724 /* Round up to the power of two, so that moduling by it is fast. */
2725 size
= s
? (1 << ceil_log2 (s
)) : 1;
2728 use
->n_map_members
= size
;
2729 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2733 /* Returns description of computation cost of expression whose runtime
2734 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2737 new_cost (unsigned runtime
, unsigned complexity
)
2741 cost
.cost
= runtime
;
2742 cost
.complexity
= complexity
;
2747 /* Adds costs COST1 and COST2. */
2750 add_costs (comp_cost cost1
, comp_cost cost2
)
2752 cost1
.cost
+= cost2
.cost
;
2753 cost1
.complexity
+= cost2
.complexity
;
2757 /* Subtracts costs COST1 and COST2. */
2760 sub_costs (comp_cost cost1
, comp_cost cost2
)
2762 cost1
.cost
-= cost2
.cost
;
2763 cost1
.complexity
-= cost2
.complexity
;
2768 /* Returns a negative number if COST1 < COST2, a positive number if
2769 COST1 > COST2, and 0 if COST1 = COST2. */
2772 compare_costs (comp_cost cost1
, comp_cost cost2
)
2774 if (cost1
.cost
== cost2
.cost
)
2775 return cost1
.complexity
- cost2
.complexity
;
2777 return cost1
.cost
- cost2
.cost
;
2780 /* Returns true if COST is infinite. */
2783 infinite_cost_p (comp_cost cost
)
2785 return cost
.cost
== INFTY
;
2788 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2789 on invariants DEPENDS_ON and that the value used in expressing it
2790 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2793 set_use_iv_cost (struct ivopts_data
*data
,
2794 struct iv_use
*use
, struct iv_cand
*cand
,
2795 comp_cost cost
, bitmap depends_on
, tree value
,
2796 enum tree_code comp
, int inv_expr_id
)
2800 if (infinite_cost_p (cost
))
2802 BITMAP_FREE (depends_on
);
2806 if (data
->consider_all_candidates
)
2808 use
->cost_map
[cand
->id
].cand
= cand
;
2809 use
->cost_map
[cand
->id
].cost
= cost
;
2810 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2811 use
->cost_map
[cand
->id
].value
= value
;
2812 use
->cost_map
[cand
->id
].comp
= comp
;
2813 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2817 /* n_map_members is a power of two, so this computes modulo. */
2818 s
= cand
->id
& (use
->n_map_members
- 1);
2819 for (i
= s
; i
< use
->n_map_members
; i
++)
2820 if (!use
->cost_map
[i
].cand
)
2822 for (i
= 0; i
< s
; i
++)
2823 if (!use
->cost_map
[i
].cand
)
2829 use
->cost_map
[i
].cand
= cand
;
2830 use
->cost_map
[i
].cost
= cost
;
2831 use
->cost_map
[i
].depends_on
= depends_on
;
2832 use
->cost_map
[i
].value
= value
;
2833 use
->cost_map
[i
].comp
= comp
;
2834 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2837 /* Gets cost of (USE, CANDIDATE) pair. */
2839 static struct cost_pair
*
2840 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2841 struct iv_cand
*cand
)
2844 struct cost_pair
*ret
;
2849 if (data
->consider_all_candidates
)
2851 ret
= use
->cost_map
+ cand
->id
;
2858 /* n_map_members is a power of two, so this computes modulo. */
2859 s
= cand
->id
& (use
->n_map_members
- 1);
2860 for (i
= s
; i
< use
->n_map_members
; i
++)
2861 if (use
->cost_map
[i
].cand
== cand
)
2862 return use
->cost_map
+ i
;
2863 else if (use
->cost_map
[i
].cand
== NULL
)
2865 for (i
= 0; i
< s
; i
++)
2866 if (use
->cost_map
[i
].cand
== cand
)
2867 return use
->cost_map
+ i
;
2868 else if (use
->cost_map
[i
].cand
== NULL
)
2874 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2876 produce_memory_decl_rtl (tree obj
, int *regno
)
2878 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2879 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2883 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2885 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2886 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2887 SET_SYMBOL_REF_DECL (x
, obj
);
2888 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2889 set_mem_addr_space (x
, as
);
2890 targetm
.encode_section_info (obj
, x
, true);
2894 x
= gen_raw_REG (address_mode
, (*regno
)++);
2895 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2896 set_mem_addr_space (x
, as
);
2902 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2903 walk_tree. DATA contains the actual fake register number. */
2906 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2908 tree obj
= NULL_TREE
;
2910 int *regno
= (int *) data
;
2912 switch (TREE_CODE (*expr_p
))
2915 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2916 handled_component_p (*expr_p
);
2917 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2920 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2921 x
= produce_memory_decl_rtl (obj
, regno
);
2926 obj
= SSA_NAME_VAR (*expr_p
);
2927 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2930 if (!DECL_RTL_SET_P (obj
))
2931 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2940 if (DECL_RTL_SET_P (obj
))
2943 if (DECL_MODE (obj
) == BLKmode
)
2944 x
= produce_memory_decl_rtl (obj
, regno
);
2946 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2956 decl_rtl_to_reset
.safe_push (obj
);
2957 SET_DECL_RTL (obj
, x
);
2963 /* Determines cost of the computation of EXPR. */
2966 computation_cost (tree expr
, bool speed
)
2970 tree type
= TREE_TYPE (expr
);
2972 /* Avoid using hard regs in ways which may be unsupported. */
2973 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2974 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
2975 enum node_frequency real_frequency
= node
->frequency
;
2977 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2978 crtl
->maybe_hot_insn_p
= speed
;
2979 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2981 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2984 default_rtl_profile ();
2985 node
->frequency
= real_frequency
;
2987 cost
= seq_cost (seq
, speed
);
2989 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2990 TYPE_ADDR_SPACE (type
), speed
);
2991 else if (!REG_P (rslt
))
2992 cost
+= set_src_cost (rslt
, speed
);
2997 /* Returns variable containing the value of candidate CAND at statement AT. */
3000 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
3002 if (stmt_after_increment (loop
, cand
, stmt
))
3003 return cand
->var_after
;
3005 return cand
->var_before
;
3008 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3009 same precision that is at least as wide as the precision of TYPE, stores
3010 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3014 determine_common_wider_type (tree
*a
, tree
*b
)
3016 tree wider_type
= NULL
;
3018 tree atype
= TREE_TYPE (*a
);
3020 if (CONVERT_EXPR_P (*a
))
3022 suba
= TREE_OPERAND (*a
, 0);
3023 wider_type
= TREE_TYPE (suba
);
3024 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3030 if (CONVERT_EXPR_P (*b
))
3032 subb
= TREE_OPERAND (*b
, 0);
3033 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3044 /* Determines the expression by that USE is expressed from induction variable
3045 CAND at statement AT in LOOP. The expression is stored in a decomposed
3046 form into AFF. Returns false if USE cannot be expressed using CAND. */
3049 get_computation_aff (struct loop
*loop
,
3050 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3051 struct aff_tree
*aff
)
3053 tree ubase
= use
->iv
->base
;
3054 tree ustep
= use
->iv
->step
;
3055 tree cbase
= cand
->iv
->base
;
3056 tree cstep
= cand
->iv
->step
, cstep_common
;
3057 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3058 tree common_type
, var
;
3060 aff_tree cbase_aff
, var_aff
;
3063 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3065 /* We do not have a precision to express the values of use. */
3069 var
= var_at_stmt (loop
, cand
, at
);
3070 uutype
= unsigned_type_for (utype
);
3072 /* If the conversion is not noop, perform it. */
3073 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3075 cstep
= fold_convert (uutype
, cstep
);
3076 cbase
= fold_convert (uutype
, cbase
);
3077 var
= fold_convert (uutype
, var
);
3080 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3083 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3084 type, we achieve better folding by computing their difference in this
3085 wider type, and cast the result to UUTYPE. We do not need to worry about
3086 overflows, as all the arithmetics will in the end be performed in UUTYPE
3088 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3090 /* use = ubase - ratio * cbase + ratio * var. */
3091 tree_to_aff_combination (ubase
, common_type
, aff
);
3092 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3093 tree_to_aff_combination (var
, uutype
, &var_aff
);
3095 /* We need to shift the value if we are after the increment. */
3096 if (stmt_after_increment (loop
, cand
, at
))
3100 if (common_type
!= uutype
)
3101 cstep_common
= fold_convert (common_type
, cstep
);
3103 cstep_common
= cstep
;
3105 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3106 aff_combination_add (&cbase_aff
, &cstep_aff
);
3109 aff_combination_scale (&cbase_aff
, -rat
);
3110 aff_combination_add (aff
, &cbase_aff
);
3111 if (common_type
!= uutype
)
3112 aff_combination_convert (aff
, uutype
);
3114 aff_combination_scale (&var_aff
, rat
);
3115 aff_combination_add (aff
, &var_aff
);
3120 /* Return the type of USE. */
3123 get_use_type (struct iv_use
*use
)
3125 tree base_type
= TREE_TYPE (use
->iv
->base
);
3128 if (use
->type
== USE_ADDRESS
)
3130 /* The base_type may be a void pointer. Create a pointer type based on
3131 the mem_ref instead. */
3132 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3133 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3134 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3142 /* Determines the expression by that USE is expressed from induction variable
3143 CAND at statement AT in LOOP. The computation is unshared. */
3146 get_computation_at (struct loop
*loop
,
3147 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3150 tree type
= get_use_type (use
);
3152 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3154 unshare_aff_combination (&aff
);
3155 return fold_convert (type
, aff_combination_to_tree (&aff
));
3158 /* Determines the expression by that USE is expressed from induction variable
3159 CAND in LOOP. The computation is unshared. */
3162 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3164 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3167 /* Adjust the cost COST for being in loop setup rather than loop body.
3168 If we're optimizing for space, the loop setup overhead is constant;
3169 if we're optimizing for speed, amortize it over the per-iteration cost. */
3171 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3175 else if (optimize_loop_for_speed_p (data
->current_loop
))
3176 return cost
/ avg_loop_niter (data
->current_loop
);
3181 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3182 validity for a memory reference accessing memory of mode MODE in
3183 address space AS. */
3187 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3190 #define MAX_RATIO 128
3191 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3192 static vec
<sbitmap
> valid_mult_list
;
3195 if (data_index
>= valid_mult_list
.length ())
3196 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3198 valid_mult
= valid_mult_list
[data_index
];
3201 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3202 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3203 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3207 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3208 bitmap_clear (valid_mult
);
3209 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3210 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3211 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3213 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3214 if (memory_address_addr_space_p (mode
, addr
, as
)
3215 || memory_address_addr_space_p (mode
, scaled
, as
))
3216 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3219 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3221 fprintf (dump_file
, " allowed multipliers:");
3222 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3223 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3224 fprintf (dump_file
, " %d", (int) i
);
3225 fprintf (dump_file
, "\n");
3226 fprintf (dump_file
, "\n");
3229 valid_mult_list
[data_index
] = valid_mult
;
3232 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3235 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3238 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3239 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3240 variable is omitted. Compute the cost for a memory reference that accesses
3241 a memory location of mode MEM_MODE in address space AS.
3243 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3244 size of MEM_MODE / RATIO) is available. To make this determination, we
3245 look at the size of the increment to be made, which is given in CSTEP.
3246 CSTEP may be zero if the step is unknown.
3247 STMT_AFTER_INC is true iff the statement we're looking at is after the
3248 increment of the original biv.
3250 TODO -- there must be some better way. This all is quite crude. */
3254 AINC_PRE_INC
, /* Pre increment. */
3255 AINC_PRE_DEC
, /* Pre decrement. */
3256 AINC_POST_INC
, /* Post increment. */
3257 AINC_POST_DEC
, /* Post decrement. */
3258 AINC_NONE
/* Also the number of auto increment types. */
3261 typedef struct address_cost_data_s
3263 HOST_WIDE_INT min_offset
, max_offset
;
3264 unsigned costs
[2][2][2][2];
3265 unsigned ainc_costs
[AINC_NONE
];
3266 } *address_cost_data
;
3270 get_address_cost (bool symbol_present
, bool var_present
,
3271 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3272 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3273 addr_space_t as
, bool speed
,
3274 bool stmt_after_inc
, bool *may_autoinc
)
3276 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3277 static vec
<address_cost_data
> address_cost_data_list
;
3278 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3279 address_cost_data data
;
3280 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3281 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3282 unsigned cost
, acost
, complexity
;
3283 enum ainc_type autoinc_type
;
3284 bool offset_p
, ratio_p
, autoinc
;
3285 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3286 unsigned HOST_WIDE_INT mask
;
3289 if (data_index
>= address_cost_data_list
.length ())
3290 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3292 data
= address_cost_data_list
[data_index
];
3296 HOST_WIDE_INT rat
, off
= 0;
3297 int old_cse_not_expected
, width
;
3298 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3303 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3305 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3307 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3308 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3309 width
= HOST_BITS_PER_WIDE_INT
- 1;
3310 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3312 for (i
= width
; i
>= 0; i
--)
3314 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3315 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3316 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3319 data
->min_offset
= (i
== -1? 0 : off
);
3321 for (i
= width
; i
>= 0; i
--)
3323 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3324 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3325 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3327 /* For some TARGET, like ARM THUMB1, the offset should be nature
3328 aligned. Try an aligned offset if address_mode is not QImode. */
3329 off
= (address_mode
== QImode
)
3331 : ((unsigned HOST_WIDE_INT
) 1 << i
)
3332 - GET_MODE_SIZE (address_mode
);
3335 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3336 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3342 data
->max_offset
= off
;
3344 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3346 fprintf (dump_file
, "get_address_cost:\n");
3347 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3348 GET_MODE_NAME (mem_mode
),
3350 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3351 GET_MODE_NAME (mem_mode
),
3356 for (i
= 2; i
<= MAX_RATIO
; i
++)
3357 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3363 /* Compute the cost of various addressing modes. */
3365 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3366 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3368 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3369 || USE_STORE_PRE_DECREMENT (mem_mode
))
3371 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3372 has_predec
[mem_mode
]
3373 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3375 if (has_predec
[mem_mode
])
3376 data
->ainc_costs
[AINC_PRE_DEC
]
3377 = address_cost (addr
, mem_mode
, as
, speed
);
3379 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3380 || USE_STORE_POST_DECREMENT (mem_mode
))
3382 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3383 has_postdec
[mem_mode
]
3384 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3386 if (has_postdec
[mem_mode
])
3387 data
->ainc_costs
[AINC_POST_DEC
]
3388 = address_cost (addr
, mem_mode
, as
, speed
);
3390 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3391 || USE_STORE_PRE_DECREMENT (mem_mode
))
3393 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3394 has_preinc
[mem_mode
]
3395 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3397 if (has_preinc
[mem_mode
])
3398 data
->ainc_costs
[AINC_PRE_INC
]
3399 = address_cost (addr
, mem_mode
, as
, speed
);
3401 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3402 || USE_STORE_POST_INCREMENT (mem_mode
))
3404 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3405 has_postinc
[mem_mode
]
3406 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3408 if (has_postinc
[mem_mode
])
3409 data
->ainc_costs
[AINC_POST_INC
]
3410 = address_cost (addr
, mem_mode
, as
, speed
);
3412 for (i
= 0; i
< 16; i
++)
3415 var_p
= (i
>> 1) & 1;
3416 off_p
= (i
>> 2) & 1;
3417 rat_p
= (i
>> 3) & 1;
3421 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3422 gen_int_mode (rat
, address_mode
));
3425 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3429 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3430 /* ??? We can run into trouble with some backends by presenting
3431 it with symbols which haven't been properly passed through
3432 targetm.encode_section_info. By setting the local bit, we
3433 enhance the probability of things working. */
3434 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3437 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3439 (PLUS
, address_mode
, base
,
3440 gen_int_mode (off
, address_mode
)));
3443 base
= gen_int_mode (off
, address_mode
);
3448 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3451 /* To avoid splitting addressing modes, pretend that no cse will
3453 old_cse_not_expected
= cse_not_expected
;
3454 cse_not_expected
= true;
3455 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3456 cse_not_expected
= old_cse_not_expected
;
3460 acost
= seq_cost (seq
, speed
);
3461 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3465 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3468 /* On some targets, it is quite expensive to load symbol to a register,
3469 which makes addresses that contain symbols look much more expensive.
3470 However, the symbol will have to be loaded in any case before the
3471 loop (and quite likely we have it in register already), so it does not
3472 make much sense to penalize them too heavily. So make some final
3473 tweaks for the SYMBOL_PRESENT modes:
3475 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3476 var is cheaper, use this mode with small penalty.
3477 If VAR_PRESENT is true, try whether the mode with
3478 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3479 if this is the case, use it. */
3480 add_c
= add_cost (speed
, address_mode
);
3481 for (i
= 0; i
< 8; i
++)
3484 off_p
= (i
>> 1) & 1;
3485 rat_p
= (i
>> 2) & 1;
3487 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3491 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3492 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3495 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3497 fprintf (dump_file
, "Address costs:\n");
3499 for (i
= 0; i
< 16; i
++)
3502 var_p
= (i
>> 1) & 1;
3503 off_p
= (i
>> 2) & 1;
3504 rat_p
= (i
>> 3) & 1;
3506 fprintf (dump_file
, " ");
3508 fprintf (dump_file
, "sym + ");
3510 fprintf (dump_file
, "var + ");
3512 fprintf (dump_file
, "cst + ");
3514 fprintf (dump_file
, "rat * ");
3516 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3517 fprintf (dump_file
, "index costs %d\n", acost
);
3519 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3520 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3521 fprintf (dump_file
, " May include autoinc/dec\n");
3522 fprintf (dump_file
, "\n");
3525 address_cost_data_list
[data_index
] = data
;
3528 bits
= GET_MODE_BITSIZE (address_mode
);
3529 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3531 if ((offset
>> (bits
- 1) & 1))
3536 autoinc_type
= AINC_NONE
;
3537 msize
= GET_MODE_SIZE (mem_mode
);
3538 autoinc_offset
= offset
;
3540 autoinc_offset
+= ratio
* cstep
;
3541 if (symbol_present
|| var_present
|| ratio
!= 1)
3545 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3547 autoinc_type
= AINC_POST_INC
;
3548 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3550 autoinc_type
= AINC_POST_DEC
;
3551 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3553 autoinc_type
= AINC_PRE_INC
;
3554 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3556 autoinc_type
= AINC_PRE_DEC
;
3558 if (autoinc_type
!= AINC_NONE
)
3563 offset_p
= (s_offset
!= 0
3564 && data
->min_offset
<= s_offset
3565 && s_offset
<= data
->max_offset
);
3566 ratio_p
= (ratio
!= 1
3567 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3569 if (ratio
!= 1 && !ratio_p
)
3570 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3572 if (s_offset
&& !offset_p
&& !symbol_present
)
3573 cost
+= add_cost (speed
, address_mode
);
3576 *may_autoinc
= autoinc
;
3578 acost
= data
->ainc_costs
[autoinc_type
];
3580 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3581 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3582 return new_cost (cost
+ acost
, complexity
);
3585 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3586 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3587 calculating the operands of EXPR. Returns true if successful, and returns
3588 the cost in COST. */
3591 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
3592 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3595 tree op1
= TREE_OPERAND (expr
, 1);
3596 tree cst
= TREE_OPERAND (mult
, 1);
3597 tree multop
= TREE_OPERAND (mult
, 0);
3598 int m
= exact_log2 (int_cst_value (cst
));
3599 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3601 bool equal_p
= false;
3603 if (!(m
>= 0 && m
< maxm
))
3606 if (operand_equal_p (op1
, mult
, 0))
3609 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3610 ? shiftadd_cost (speed
, mode
, m
)
3612 ? shiftsub1_cost (speed
, mode
, m
)
3613 : shiftsub0_cost (speed
, mode
, m
)));
3614 res
= new_cost (sa_cost
, 0);
3615 res
= add_costs (res
, equal_p
? cost0
: cost1
);
3617 STRIP_NOPS (multop
);
3618 if (!is_gimple_val (multop
))
3619 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3625 /* Estimates cost of forcing expression EXPR into a variable. */
3628 force_expr_to_var_cost (tree expr
, bool speed
)
3630 static bool costs_initialized
= false;
3631 static unsigned integer_cost
[2];
3632 static unsigned symbol_cost
[2];
3633 static unsigned address_cost
[2];
3635 comp_cost cost0
, cost1
, cost
;
3638 if (!costs_initialized
)
3640 tree type
= build_pointer_type (integer_type_node
);
3645 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3646 TREE_STATIC (var
) = 1;
3647 x
= produce_memory_decl_rtl (var
, NULL
);
3648 SET_DECL_RTL (var
, x
);
3650 addr
= build1 (ADDR_EXPR
, type
, var
);
3653 for (i
= 0; i
< 2; i
++)
3655 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3658 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3661 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3662 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3664 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3665 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3666 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3667 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3668 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3669 fprintf (dump_file
, "\n");
3673 costs_initialized
= true;
3678 if (SSA_VAR_P (expr
))
3681 if (is_gimple_min_invariant (expr
))
3683 if (TREE_CODE (expr
) == INTEGER_CST
)
3684 return new_cost (integer_cost
[speed
], 0);
3686 if (TREE_CODE (expr
) == ADDR_EXPR
)
3688 tree obj
= TREE_OPERAND (expr
, 0);
3690 if (TREE_CODE (obj
) == VAR_DECL
3691 || TREE_CODE (obj
) == PARM_DECL
3692 || TREE_CODE (obj
) == RESULT_DECL
)
3693 return new_cost (symbol_cost
[speed
], 0);
3696 return new_cost (address_cost
[speed
], 0);
3699 switch (TREE_CODE (expr
))
3701 case POINTER_PLUS_EXPR
:
3705 op0
= TREE_OPERAND (expr
, 0);
3706 op1
= TREE_OPERAND (expr
, 1);
3713 op0
= TREE_OPERAND (expr
, 0);
3719 /* Just an arbitrary value, FIXME. */
3720 return new_cost (target_spill_cost
[speed
], 0);
3723 if (op0
== NULL_TREE
3724 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
3727 cost0
= force_expr_to_var_cost (op0
, speed
);
3729 if (op1
== NULL_TREE
3730 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
3733 cost1
= force_expr_to_var_cost (op1
, speed
);
3735 mode
= TYPE_MODE (TREE_TYPE (expr
));
3736 switch (TREE_CODE (expr
))
3738 case POINTER_PLUS_EXPR
:
3742 cost
= new_cost (add_cost (speed
, mode
), 0);
3743 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3745 tree mult
= NULL_TREE
;
3747 if (TREE_CODE (op1
) == MULT_EXPR
)
3749 else if (TREE_CODE (op0
) == MULT_EXPR
)
3752 if (mult
!= NULL_TREE
3753 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3754 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3762 tree inner_mode
, outer_mode
;
3763 outer_mode
= TREE_TYPE (expr
);
3764 inner_mode
= TREE_TYPE (op0
);
3765 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
3766 TYPE_MODE (inner_mode
), speed
), 0);
3771 if (cst_and_fits_in_hwi (op0
))
3772 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3774 else if (cst_and_fits_in_hwi (op1
))
3775 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3778 return new_cost (target_spill_cost
[speed
], 0);
3785 cost
= add_costs (cost
, cost0
);
3786 cost
= add_costs (cost
, cost1
);
3788 /* Bound the cost by target_spill_cost. The parts of complicated
3789 computations often are either loop invariant or at least can
3790 be shared between several iv uses, so letting this grow without
3791 limits would not give reasonable results. */
3792 if (cost
.cost
> (int) target_spill_cost
[speed
])
3793 cost
.cost
= target_spill_cost
[speed
];
3798 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3799 invariants the computation depends on. */
3802 force_var_cost (struct ivopts_data
*data
,
3803 tree expr
, bitmap
*depends_on
)
3807 fd_ivopts_data
= data
;
3808 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3811 return force_expr_to_var_cost (expr
, data
->speed
);
3814 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3815 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3816 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3817 invariants the computation depends on. */
3820 split_address_cost (struct ivopts_data
*data
,
3821 tree addr
, bool *symbol_present
, bool *var_present
,
3822 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3825 HOST_WIDE_INT bitsize
;
3826 HOST_WIDE_INT bitpos
;
3829 int unsignedp
, volatilep
;
3831 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3832 &unsignedp
, &volatilep
, false);
3835 || bitpos
% BITS_PER_UNIT
!= 0
3836 || TREE_CODE (core
) != VAR_DECL
)
3838 *symbol_present
= false;
3839 *var_present
= true;
3840 fd_ivopts_data
= data
;
3841 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3842 return new_cost (target_spill_cost
[data
->speed
], 0);
3845 *offset
+= bitpos
/ BITS_PER_UNIT
;
3846 if (TREE_STATIC (core
)
3847 || DECL_EXTERNAL (core
))
3849 *symbol_present
= true;
3850 *var_present
= false;
3854 *symbol_present
= false;
3855 *var_present
= true;
3859 /* Estimates cost of expressing difference of addresses E1 - E2 as
3860 var + symbol + offset. The value of offset is added to OFFSET,
3861 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3862 part is missing. DEPENDS_ON is a set of the invariants the computation
3866 ptr_difference_cost (struct ivopts_data
*data
,
3867 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3868 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3870 HOST_WIDE_INT diff
= 0;
3871 aff_tree aff_e1
, aff_e2
;
3874 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3876 if (ptr_difference_const (e1
, e2
, &diff
))
3879 *symbol_present
= false;
3880 *var_present
= false;
3884 if (integer_zerop (e2
))
3885 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3886 symbol_present
, var_present
, offset
, depends_on
);
3888 *symbol_present
= false;
3889 *var_present
= true;
3891 type
= signed_type_for (TREE_TYPE (e1
));
3892 tree_to_aff_combination (e1
, type
, &aff_e1
);
3893 tree_to_aff_combination (e2
, type
, &aff_e2
);
3894 aff_combination_scale (&aff_e2
, -1);
3895 aff_combination_add (&aff_e1
, &aff_e2
);
3897 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3900 /* Estimates cost of expressing difference E1 - E2 as
3901 var + symbol + offset. The value of offset is added to OFFSET,
3902 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3903 part is missing. DEPENDS_ON is a set of the invariants the computation
3907 difference_cost (struct ivopts_data
*data
,
3908 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3909 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3911 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3912 unsigned HOST_WIDE_INT off1
, off2
;
3913 aff_tree aff_e1
, aff_e2
;
3916 e1
= strip_offset (e1
, &off1
);
3917 e2
= strip_offset (e2
, &off2
);
3918 *offset
+= off1
- off2
;
3923 if (TREE_CODE (e1
) == ADDR_EXPR
)
3924 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3925 offset
, depends_on
);
3926 *symbol_present
= false;
3928 if (operand_equal_p (e1
, e2
, 0))
3930 *var_present
= false;
3934 *var_present
= true;
3936 if (integer_zerop (e2
))
3937 return force_var_cost (data
, e1
, depends_on
);
3939 if (integer_zerop (e1
))
3941 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3942 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3946 type
= signed_type_for (TREE_TYPE (e1
));
3947 tree_to_aff_combination (e1
, type
, &aff_e1
);
3948 tree_to_aff_combination (e2
, type
, &aff_e2
);
3949 aff_combination_scale (&aff_e2
, -1);
3950 aff_combination_add (&aff_e1
, &aff_e2
);
3952 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3955 /* Returns true if AFF1 and AFF2 are identical. */
3958 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3962 if (aff1
->n
!= aff2
->n
)
3965 for (i
= 0; i
< aff1
->n
; i
++)
3967 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3970 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3976 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3979 get_expr_id (struct ivopts_data
*data
, tree expr
)
3981 struct iv_inv_expr_ent ent
;
3982 struct iv_inv_expr_ent
**slot
;
3985 ent
.hash
= iterative_hash_expr (expr
, 0);
3986 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
3990 *slot
= XNEW (struct iv_inv_expr_ent
);
3991 (*slot
)->expr
= expr
;
3992 (*slot
)->hash
= ent
.hash
;
3993 (*slot
)->id
= data
->inv_expr_id
++;
3997 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3998 requires a new compiler generated temporary. Returns -1 otherwise.
3999 ADDRESS_P is a flag indicating if the expression is for address
4003 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4004 tree cbase
, HOST_WIDE_INT ratio
,
4007 aff_tree ubase_aff
, cbase_aff
;
4015 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4016 && (TREE_CODE (cbase
) == INTEGER_CST
))
4019 /* Strips the constant part. */
4020 if (TREE_CODE (ubase
) == PLUS_EXPR
4021 || TREE_CODE (ubase
) == MINUS_EXPR
4022 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4024 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4025 ubase
= TREE_OPERAND (ubase
, 0);
4028 /* Strips the constant part. */
4029 if (TREE_CODE (cbase
) == PLUS_EXPR
4030 || TREE_CODE (cbase
) == MINUS_EXPR
4031 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4033 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4034 cbase
= TREE_OPERAND (cbase
, 0);
4039 if (((TREE_CODE (ubase
) == SSA_NAME
)
4040 || (TREE_CODE (ubase
) == ADDR_EXPR
4041 && is_gimple_min_invariant (ubase
)))
4042 && (TREE_CODE (cbase
) == INTEGER_CST
))
4045 if (((TREE_CODE (cbase
) == SSA_NAME
)
4046 || (TREE_CODE (cbase
) == ADDR_EXPR
4047 && is_gimple_min_invariant (cbase
)))
4048 && (TREE_CODE (ubase
) == INTEGER_CST
))
4054 if (operand_equal_p (ubase
, cbase
, 0))
4057 if (TREE_CODE (ubase
) == ADDR_EXPR
4058 && TREE_CODE (cbase
) == ADDR_EXPR
)
4062 usym
= TREE_OPERAND (ubase
, 0);
4063 csym
= TREE_OPERAND (cbase
, 0);
4064 if (TREE_CODE (usym
) == ARRAY_REF
)
4066 tree ind
= TREE_OPERAND (usym
, 1);
4067 if (TREE_CODE (ind
) == INTEGER_CST
4068 && tree_fits_shwi_p (ind
)
4069 && tree_to_shwi (ind
) == 0)
4070 usym
= TREE_OPERAND (usym
, 0);
4072 if (TREE_CODE (csym
) == ARRAY_REF
)
4074 tree ind
= TREE_OPERAND (csym
, 1);
4075 if (TREE_CODE (ind
) == INTEGER_CST
4076 && tree_fits_shwi_p (ind
)
4077 && tree_to_shwi (ind
) == 0)
4078 csym
= TREE_OPERAND (csym
, 0);
4080 if (operand_equal_p (usym
, csym
, 0))
4083 /* Now do more complex comparison */
4084 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4085 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4086 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4090 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4091 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4093 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4094 aff_combination_add (&ubase_aff
, &cbase_aff
);
4095 expr
= aff_combination_to_tree (&ubase_aff
);
4096 return get_expr_id (data
, expr
);
4101 /* Determines the cost of the computation by that USE is expressed
4102 from induction variable CAND. If ADDRESS_P is true, we just need
4103 to create an address from it, otherwise we want to get it into
4104 register. A set of invariants we depend on is stored in
4105 DEPENDS_ON. AT is the statement at that the value is computed.
4106 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4107 addressing is likely. */
4110 get_computation_cost_at (struct ivopts_data
*data
,
4111 struct iv_use
*use
, struct iv_cand
*cand
,
4112 bool address_p
, bitmap
*depends_on
, gimple at
,
4116 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4118 tree utype
= TREE_TYPE (ubase
), ctype
;
4119 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4120 HOST_WIDE_INT ratio
, aratio
;
4121 bool var_present
, symbol_present
, stmt_is_after_inc
;
4124 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4125 machine_mode mem_mode
= (address_p
4126 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4131 /* Only consider real candidates. */
4133 return infinite_cost
;
4135 cbase
= cand
->iv
->base
;
4136 cstep
= cand
->iv
->step
;
4137 ctype
= TREE_TYPE (cbase
);
4139 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4141 /* We do not have a precision to express the values of use. */
4142 return infinite_cost
;
4146 || (use
->iv
->base_object
4147 && cand
->iv
->base_object
4148 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4149 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4151 /* Do not try to express address of an object with computation based
4152 on address of a different object. This may cause problems in rtl
4153 level alias analysis (that does not expect this to be happening,
4154 as this is illegal in C), and would be unlikely to be useful
4156 if (use
->iv
->base_object
4157 && cand
->iv
->base_object
4158 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4159 return infinite_cost
;
4162 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4164 /* TODO -- add direct handling of this case. */
4168 /* CSTEPI is removed from the offset in case statement is after the
4169 increment. If the step is not constant, we use zero instead.
4170 This is a bit imprecise (there is the extra addition), but
4171 redundancy elimination is likely to transform the code so that
4172 it uses value of the variable before increment anyway,
4173 so it is not that much unrealistic. */
4174 if (cst_and_fits_in_hwi (cstep
))
4175 cstepi
= int_cst_value (cstep
);
4179 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4180 return infinite_cost
;
4182 if (wi::fits_shwi_p (rat
))
4183 ratio
= rat
.to_shwi ();
4185 return infinite_cost
;
4188 ctype
= TREE_TYPE (cbase
);
4190 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4192 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4193 or ratio == 1, it is better to handle this like
4195 ubase - ratio * cbase + ratio * var
4197 (also holds in the case ratio == -1, TODO. */
4199 if (cst_and_fits_in_hwi (cbase
))
4201 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4202 cost
= difference_cost (data
,
4203 ubase
, build_int_cst (utype
, 0),
4204 &symbol_present
, &var_present
, &offset
,
4206 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4208 else if (ratio
== 1)
4210 tree real_cbase
= cbase
;
4212 /* Check to see if any adjustment is needed. */
4213 if (cstepi
== 0 && stmt_is_after_inc
)
4215 aff_tree real_cbase_aff
;
4218 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4220 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4222 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4223 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4226 cost
= difference_cost (data
,
4228 &symbol_present
, &var_present
, &offset
,
4230 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4233 && !POINTER_TYPE_P (ctype
)
4234 && multiplier_allowed_in_address_p
4236 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4239 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4240 cost
= difference_cost (data
,
4242 &symbol_present
, &var_present
, &offset
,
4244 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4248 cost
= force_var_cost (data
, cbase
, depends_on
);
4249 cost
= add_costs (cost
,
4250 difference_cost (data
,
4251 ubase
, build_int_cst (utype
, 0),
4252 &symbol_present
, &var_present
,
4253 &offset
, depends_on
));
4254 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4255 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4261 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4262 /* Clear depends on. */
4263 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4264 bitmap_clear (*depends_on
);
4267 /* If we are after the increment, the value of the candidate is higher by
4269 if (stmt_is_after_inc
)
4270 offset
-= ratio
* cstepi
;
4272 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4273 (symbol/var1/const parts may be omitted). If we are looking for an
4274 address, find the cost of addressing this. */
4276 return add_costs (cost
,
4277 get_address_cost (symbol_present
, var_present
,
4278 offset
, ratio
, cstepi
,
4280 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4281 speed
, stmt_is_after_inc
,
4284 /* Otherwise estimate the costs for computing the expression. */
4285 if (!symbol_present
&& !var_present
&& !offset
)
4288 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4292 /* Symbol + offset should be compile-time computable so consider that they
4293 are added once to the variable, if present. */
4294 if (var_present
&& (symbol_present
|| offset
))
4295 cost
.cost
+= adjust_setup_cost (data
,
4296 add_cost (speed
, TYPE_MODE (ctype
)));
4298 /* Having offset does not affect runtime cost in case it is added to
4299 symbol, but it increases complexity. */
4303 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4305 aratio
= ratio
> 0 ? ratio
: -ratio
;
4307 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4312 *can_autoinc
= false;
4315 /* Just get the expression, expand it and measure the cost. */
4316 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4319 return infinite_cost
;
4322 comp
= build_simple_mem_ref (comp
);
4324 return new_cost (computation_cost (comp
, speed
), 0);
4328 /* Determines the cost of the computation by that USE is expressed
4329 from induction variable CAND. If ADDRESS_P is true, we just need
4330 to create an address from it, otherwise we want to get it into
4331 register. A set of invariants we depend on is stored in
4332 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4333 autoinc addressing is likely. */
/* Thin wrapper: delegates to get_computation_cost_at, evaluating the
   cost at the use's own statement (use->stmt).  */
4336 get_computation_cost (struct ivopts_data
*data
,
4337 struct iv_use
*use
, struct iv_cand
*cand
,
4338 bool address_p
, bitmap
*depends_on
,
4339 bool *can_autoinc
, int *inv_expr_id
)
4341 return get_computation_cost_at (data
,
4342 use
, cand
, address_p
, depends_on
, use
->stmt
,
4343 can_autoinc
, inv_expr_id
);
4346 /* Determines cost of basing replacement of USE on CAND in a generic
/* NOTE(review): lossy extraction -- the return after the IP_ORIGINAL
   shortcut and the trailing arguments of both set_use_iv_cost calls
   (original lines 4365-4368, 4373) are not visible here.  */
4350 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4351 struct iv_use
*use
, struct iv_cand
*cand
)
4355 int inv_expr_id
= -1;
4357 /* The simple case first -- if we need to express value of the preserved
4358 original biv, the cost is 0. This also prevents us from counting the
4359 cost of increment twice -- once at this use and once in the cost of
4361 if (cand
->pos
== IP_ORIGINAL
4362 && cand
->incremented_at
== use
->stmt
)
4364 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
/* Otherwise compute the full cost (non-address mode) along with the
   set of invariants the expression depends on.  */
4369 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4370 NULL
, &inv_expr_id
)
;
4372 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
/* Success iff the computed cost is finite.  */
4375 return !infinite_cost_p (cost
);
4378 /* Determines cost of basing replacement of USE on CAND in an address. */
/* NOTE(review): lossy extraction -- declarations of depends_on/can_autoinc
   and parts of the autoinc check (original lines 4383-4385, 4391-4392) are
   missing from this view.  */
4381 determine_use_iv_cost_address (struct ivopts_data
*data
,
4382 struct iv_use
*use
, struct iv_cand
*cand
)
4386 int inv_expr_id
= -1;
/* Address-mode cost; also records whether autoincrement addressing
   is feasible for this use/candidate pair.  */
4387 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4388 &can_autoinc
, &inv_expr_id
);
/* If this use is the one the candidate's autoinc was created for,
   the step cost is folded into the addressing mode -- discount it.  */
4390 if (cand
->ainc_use
== use
)
4393 cost
.cost
-= cand
->cost_step
;
4394 /* If we generated the candidate solely for exploiting autoincrement
4395 opportunities, and it turns out it can't be used, set the cost to
4396 infinity to make sure we ignore it. */
4397 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4398 cost
= infinite_cost
;
4400 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4403 return !infinite_cost_p (cost
);
4406 /* Computes value of candidate CAND at position AT in iteration NITER, and
4407 stores it to VAL. */
/* NOTE(review): lossy extraction -- an `else` branch presumably separates
   the two steptype assignments below (original line numbers 4417-4419 with
   gaps); as written they would contradict each other.  TODO confirm against
   the original source.  */
4410 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4413 aff_tree step
, delta
, nit
;
4414 struct iv
*iv
= cand
->iv
;
4415 tree type
= TREE_TYPE (iv
->base
);
4416 tree steptype
= type
;
4417 if (POINTER_TYPE_P (type
))
4418 steptype
= sizetype
;
4419 steptype
= unsigned_type_for (type
);
/* delta = step * niter, computed as affine combinations in the
   (unsigned) step type to avoid overflow issues.  */
4421 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4422 aff_combination_convert (&step
, steptype
);
4423 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4424 aff_combination_convert (&nit
, steptype
);
4425 aff_combination_mult (&nit
, &step
, &delta
);
/* If AT is after the increment, one more step has been applied.  */
4426 if (stmt_after_increment (loop
, cand
, at
))
4427 aff_combination_add (&delta
, &step
);
/* VAL = base + delta.  */
4429 tree_to_aff_combination (iv
->base
, type
, val
);
4430 if (!POINTER_TYPE_P (type
))
4431 aff_combination_convert (val
, steptype
);
4432 aff_combination_add (val
, &delta
);
4435 /* Returns period of induction variable iv. */
/* NOTE(review): lossy extraction -- the declaration of pow2div and the
   final `return period;` (original lines 4441, 4459-4461) are missing
   from this view.  */
4438 iv_period (struct iv
*iv
)
4440 tree step
= iv
->step
, period
, type
;
/* Only constant steps are supported here.  */
4443 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4445 type
= unsigned_type_for (TREE_TYPE (step
));
4446 /* Period of the iv is lcm (step, type_range)/step -1,
4447 i.e., N*type_range/step - 1. Since type range is power
4448 of two, N == (step >> num_of_ending_zeros_binary (step),
4449 so the final result is
4451 (type_range >> num_of_ending_zeros_binary (step)) - 1
4454 pow2div
= num_ending_zeros (step
);
4456 period
= build_low_bits_mask (type
,
4457 (TYPE_PRECISION (type
)
4458 - tree_to_uhwi (pow2div
)));
4463 /* Returns the comparison operator used when eliminating the iv USE. */
/* NOTE(review): declarations of ex_bb and exit (original lines 4469-4471)
   are missing from this extracted view.  */
4465 static enum tree_code
4466 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4468 struct loop
*loop
= data
->current_loop
;
/* Find the edge leaving the loop from the block of the condition:
   whichever successor is NOT inside the loop.  */
4472 ex_bb
= gimple_bb (use
->stmt
);
4473 exit
= EDGE_SUCC (ex_bb
, 0);
4474 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4475 exit
= EDGE_SUCC (ex_bb
, 1);
/* If the loop is left when the condition is true, eliminate with
   EQ_EXPR; otherwise with NE_EXPR.  */
4477 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4480 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4481 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4482 calculation is performed in non-wrapping type.
4484 TODO: More generally, we could test for the situation that
4485 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4486 This would require knowing the sign of OFFSET. */
/* NOTE(review): lossy extraction -- several returns, the declarations of
   e1/e2, and a `switch (code)` header that must precede the
   POINTER_PLUS_EXPR case label near the end are missing from this view.  */
4489 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4491 enum tree_code code
;
4493 aff_tree aff_e1
, aff_e2
, aff_offset
;
/* Only non-wrapping types give us the overflow guarantee.  */
4495 if (!nowrap_type_p (TREE_TYPE (base
)))
4498 base
= expand_simple_operations (base
);
/* Decompose BASE into a binary operation CODE with operands e1, e2 --
   either via its SSA defining statement ...  */
4500 if (TREE_CODE (base
) == SSA_NAME
)
4502 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4504 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4507 code
= gimple_assign_rhs_code (stmt
);
4508 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4511 e1
= gimple_assign_rhs1 (stmt
);
4512 e2
= gimple_assign_rhs2 (stmt
);
/* ... or directly from the tree expression.  */
4516 code
= TREE_CODE (base
);
4517 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4519 e1
= TREE_OPERAND (base
, 0);
4520 e2
= TREE_OPERAND (base
, 1);
4523 /* Use affine expansion as deeper inspection to prove the equality. */
4524 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4525 &aff_e2
, &data
->name_expansion_cache
);
4526 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4527 &aff_offset
, &data
->name_expansion_cache
);
/* aff_offset := -OFFSET, so "X + aff_offset == 0" tests X == OFFSET.  */
4528 aff_combination_scale (&aff_offset
, -1);
4532 aff_combination_add (&aff_e2
, &aff_offset
);
4533 if (aff_combination_zero_p (&aff_e2
))
/* e2 != OFFSET: try the other operand, e1.  */
4536 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4537 &aff_e1
, &data
->name_expansion_cache
);
4538 aff_combination_add (&aff_e1
, &aff_offset
);
4539 return aff_combination_zero_p (&aff_e1
);
/* For pointer arithmetic only the second operand can be the offset.  */
4541 case POINTER_PLUS_EXPR
:
4542 aff_combination_add (&aff_e2
, &aff_offset
);
4543 return aff_combination_zero_p (&aff_e2
);
4550 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4551 comparison with CAND. NITER describes the number of iterations of
4552 the loops. If successful, the comparison in COMP_P is altered accordingly.
4554 We aim to handle the following situation:
4570 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4571 We aim to optimize this to
4579 while (p < p_0 - a + b);
4581 This preserves the correctness, since the pointer arithmetics does not
4582 overflow. More precisely:
4584 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4585 overflow in computing it or the values of p.
4586 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4587 overflow. To prove this, we use the fact that p_0 = base + a. */
4590 iv_elimination_compare_lt (struct ivopts_data
*data
,
4591 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4592 struct tree_niter_desc
*niter
)
4594 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4595 struct aff_tree nit
, tmpa
, tmpb
;
4596 enum tree_code comp
;
4599 /* We need to know that the candidate induction variable does not overflow.
4600 While more complex analysis may be used to prove this, for now just
4601 check that the variable appears in the original program and that it
4602 is computed in a type that guarantees no overflows. */
4603 cand_type
= TREE_TYPE (cand
->iv
->base
);
4604 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4607 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4608 the calculation of the BOUND could overflow, making the comparison
4610 if (!data
->loop_single_exit_p
)
4613 /* We need to be able to decide whether candidate is increasing or decreasing
4614 in order to choose the right comparison operator. */
4615 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4617 step
= int_cst_value (cand
->iv
->step
);
4619 /* Check that the number of iterations matches the expected pattern:
4620 a + 1 > b ? 0 : b - a - 1. */
4621 mbz
= niter
->may_be_zero
;
4622 if (TREE_CODE (mbz
) == GT_EXPR
)
4624 /* Handle a + 1 > b. */
4625 tree op0
= TREE_OPERAND (mbz
, 0);
4626 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4628 a
= TREE_OPERAND (op0
, 0);
4629 b
= TREE_OPERAND (mbz
, 1);
4634 else if (TREE_CODE (mbz
) == LT_EXPR
)
4636 tree op1
= TREE_OPERAND (mbz
, 1);
4638 /* Handle b < a + 1. */
4639 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4641 a
= TREE_OPERAND (op1
, 0);
4642 b
= TREE_OPERAND (mbz
, 0);
4650 /* Expected number of iterations is B - A - 1. Check that it matches
4651 the actual number, i.e., that B - A - NITER = 1. */
4652 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4653 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4654 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4655 aff_combination_scale (&nit
, -1);
4656 aff_combination_scale (&tmpa
, -1);
4657 aff_combination_add (&tmpb
, &tmpa
);
4658 aff_combination_add (&tmpb
, &nit
);
4659 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4662 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4664 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4666 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4667 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
4670 /* Determine the new comparison operator. */
4671 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4672 if (*comp_p
== NE_EXPR
)
4674 else if (*comp_p
== EQ_EXPR
)
4675 *comp_p
= invert_tree_comparison (comp
, false);
4682 /* Check whether it is possible to express the condition in USE by comparison
4683 of candidate CAND. If so, store the value compared with to BOUND, and the
4684 comparison operator to COMP. */
4687 may_eliminate_iv (struct ivopts_data
*data
,
4688 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4689 enum tree_code
*comp
)
4694 struct loop
*loop
= data
->current_loop
;
4696 struct tree_niter_desc
*desc
= NULL
;
4698 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4701 /* For now works only for exits that dominate the loop latch.
4702 TODO: extend to other conditions inside loop body. */
4703 ex_bb
= gimple_bb (use
->stmt
);
4704 if (use
->stmt
!= last_stmt (ex_bb
)
4705 || gimple_code (use
->stmt
) != GIMPLE_COND
4706 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4709 exit
= EDGE_SUCC (ex_bb
, 0);
4710 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4711 exit
= EDGE_SUCC (ex_bb
, 1);
4712 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4715 desc
= niter_for_exit (data
, exit
);
4719 /* Determine whether we can use the variable to test the exit condition.
4720 This is the case iff the period of the induction variable is greater
4721 than the number of iterations for which the exit condition is true. */
4722 period
= iv_period (cand
->iv
);
4724 /* If the number of iterations is constant, compare against it directly. */
4725 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4727 /* See cand_value_at. */
4728 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4730 if (!tree_int_cst_lt (desc
->niter
, period
))
4735 if (tree_int_cst_lt (period
, desc
->niter
))
4740 /* If not, and if this is the only possible exit of the loop, see whether
4741 we can get a conservative estimate on the number of iterations of the
4742 entire loop and compare against that instead. */
4745 widest_int period_value
, max_niter
;
4747 max_niter
= desc
->max
;
4748 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4750 period_value
= wi::to_widest (period
);
4751 if (wi::gtu_p (max_niter
, period_value
))
4753 /* See if we can take advantage of inferred loop bound information. */
4754 if (data
->loop_single_exit_p
)
4756 if (!max_loop_iterations (loop
, &max_niter
))
4758 /* The loop bound is already adjusted by adding 1. */
4759 if (wi::gtu_p (max_niter
, period_value
))
4767 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4769 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
4770 aff_combination_to_tree (&bnd
));
4771 *comp
= iv_elimination_compare (data
, use
);
4773 /* It is unlikely that computing the number of iterations using division
4774 would be more profitable than keeping the original induction variable. */
4775 if (expression_expensive_p (*bound
))
4778 /* Sometimes, it is possible to handle the situation that the number of
4779 iterations may be zero unless additional assumtions by using <
4780 instead of != in the exit condition.
4782 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4783 base the exit condition on it. However, that is often too
4785 if (!integer_zerop (desc
->may_be_zero
))
4786 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4791 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4792 be copied, if it is used in the loop body and DATA->body_includes_call. */
/* NOTE(review): the fallback return for the non-PARM_DECL case (original
   lines 4805-4806) is missing from this extracted view.  */
4795 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4797 tree sbound
= bound
;
4798 STRIP_NOPS (sbound
);
/* A default-def SSA name of a PARM_DECL costs one insn (a regcopy)
   when the loop body contains a call.  */
4800 if (TREE_CODE (sbound
) == SSA_NAME
4801 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4802 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4803 && data
->body_includes_call
)
4804 return COSTS_N_INSNS (1);
4809 /* Determines cost of basing replacement of USE on CAND in a condition. */
4812 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4813 struct iv_use
*use
, struct iv_cand
*cand
)
4815 tree bound
= NULL_TREE
;
4817 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4818 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4820 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4821 tree
*control_var
, *bound_cst
;
4822 enum tree_code comp
= ERROR_MARK
;
4824 /* Only consider real candidates. */
4827 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4832 /* Try iv elimination. */
4833 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4835 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4836 if (elim_cost
.cost
== 0)
4837 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4838 else if (TREE_CODE (bound
) == INTEGER_CST
)
4840 /* If we replace a loop condition 'i < n' with 'p < base + n',
4841 depends_on_elim will have 'base' and 'n' set, which implies
4842 that both 'base' and 'n' will be live during the loop. More likely,
4843 'base + n' will be loop invariant, resulting in only one live value
4844 during the loop. So in that case we clear depends_on_elim and set
4845 elim_inv_expr_id instead. */
4846 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4848 elim_inv_expr_id
= get_expr_id (data
, bound
);
4849 bitmap_clear (depends_on_elim
);
4851 /* The bound is a loop invariant, so it will be only computed
4853 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4856 elim_cost
= infinite_cost
;
4858 /* Try expressing the original giv. If it is compared with an invariant,
4859 note that we cannot get rid of it. */
4860 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4864 /* When the condition is a comparison of the candidate IV against
4865 zero, prefer this IV.
4867 TODO: The constant that we're subtracting from the cost should
4868 be target-dependent. This information should be added to the
4869 target costs for each backend. */
4870 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4871 && integer_zerop (*bound_cst
)
4872 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4873 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4874 elim_cost
.cost
-= 1;
4876 express_cost
= get_computation_cost (data
, use
, cand
, false,
4877 &depends_on_express
, NULL
,
4878 &express_inv_expr_id
);
4879 fd_ivopts_data
= data
;
4880 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4882 /* Count the cost of the original bound as well. */
4883 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4884 if (bound_cost
.cost
== 0)
4885 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4886 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4887 bound_cost
.cost
= 0;
4888 express_cost
.cost
+= bound_cost
.cost
;
4890 /* Choose the better approach, preferring the eliminated IV. */
4891 if (compare_costs (elim_cost
, express_cost
) <= 0)
4894 depends_on
= depends_on_elim
;
4895 depends_on_elim
= NULL
;
4896 inv_expr_id
= elim_inv_expr_id
;
4900 cost
= express_cost
;
4901 depends_on
= depends_on_express
;
4902 depends_on_express
= NULL
;
4905 inv_expr_id
= express_inv_expr_id
;
4908 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4910 if (depends_on_elim
)
4911 BITMAP_FREE (depends_on_elim
);
4912 if (depends_on_express
)
4913 BITMAP_FREE (depends_on_express
);
4915 return !infinite_cost_p (cost
);
4918 /* Determines cost of basing replacement of USE on CAND. Returns false
4919 if USE cannot be based on CAND. */
/* NOTE(review): lossy extraction -- the `switch (use->type)` header and the
   case labels preceding the address/condition calls (presumably USE_ADDRESS
   and USE_COMPARE) are missing from this view.  */
4922 determine_use_iv_cost (struct ivopts_data
*data
,
4923 struct iv_use
*use
, struct iv_cand
*cand
)
/* Dispatch per use kind to the specialized cost routine.  */
4927 case USE_NONLINEAR_EXPR
:
4928 return determine_use_iv_cost_generic (data
, use
, cand
);
4931 return determine_use_iv_cost_address (data
, use
, cand
);
4934 return determine_use_iv_cost_condition (data
, use
, cand
);
4941 /* Return true if get_computation_cost indicates that autoincrement is
4942 a possibility for the pair of USE and CAND, false otherwise. */
/* NOTE(review): declarations of cost/depends_on/can_autoinc and the early
   return for non-address uses (original lines 4947-4953) are missing from
   this extracted view.  */
4945 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4946 struct iv_cand
*cand
)
/* Autoincrement only applies to address-type uses.  */
4952 if (use
->type
!= USE_ADDRESS
)
4955 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4956 &can_autoinc
, NULL
);
4958 BITMAP_FREE (depends_on
);
/* Feasible iff the cost is finite AND the address-cost computation
   flagged autoincrement as likely.  */
4960 return !infinite_cost_p (cost
) && can_autoinc
;
4963 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4964 use that allows autoincrement, and set their AINC_USE if possible. */
/* NOTE(review): lossy extraction -- declarations of i and j, braces, and
   `continue` statements (e.g. after the basic-block check) are missing
   from this view.  */
4967 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4971 for (i
= 0; i
< n_iv_cands (data
); i
++)
4973 struct iv_cand
*cand
= iv_cand (data
, i
);
4974 struct iv_use
*closest_before
= NULL
;
4975 struct iv_use
*closest_after
= NULL
;
/* Only original (preserved) IVs are considered here.  */
4976 if (cand
->pos
!= IP_ORIGINAL
)
/* Scan all uses, tracking -- by statement uid -- the nearest use
   before and the nearest use after the candidate's increment,
   restricted to the increment's own basic block.  */
4979 for (j
= 0; j
< n_iv_uses (data
); j
++)
4981 struct iv_use
*use
= iv_use (data
, j
);
4982 unsigned uid
= gimple_uid (use
->stmt
);
4984 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
4987 if (uid
< gimple_uid (cand
->incremented_at
)
4988 && (closest_before
== NULL
4989 || uid
> gimple_uid (closest_before
->stmt
)))
4990 closest_before
= use
;
4992 if (uid
> gimple_uid (cand
->incremented_at
)
4993 && (closest_after
== NULL
4994 || uid
< gimple_uid (closest_after
->stmt
)))
4995 closest_after
= use
;
/* Prefer a use just before the increment (post-increment form),
   falling back to one just after (pre-increment form).  */
4998 if (closest_before
!= NULL
4999 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5000 cand
->ainc_use
= closest_before
;
5001 else if (closest_after
!= NULL
5002 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5003 cand
->ainc_use
= closest_after
;
5007 /* Finds the candidates for the induction variables. */
/* Driver: builds the candidate set in stages, then wires up autoinc
   uses and records the "important" subset for the search heuristics.  */
5010 find_iv_candidates (struct ivopts_data
*data
)
5012 /* Add commonly used ivs. */
5013 add_standard_iv_candidates (data
);
5015 /* Add old induction variables. */
5016 add_old_ivs_candidates (data
);
5018 /* Add induction variables derived from uses. */
5019 add_derived_ivs_candidates (data
);
5021 set_autoinc_for_original_candidates (data
);
5023 /* Record the important candidates. */
5024 record_important_candidates (data
);
5027 /* Determines costs of basing the use of the iv on an iv candidate. */
5030 determine_use_iv_costs (struct ivopts_data
*data
)
5034 struct iv_cand
*cand
;
5035 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5037 alloc_use_cost_map (data
);
5039 for (i
= 0; i
< n_iv_uses (data
); i
++)
5041 use
= iv_use (data
, i
);
5043 if (data
->consider_all_candidates
)
5045 for (j
= 0; j
< n_iv_cands (data
); j
++)
5047 cand
= iv_cand (data
, j
);
5048 determine_use_iv_cost (data
, use
, cand
);
5055 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5057 cand
= iv_cand (data
, j
);
5058 if (!determine_use_iv_cost (data
, use
, cand
))
5059 bitmap_set_bit (to_clear
, j
);
5062 /* Remove the candidates for that the cost is infinite from
5063 the list of related candidates. */
5064 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5065 bitmap_clear (to_clear
);
5069 BITMAP_FREE (to_clear
);
5071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5073 fprintf (dump_file
, "Use-candidate costs:\n");
5075 for (i
= 0; i
< n_iv_uses (data
); i
++)
5077 use
= iv_use (data
, i
);
5079 fprintf (dump_file
, "Use %d:\n", i
);
5080 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5081 for (j
= 0; j
< use
->n_map_members
; j
++)
5083 if (!use
->cost_map
[j
].cand
5084 || infinite_cost_p (use
->cost_map
[j
].cost
))
5087 fprintf (dump_file
, " %d\t%d\t%d\t",
5088 use
->cost_map
[j
].cand
->id
,
5089 use
->cost_map
[j
].cost
.cost
,
5090 use
->cost_map
[j
].cost
.complexity
);
5091 if (use
->cost_map
[j
].depends_on
)
5092 bitmap_print (dump_file
,
5093 use
->cost_map
[j
].depends_on
, "","");
5094 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5095 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5096 fprintf (dump_file
, "\n");
5099 fprintf (dump_file
, "\n");
5101 fprintf (dump_file
, "\n");
5105 /* Determines cost of the candidate CAND. */
/* NOTE(review): lossy extraction -- the declaration of base, the early exit
   for pseudocandidates, the cost adjustments performed inside the two
   trailing conditionals, and the final assignment of cand->cost (original
   lines around 5112-5119, 5141-5149) are missing from this view.  */
5108 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5110 comp_cost cost_base
;
5111 unsigned cost
, cost_step
;
5120 /* There are two costs associated with the candidate -- its increment
5121 and its initialization. The second is almost negligible for any loop
5122 that rolls enough, so we take it just very little into account. */
5124 base
= cand
->iv
->base
;
5125 cost_base
= force_var_cost (data
, base
, NULL
);
5126 /* It will be exceptional that the iv register happens to be initialized with
5127 the proper value at no cost. In general, there will at least be a regcopy
5129 if (cost_base
.cost
== 0)
5130 cost_base
.cost
= COSTS_N_INSNS (1);
/* Per-iteration cost: one addition in the iv's mode.  */
5131 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
/* Setup cost is scaled down (adjust_setup_cost) since it is paid once
   per loop entry, not per iteration.  */
5133 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5135 /* Prefer the original ivs unless we may gain something by replacing it.
5136 The reason is to make debugging simpler; so this is not relevant for
5137 artificial ivs created by other optimization passes. */
5138 if (cand
->pos
!= IP_ORIGINAL
5139 || !SSA_NAME_VAR (cand
->var_before
)
5140 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5143 /* Prefer not to insert statements into latch unless there are some
5144 already (so that we do not create unnecessary jumps). */
5145 if (cand
->pos
== IP_END
5146 && empty_block_p (ip_end_pos (data
->current_loop
)))
5150 cand
->cost_step
= cost_step
;
5153 /* Determines costs of computation of the candidates. */
/* NOTE(review): the declaration of i and the closing dump fprintf
   (original lines 5157-5159, 5176-5177 partially) are trimmed by the
   extraction; comments only added here.  */
5156 determine_iv_costs (struct ivopts_data
*data
)
/* Dump header when detailed dumping is requested.  */
5160 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5162 fprintf (dump_file
, "Candidate costs:\n");
5163 fprintf (dump_file
, " cand\tcost\n");
/* Cost every candidate, dumping each result in detail mode.  */
5166 for (i
= 0; i
< n_iv_cands (data
); i
++)
5168 struct iv_cand
*cand
= iv_cand (data
, i
);
5170 determine_iv_cost (data
, cand
);
5172 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5173 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5176 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5177 fprintf (dump_file
, "\n");
5180 /* Calculates cost for having SIZE induction variables. */
/* Global register-pressure cost: SIZE itself (so smaller iv sets are
   preferred, all else equal) plus the estimated spill/pressure cost for
   SIZE extra registers.  */
5183 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5185 /* We add size to the cost, so that we prefer eliminating ivs
5187 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5188 data
->body_includes_call
);
5191 /* For each size of the induction variable set determine the penalty. */
5194 determine_set_costs (struct ivopts_data
*data
)
5200 struct loop
*loop
= data
->current_loop
;
5203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5205 fprintf (dump_file
, "Global costs:\n");
5206 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5207 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5208 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5209 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5213 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5216 op
= PHI_RESULT (phi
);
5218 if (virtual_operand_p (op
))
5221 if (get_iv (data
, op
))
5227 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5229 struct version_info
*info
= ver_info (data
, j
);
5231 if (info
->inv_id
&& info
->has_nonlin_use
)
5235 data
->regs_used
= n
;
5236 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5237 fprintf (dump_file
, " regs_used %d\n", n
);
5239 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5241 fprintf (dump_file
, " cost for size:\n");
5242 fprintf (dump_file
, " ivs\tcost\n");
5243 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5244 fprintf (dump_file
, " %d\t%d\n", j
,
5245 ivopts_global_cost_for_size (data
, j
));
5246 fprintf (dump_file
, "\n");
5250 /* Returns true if A is a cheaper cost pair than B. */
5253 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5263 cmp
= compare_costs (a
->cost
, b
->cost
);
5270 /* In case the costs are the same, prefer the cheaper candidate. */
5271 if (a
->cand
->cost
< b
->cand
->cost
)
5278 /* Returns the candidate by which USE is expressed in IVS. */
/* Looks up the cost_pair currently chosen for USE in candidate
   assignment IVS.  May be NULL when USE is not expressed by any
   candidate (iv_ca_set_no_cp stores NULL back into cand_for_use).  */
5280 static struct cost_pair
*
5281 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5283 return ivs
->cand_for_use
[use
->id
];
5286 /* Computes the cost field of IVS structure. */
/* Recomputes the overall cost of assignment IVS: the accumulated
   per-use costs, plus the candidates' own costs, plus the global
   register-pressure cost for n_regs + num_used_inv_expr live values.
   NOTE(review): the extraction appears to have dropped the final store
   of COST back into IVS -- verify against the full file.  */
5289 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5291 comp_cost cost
= ivs
->cand_use_cost
;
5293 cost
.cost
+= ivs
->cand_cost
;
5295 cost
.cost
+= ivopts_global_cost_for_size (data
,
5296 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5301 /* Remove invariants in set INVS to set IVS. */
/* Decrements, for every invariant id in bitmap INVS, the use counter
   kept in IVS->n_invariant_uses.
   NOTE(review): the body of the "== 0" test is not visible in this
   extraction (presumably it decrements IVS->n_regs) -- confirm against
   the full file.  */
5304 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5312 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5314 ivs
->n_invariant_uses
[iid
]--;
5315 if (ivs
->n_invariant_uses
[iid
] == 0)
5320 /* Set USE not to be expressed by any candidate in IVS. */
5323 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5326 unsigned uid
= use
->id
, cid
;
5327 struct cost_pair
*cp
;
5329 cp
= ivs
->cand_for_use
[uid
];
5335 ivs
->cand_for_use
[uid
] = NULL
;
5336 ivs
->n_cand_uses
[cid
]--;
5338 if (ivs
->n_cand_uses
[cid
] == 0)
5340 bitmap_clear_bit (ivs
->cands
, cid
);
5341 /* Do not count the pseudocandidates. */
5345 ivs
->cand_cost
-= cp
->cand
->cost
;
5347 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5350 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5352 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5354 if (cp
->inv_expr_id
!= -1)
5356 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5357 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5358 ivs
->num_used_inv_expr
--;
5360 iv_ca_recount_cost (data
, ivs
);
5363 /* Add invariants in set INVS to set IVS. */
/* Increments, for every invariant id in bitmap INVS, the use counter
   kept in IVS->n_invariant_uses.  Mirror image of
   iv_ca_set_remove_invariants.
   NOTE(review): the body of the "== 1" test is not visible in this
   extraction (presumably it increments IVS->n_regs) -- confirm against
   the full file.  */
5366 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5374 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5376 ivs
->n_invariant_uses
[iid
]++;
5377 if (ivs
->n_invariant_uses
[iid
] == 1)
5382 /* Set cost pair for USE in set IVS to CP. */
5385 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5386 struct iv_use
*use
, struct cost_pair
*cp
)
5388 unsigned uid
= use
->id
, cid
;
5390 if (ivs
->cand_for_use
[uid
] == cp
)
5393 if (ivs
->cand_for_use
[uid
])
5394 iv_ca_set_no_cp (data
, ivs
, use
);
5401 ivs
->cand_for_use
[uid
] = cp
;
5402 ivs
->n_cand_uses
[cid
]++;
5403 if (ivs
->n_cand_uses
[cid
] == 1)
5405 bitmap_set_bit (ivs
->cands
, cid
);
5406 /* Do not count the pseudocandidates. */
5410 ivs
->cand_cost
+= cp
->cand
->cost
;
5412 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5415 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5416 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5418 if (cp
->inv_expr_id
!= -1)
5420 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5421 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5422 ivs
->num_used_inv_expr
++;
5424 iv_ca_recount_cost (data
, ivs
);
5428 /* Extend set IVS by expressing USE by some of the candidates in it
5429 if possible. Consider all important candidates if candidates in
5430 set IVS don't give any result. */
5433 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5436 struct cost_pair
*best_cp
= NULL
, *cp
;
5439 struct iv_cand
*cand
;
5441 gcc_assert (ivs
->upto
>= use
->id
);
5445 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5447 cand
= iv_cand (data
, i
);
5448 cp
= get_use_iv_cost (data
, use
, cand
);
5449 if (cheaper_cost_pair (cp
, best_cp
))
5453 if (best_cp
== NULL
)
5455 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5457 cand
= iv_cand (data
, i
);
5458 cp
= get_use_iv_cost (data
, use
, cand
);
5459 if (cheaper_cost_pair (cp
, best_cp
))
5464 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5467 /* Get cost for assignment IVS. */
5470 iv_ca_cost (struct iv_ca
*ivs
)
5472 /* This was a conditional expression but it triggered a bug in
5475 return infinite_cost
;
5480 /* Returns true if all dependences of CP are among invariants in IVS. */
/* Checks whether every invariant that cost_pair CP depends on is
   already live in assignment IVS (n_invariant_uses non-zero).  A CP
   with no depends_on bitmap trivially has its dependences satisfied.
   NOTE(review): the return statements are not visible in this
   extraction -- confirm polarity against the full file.  */
5483 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5488 if (!cp
->depends_on
)
5491 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5493 if (ivs
->n_invariant_uses
[i
] == 0)
5500 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5501 it before NEXT_CHANGE. */
/* Allocates a new iv_ca_delta record describing the change of USE from
   OLD_CP to NEW_CP and prepends it to the change list NEXT_CHANGE.
   Caller owns the returned node (freed via iv_ca_delta_free).
   NOTE(review): the extraction appears to have dropped the
   "change->use = use;" assignment and the final "return change;" --
   verify against the full file.  */
5503 static struct iv_ca_delta
*
5504 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5505 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5507 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5510 change
->old_cp
= old_cp
;
5511 change
->new_cp
= new_cp
;
5512 change
->next_change
= next_change
;
5517 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5520 static struct iv_ca_delta
*
5521 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5523 struct iv_ca_delta
*last
;
5531 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5533 last
->next_change
= l2
;
5538 /* Reverse the list of changes DELTA, forming the inverse to it. */
5540 static struct iv_ca_delta
*
5541 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5543 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5544 struct cost_pair
*tmp
;
5546 for (act
= delta
; act
; act
= next
)
5548 next
= act
->next_change
;
5549 act
->next_change
= prev
;
5553 act
->old_cp
= act
->new_cp
;
5560 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5561 reverted instead. */
5564 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5565 struct iv_ca_delta
*delta
, bool forward
)
5567 struct cost_pair
*from
, *to
;
5568 struct iv_ca_delta
*act
;
5571 delta
= iv_ca_delta_reverse (delta
);
5573 for (act
= delta
; act
; act
= act
->next_change
)
5577 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5578 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5582 iv_ca_delta_reverse (delta
);
5585 /* Returns true if CAND is used in IVS. */
/* True iff candidate CAND expresses at least one use in assignment
   IVS (its per-candidate use counter is positive).  */
5588 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5590 return ivs
->n_cand_uses
[cand
->id
] > 0;
5593 /* Returns number of induction variable candidates in the set IVS. */
/* Accessor: number of induction variable candidates in the set IVS.  */
5596 iv_ca_n_cands (struct iv_ca
*ivs
)
5598 return ivs
->n_cands
;
5601 /* Free the list of changes DELTA. */
5604 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5606 struct iv_ca_delta
*act
, *next
;
5608 for (act
= *delta
; act
; act
= next
)
5610 next
= act
->next_change
;
5617 /* Allocates new iv candidates assignment. */
5619 static struct iv_ca
*
5620 iv_ca_new (struct ivopts_data
*data
)
5622 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5626 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5627 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5628 nw
->cands
= BITMAP_ALLOC (NULL
);
5631 nw
->cand_use_cost
= no_cost
;
5633 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5635 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5636 nw
->num_used_inv_expr
= 0;
5641 /* Free memory occupied by the set IVS. */
5644 iv_ca_free (struct iv_ca
**ivs
)
5646 free ((*ivs
)->cand_for_use
);
5647 free ((*ivs
)->n_cand_uses
);
5648 BITMAP_FREE ((*ivs
)->cands
);
5649 free ((*ivs
)->n_invariant_uses
);
5650 free ((*ivs
)->used_inv_expr
);
5655 /* Dumps IVS to FILE. */
5658 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5660 const char *pref
= " invariants ";
5662 comp_cost cost
= iv_ca_cost (ivs
);
5664 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5665 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5666 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5667 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5669 for (i
= 0; i
< ivs
->upto
; i
++)
5671 struct iv_use
*use
= iv_use (data
, i
);
5672 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5674 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5675 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5677 fprintf (file
, " use:%d --> ??\n", use
->id
);
5680 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5681 if (ivs
->n_invariant_uses
[i
])
5683 fprintf (file
, "%s%d", pref
, i
);
5686 fprintf (file
, "\n\n");
5689 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5690 new set, and store differences in DELTA. Number of induction variables
5691 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5692 the function will try to find a solution with minimal iv candidates. */
5695 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5696 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5697 unsigned *n_ivs
, bool min_ncand
)
5702 struct cost_pair
*old_cp
, *new_cp
;
5705 for (i
= 0; i
< ivs
->upto
; i
++)
5707 use
= iv_use (data
, i
);
5708 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5711 && old_cp
->cand
== cand
)
5714 new_cp
= get_use_iv_cost (data
, use
, cand
);
5718 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5721 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5724 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5727 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5728 cost
= iv_ca_cost (ivs
);
5730 *n_ivs
= iv_ca_n_cands (ivs
);
5731 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5736 /* Try narrowing set IVS by removing CAND. Return the cost of
5737 the new set and store the differences in DELTA. START is
5738 the candidate with which we start narrowing. */
5741 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5742 struct iv_cand
*cand
, struct iv_cand
*start
,
5743 struct iv_ca_delta
**delta
)
5747 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5749 struct iv_cand
*cnd
;
5750 comp_cost cost
, best_cost
, acost
;
5753 for (i
= 0; i
< n_iv_uses (data
); i
++)
5755 use
= iv_use (data
, i
);
5757 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5758 if (old_cp
->cand
!= cand
)
5761 best_cost
= iv_ca_cost (ivs
);
5762 /* Start narrowing with START. */
5763 new_cp
= get_use_iv_cost (data
, use
, start
);
5765 if (data
->consider_all_candidates
)
5767 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5769 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5772 cnd
= iv_cand (data
, ci
);
5774 cp
= get_use_iv_cost (data
, use
, cnd
);
5778 iv_ca_set_cp (data
, ivs
, use
, cp
);
5779 acost
= iv_ca_cost (ivs
);
5781 if (compare_costs (acost
, best_cost
) < 0)
5790 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5792 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5795 cnd
= iv_cand (data
, ci
);
5797 cp
= get_use_iv_cost (data
, use
, cnd
);
5801 iv_ca_set_cp (data
, ivs
, use
, cp
);
5802 acost
= iv_ca_cost (ivs
);
5804 if (compare_costs (acost
, best_cost
) < 0)
5811 /* Restore to old cp for use. */
5812 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
5816 iv_ca_delta_free (delta
);
5817 return infinite_cost
;
5820 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5823 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5824 cost
= iv_ca_cost (ivs
);
5825 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5830 /* Try optimizing the set of candidates IVS by removing candidates different
5831 from EXCEPT_CAND from it. Return cost of the new set, and store
5832 differences in DELTA. */
5835 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5836 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5839 struct iv_ca_delta
*act_delta
, *best_delta
;
5841 comp_cost best_cost
, acost
;
5842 struct iv_cand
*cand
;
5845 best_cost
= iv_ca_cost (ivs
);
5847 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5849 cand
= iv_cand (data
, i
);
5851 if (cand
== except_cand
)
5854 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
5856 if (compare_costs (acost
, best_cost
) < 0)
5859 iv_ca_delta_free (&best_delta
);
5860 best_delta
= act_delta
;
5863 iv_ca_delta_free (&act_delta
);
5872 /* Recurse to possibly remove other unnecessary ivs. */
5873 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5874 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5875 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5876 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5880 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
5881 cheaper local cost for USE than BEST_CP. Return pointer to
5882 the corresponding cost_pair, otherwise just return BEST_CP. */
5884 static struct cost_pair
*
5885 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
5886 unsigned int cand_idx
, struct iv_cand
*old_cand
,
5887 struct cost_pair
*best_cp
)
5889 struct iv_cand
*cand
;
5890 struct cost_pair
*cp
;
5892 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
5893 if (cand_idx
== old_cand
->id
)
5896 cand
= iv_cand (data
, cand_idx
);
5897 cp
= get_use_iv_cost (data
, use
, cand
);
5898 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
5904 /* Try breaking local optimal fixed-point for IVS by replacing candidates
5905 which are used by more than one iv uses. For each of those candidates,
5906 this function tries to represent iv uses under that candidate using
5907 other ones with lower local cost, then tries to prune the new set.
5908 If the new set has lower cost, it returns the new cost after recording
5909 candidate replacement in list DELTA. */
5912 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5913 struct iv_ca_delta
**delta
)
5915 bitmap_iterator bi
, bj
;
5916 unsigned int i
, j
, k
;
5918 struct iv_cand
*cand
;
5919 comp_cost orig_cost
, acost
;
5920 struct iv_ca_delta
*act_delta
, *tmp_delta
;
5921 struct cost_pair
*old_cp
, *best_cp
= NULL
;
5924 orig_cost
= iv_ca_cost (ivs
);
5926 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5928 if (ivs
->n_cand_uses
[i
] == 1
5929 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
5932 cand
= iv_cand (data
, i
);
5935 /* Represent uses under current candidate using other ones with
5936 lower local cost. */
5937 for (j
= 0; j
< ivs
->upto
; j
++)
5939 use
= iv_use (data
, j
);
5940 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5942 if (old_cp
->cand
!= cand
)
5946 if (data
->consider_all_candidates
)
5947 for (k
= 0; k
< n_iv_cands (data
); k
++)
5948 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
5949 old_cp
->cand
, best_cp
);
5951 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
5952 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
5953 old_cp
->cand
, best_cp
);
5955 if (best_cp
== old_cp
)
5958 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
5960 /* No need for further prune. */
5964 /* Prune the new candidate set. */
5965 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5966 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
5967 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5968 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5970 if (compare_costs (acost
, orig_cost
) < 0)
5976 iv_ca_delta_free (&act_delta
);
5982 /* Tries to extend the sets IVS in the best possible way in order
5983 to express the USE. If ORIGINALP is true, prefer candidates from
5984 the original set of IVs, otherwise favor important candidates not
5985 based on any memory object. */
5988 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5989 struct iv_use
*use
, bool originalp
)
5991 comp_cost best_cost
, act_cost
;
5994 struct iv_cand
*cand
;
5995 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5996 struct cost_pair
*cp
;
5998 iv_ca_add_use (data
, ivs
, use
);
5999 best_cost
= iv_ca_cost (ivs
);
6000 cp
= iv_ca_cand_for_use (ivs
, use
);
6003 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6004 iv_ca_set_no_cp (data
, ivs
, use
);
6007 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6008 first try important candidates not based on any memory object. Only if
6009 this fails, try the specific ones. Rationale -- in loops with many
6010 variables the best choice often is to use just one generic biv. If we
6011 added here many ivs specific to the uses, the optimization algorithm later
6012 would be likely to get stuck in a local minimum, thus causing us to create
6013 too many ivs. The approach from few ivs to more seems more likely to be
6014 successful -- starting from few ivs, replacing an expensive use by a
6015 specific iv should always be a win. */
6016 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6018 cand
= iv_cand (data
, i
);
6020 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6023 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6026 if (iv_ca_cand_used_p (ivs
, cand
))
6029 cp
= get_use_iv_cost (data
, use
, cand
);
6033 iv_ca_set_cp (data
, ivs
, use
, cp
);
6034 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6036 iv_ca_set_no_cp (data
, ivs
, use
);
6037 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6039 if (compare_costs (act_cost
, best_cost
) < 0)
6041 best_cost
= act_cost
;
6043 iv_ca_delta_free (&best_delta
);
6044 best_delta
= act_delta
;
6047 iv_ca_delta_free (&act_delta
);
6050 if (infinite_cost_p (best_cost
))
6052 for (i
= 0; i
< use
->n_map_members
; i
++)
6054 cp
= use
->cost_map
+ i
;
6059 /* Already tried this. */
6060 if (cand
->important
)
6062 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6064 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6068 if (iv_ca_cand_used_p (ivs
, cand
))
6072 iv_ca_set_cp (data
, ivs
, use
, cp
);
6073 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6074 iv_ca_set_no_cp (data
, ivs
, use
);
6075 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6078 if (compare_costs (act_cost
, best_cost
) < 0)
6080 best_cost
= act_cost
;
6083 iv_ca_delta_free (&best_delta
);
6084 best_delta
= act_delta
;
6087 iv_ca_delta_free (&act_delta
);
6091 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6092 iv_ca_delta_free (&best_delta
);
6094 return !infinite_cost_p (best_cost
);
6097 /* Finds an initial assignment of candidates to uses. */
6099 static struct iv_ca
*
6100 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6102 struct iv_ca
*ivs
= iv_ca_new (data
);
6105 for (i
= 0; i
< n_iv_uses (data
); i
++)
6106 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6115 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6116 points to a bool variable, this function tries to break local
6117 optimal fixed-point by replacing candidates in IVS if it's true. */
6120 try_improve_iv_set (struct ivopts_data
*data
,
6121 struct iv_ca
*ivs
, bool *try_replace_p
)
6124 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6125 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6126 struct iv_cand
*cand
;
6128 /* Try extending the set of induction variables by one. */
6129 for (i
= 0; i
< n_iv_cands (data
); i
++)
6131 cand
= iv_cand (data
, i
);
6133 if (iv_ca_cand_used_p (ivs
, cand
))
6136 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6140 /* If we successfully added the candidate and the set is small enough,
6141 try optimizing it by removing other candidates. */
6142 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6144 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6145 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6146 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6147 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6150 if (compare_costs (acost
, best_cost
) < 0)
6153 iv_ca_delta_free (&best_delta
);
6154 best_delta
= act_delta
;
6157 iv_ca_delta_free (&act_delta
);
6162 /* Try removing the candidates from the set instead. */
6163 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6165 if (!best_delta
&& *try_replace_p
)
6167 *try_replace_p
= false;
6168 /* So far candidate selecting algorithm tends to choose fewer IVs
6169 so that it can handle cases in which loops have many variables
6170 but the best choice is often to use only one general biv. One
6171 weakness is it can't handle opposite cases, in which different
6172 candidates should be chosen with respect to each use. To solve
6173 the problem, we replace candidates in a manner described by the
6174 comments of iv_ca_replace, thus give general algorithm a chance
6175 to break local optimal fixed-point in these cases. */
6176 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6183 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6184 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6185 iv_ca_delta_free (&best_delta
);
6189 /* Attempts to find the optimal set of induction variables. We do simple
6190 greedy heuristic -- we try to replace at most one candidate in the selected
6191 solution and remove the unused ivs while this improves the cost. */
6193 static struct iv_ca
*
6194 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6197 bool try_replace_p
= true;
6199 /* Get the initial solution. */
6200 set
= get_initial_solution (data
, originalp
);
6203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6204 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6208 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6210 fprintf (dump_file
, "Initial set of candidates:\n");
6211 iv_ca_dump (data
, dump_file
, set
);
6214 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6216 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6218 fprintf (dump_file
, "Improved to:\n");
6219 iv_ca_dump (data
, dump_file
, set
);
6226 static struct iv_ca
*
6227 find_optimal_iv_set (struct ivopts_data
*data
)
6230 struct iv_ca
*set
, *origset
;
6232 comp_cost cost
, origcost
;
6234 /* Determine the cost based on a strategy that starts with original IVs,
6235 and try again using a strategy that prefers candidates not based
6237 origset
= find_optimal_iv_set_1 (data
, true);
6238 set
= find_optimal_iv_set_1 (data
, false);
6240 if (!origset
&& !set
)
6243 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6244 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6246 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6248 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6249 origcost
.cost
, origcost
.complexity
);
6250 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6251 cost
.cost
, cost
.complexity
);
6254 /* Choose the one with the best cost. */
6255 if (compare_costs (origcost
, cost
) <= 0)
6262 iv_ca_free (&origset
);
6264 for (i
= 0; i
< n_iv_uses (data
); i
++)
6266 use
= iv_use (data
, i
);
6267 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6273 /* Creates a new induction variable corresponding to CAND. */
6276 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6278 gimple_stmt_iterator incr_pos
;
6288 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6292 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6300 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6304 /* Mark that the iv is preserved. */
6305 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6306 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6308 /* Rewrite the increment so that it uses var_before directly. */
6309 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6313 gimple_add_tmp_var (cand
->var_before
);
6315 base
= unshare_expr (cand
->iv
->base
);
6317 create_iv (base
, unshare_expr (cand
->iv
->step
),
6318 cand
->var_before
, data
->current_loop
,
6319 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6322 /* Creates new induction variables described in SET. */
6325 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6328 struct iv_cand
*cand
;
6331 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6333 cand
= iv_cand (data
, i
);
6334 create_new_iv (data
, cand
);
6337 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6339 fprintf (dump_file
, "\nSelected IV set: \n");
6340 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6342 cand
= iv_cand (data
, i
);
6343 dump_cand (dump_file
, cand
);
6345 fprintf (dump_file
, "\n");
6349 /* Rewrites USE (definition of iv used in a nonlinear expression)
6350 using candidate CAND. */
6353 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6354 struct iv_use
*use
, struct iv_cand
*cand
)
6359 gimple_stmt_iterator bsi
;
6361 /* An important special case -- if we are asked to express value of
6362 the original iv by itself, just exit; there is no need to
6363 introduce a new computation (that might also need casting the
6364 variable to unsigned and back). */
6365 if (cand
->pos
== IP_ORIGINAL
6366 && cand
->incremented_at
== use
->stmt
)
6368 enum tree_code stmt_code
;
6370 gcc_assert (is_gimple_assign (use
->stmt
));
6371 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6373 /* Check whether we may leave the computation unchanged.
6374 This is the case only if it does not rely on other
6375 computations in the loop -- otherwise, the computation
6376 we rely upon may be removed in remove_unused_ivs,
6377 thus leading to ICE. */
6378 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6379 if (stmt_code
== PLUS_EXPR
6380 || stmt_code
== MINUS_EXPR
6381 || stmt_code
== POINTER_PLUS_EXPR
)
6383 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6384 op
= gimple_assign_rhs2 (use
->stmt
);
6385 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6386 op
= gimple_assign_rhs1 (use
->stmt
);
6393 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6397 comp
= get_computation (data
->current_loop
, use
, cand
);
6398 gcc_assert (comp
!= NULL_TREE
);
6400 switch (gimple_code (use
->stmt
))
6403 tgt
= PHI_RESULT (use
->stmt
);
6405 /* If we should keep the biv, do not replace it. */
6406 if (name_info (data
, tgt
)->preserve_biv
)
6409 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6413 tgt
= gimple_assign_lhs (use
->stmt
);
6414 bsi
= gsi_for_stmt (use
->stmt
);
6421 if (!valid_gimple_rhs_p (comp
)
6422 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6423 /* We can't allow re-allocating the stmt as it might be pointed
6425 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6426 >= gimple_num_ops (gsi_stmt (bsi
)))))
6428 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6429 true, GSI_SAME_STMT
);
6430 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6432 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6433 /* As this isn't a plain copy we have to reset alignment
6435 if (SSA_NAME_PTR_INFO (comp
))
6436 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6440 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6442 ass
= gimple_build_assign (tgt
, comp
);
6443 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6445 bsi
= gsi_for_stmt (use
->stmt
);
6446 remove_phi_node (&bsi
, false);
6450 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6451 use
->stmt
= gsi_stmt (bsi
);
6455 /* Performs a peephole optimization to reorder the iv update statement with
6456 a mem ref to enable instruction combining in later phases. The mem ref uses
6457 the iv value before the update, so the reordering transformation requires
6458 adjustment of the offset. CAND is the selected IV_CAND.
6462 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6470 directly propagating t over to (1) will introduce overlapping live range
6471 thus increase register pressure. This peephole transform it into:
6475 t = MEM_REF (base, iv2, 8, 8);
6482 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6485 gimple iv_update
, stmt
;
6487 gimple_stmt_iterator gsi
, gsi_iv
;
6489 if (cand
->pos
!= IP_NORMAL
)
6492 var_after
= cand
->var_after
;
6493 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6495 bb
= gimple_bb (iv_update
);
6496 gsi
= gsi_last_nondebug_bb (bb
);
6497 stmt
= gsi_stmt (gsi
);
6499 /* Only handle conditional statement for now. */
6500 if (gimple_code (stmt
) != GIMPLE_COND
)
6503 gsi_prev_nondebug (&gsi
);
6504 stmt
= gsi_stmt (gsi
);
6505 if (stmt
!= iv_update
)
6508 gsi_prev_nondebug (&gsi
);
6509 if (gsi_end_p (gsi
))
6512 stmt
= gsi_stmt (gsi
);
6513 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6516 if (stmt
!= use
->stmt
)
6519 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6522 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6524 fprintf (dump_file
, "Reordering \n");
6525 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6526 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6527 fprintf (dump_file
, "\n");
6530 gsi
= gsi_for_stmt (use
->stmt
);
6531 gsi_iv
= gsi_for_stmt (iv_update
);
6532 gsi_move_before (&gsi_iv
, &gsi
);
6534 cand
->pos
= IP_BEFORE_USE
;
6535 cand
->incremented_at
= use
->stmt
;
6538 /* Rewrites USE (address that is an iv) using candidate CAND. */
6541 rewrite_use_address (struct ivopts_data
*data
,
6542 struct iv_use
*use
, struct iv_cand
*cand
)
6545 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6546 tree base_hint
= NULL_TREE
;
6550 adjust_iv_update_pos (cand
, use
);
6551 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6553 unshare_aff_combination (&aff
);
6555 /* To avoid undefined overflow problems, all IV candidates use unsigned
6556 integer types. The drawback is that this makes it impossible for
6557 create_mem_ref to distinguish an IV that is based on a memory object
6558 from one that represents simply an offset.
6560 To work around this problem, we pass a hint to create_mem_ref that
6561 indicates which variable (if any) in aff is an IV based on a memory
6562 object. Note that we only consider the candidate. If this is not
6563 based on an object, the base of the reference is in some subexpression
6564 of the use -- but these will use pointer types, so they are recognized
6565 by the create_mem_ref heuristics anyway. */
6566 if (cand
->iv
->base_object
)
6567 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6569 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6570 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6571 reference_alias_ptr_type (*use
->op_p
),
6572 iv
, base_hint
, data
->speed
);
6573 copy_ref_info (ref
, *use
->op_p
);
6577 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6581 rewrite_use_compare (struct ivopts_data
*data
,
6582 struct iv_use
*use
, struct iv_cand
*cand
)
6584 tree comp
, *var_p
, op
, bound
;
6585 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6586 enum tree_code compare
;
6587 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6593 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6594 tree var_type
= TREE_TYPE (var
);
6597 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6599 fprintf (dump_file
, "Replacing exit test: ");
6600 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6603 bound
= unshare_expr (fold_convert (var_type
, bound
));
6604 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6606 gsi_insert_seq_on_edge_immediate (
6607 loop_preheader_edge (data
->current_loop
),
6610 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
6611 gimple_cond_set_lhs (cond_stmt
, var
);
6612 gimple_cond_set_code (cond_stmt
, compare
);
6613 gimple_cond_set_rhs (cond_stmt
, op
);
6617 /* The induction variable elimination failed; just express the original
6619 comp
= get_computation (data
->current_loop
, use
, cand
);
6620 gcc_assert (comp
!= NULL_TREE
);
6622 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6625 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6626 true, GSI_SAME_STMT
);
6629 /* Rewrites USE using candidate CAND. */
6632 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6636 case USE_NONLINEAR_EXPR
:
6637 rewrite_use_nonlinear_expr (data
, use
, cand
);
6641 rewrite_use_address (data
, use
, cand
);
6645 rewrite_use_compare (data
, use
, cand
);
6652 update_stmt (use
->stmt
);
6655 /* Rewrite the uses using the selected induction variables. */
6658 rewrite_uses (struct ivopts_data
*data
)
6661 struct iv_cand
*cand
;
6664 for (i
= 0; i
< n_iv_uses (data
); i
++)
6666 use
= iv_use (data
, i
);
6667 cand
= use
->selected
;
6670 rewrite_use (data
, use
, cand
);
6674 /* Removes the ivs that are not used after rewriting. */
6677 remove_unused_ivs (struct ivopts_data
*data
)
6681 bitmap toremove
= BITMAP_ALLOC (NULL
);
6683 /* Figure out an order in which to release SSA DEFs so that we don't
6684 release something that we'd have to propagate into a debug stmt
6686 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6688 struct version_info
*info
;
6690 info
= ver_info (data
, j
);
6692 && !integer_zerop (info
->iv
->step
)
6694 && !info
->iv
->have_use_for
6695 && !info
->preserve_biv
)
6697 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6699 tree def
= info
->iv
->ssa_name
;
6701 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6703 imm_use_iterator imm_iter
;
6704 use_operand_p use_p
;
6708 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6710 if (!gimple_debug_bind_p (stmt
))
6713 /* We just want to determine whether to do nothing
6714 (count == 0), to substitute the computed
6715 expression into a single use of the SSA DEF by
6716 itself (count == 1), or to use a debug temp
6717 because the SSA DEF is used multiple times or as
6718 part of a larger expression (count > 1). */
6720 if (gimple_debug_bind_get_value (stmt
) != def
)
6724 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6730 struct iv_use dummy_use
;
6731 struct iv_cand
*best_cand
= NULL
, *cand
;
6732 unsigned i
, best_pref
= 0, cand_pref
;
6734 memset (&dummy_use
, 0, sizeof (dummy_use
));
6735 dummy_use
.iv
= info
->iv
;
6736 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6738 cand
= iv_use (data
, i
)->selected
;
6739 if (cand
== best_cand
)
6741 cand_pref
= operand_equal_p (cand
->iv
->step
,
6745 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6746 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6749 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6751 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6754 best_pref
= cand_pref
;
6761 tree comp
= get_computation_at (data
->current_loop
,
6762 &dummy_use
, best_cand
,
6763 SSA_NAME_DEF_STMT (def
));
6769 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6770 DECL_ARTIFICIAL (vexpr
) = 1;
6771 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6772 if (SSA_NAME_VAR (def
))
6773 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6775 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6777 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
6778 gimple_stmt_iterator gsi
;
6780 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6781 gsi
= gsi_after_labels (gimple_bb
6782 (SSA_NAME_DEF_STMT (def
)));
6784 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6786 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6790 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6792 if (!gimple_debug_bind_p (stmt
))
6795 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6796 SET_USE (use_p
, comp
);
6804 release_defs_bitset (toremove
);
6806 BITMAP_FREE (toremove
);
6809 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6810 for hash_map::traverse. */
6813 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
6819 /* Frees data allocated by the optimization of a single loop. */
6822 free_loop_data (struct ivopts_data
*data
)
6830 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
6831 delete data
->niters
;
6832 data
->niters
= NULL
;
6835 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6837 struct version_info
*info
;
6839 info
= ver_info (data
, i
);
6842 info
->has_nonlin_use
= false;
6843 info
->preserve_biv
= false;
6846 bitmap_clear (data
->relevant
);
6847 bitmap_clear (data
->important_candidates
);
6849 for (i
= 0; i
< n_iv_uses (data
); i
++)
6851 struct iv_use
*use
= iv_use (data
, i
);
6854 BITMAP_FREE (use
->related_cands
);
6855 for (j
= 0; j
< use
->n_map_members
; j
++)
6856 if (use
->cost_map
[j
].depends_on
)
6857 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6858 free (use
->cost_map
);
6861 data
->iv_uses
.truncate (0);
6863 for (i
= 0; i
< n_iv_cands (data
); i
++)
6865 struct iv_cand
*cand
= iv_cand (data
, i
);
6868 if (cand
->depends_on
)
6869 BITMAP_FREE (cand
->depends_on
);
6872 data
->iv_candidates
.truncate (0);
6874 if (data
->version_info_size
< num_ssa_names
)
6876 data
->version_info_size
= 2 * num_ssa_names
;
6877 free (data
->version_info
);
6878 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6881 data
->max_inv_id
= 0;
6883 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6884 SET_DECL_RTL (obj
, NULL_RTX
);
6886 decl_rtl_to_reset
.truncate (0);
6888 data
->inv_expr_tab
->empty ();
6889 data
->inv_expr_id
= 0;
6892 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6896 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6898 free_loop_data (data
);
6899 free (data
->version_info
);
6900 BITMAP_FREE (data
->relevant
);
6901 BITMAP_FREE (data
->important_candidates
);
6903 decl_rtl_to_reset
.release ();
6904 data
->iv_uses
.release ();
6905 data
->iv_candidates
.release ();
6906 delete data
->inv_expr_tab
;
6907 data
->inv_expr_tab
= NULL
;
6908 free_affine_expand_cache (&data
->name_expansion_cache
);
6911 /* Returns true if the loop body BODY includes any function calls. */
6914 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6916 gimple_stmt_iterator gsi
;
6919 for (i
= 0; i
< num_nodes
; i
++)
6920 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6922 gimple stmt
= gsi_stmt (gsi
);
6923 if (is_gimple_call (stmt
)
6924 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6930 /* Optimizes the LOOP. Returns true if anything changed. */
6933 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6935 bool changed
= false;
6936 struct iv_ca
*iv_ca
;
6937 edge exit
= single_dom_exit (loop
);
6940 gcc_assert (!data
->niters
);
6941 data
->current_loop
= loop
;
6942 data
->speed
= optimize_loop_for_speed_p (loop
);
6944 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6946 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6950 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6951 exit
->src
->index
, exit
->dest
->index
);
6952 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6953 fprintf (dump_file
, "\n");
6956 fprintf (dump_file
, "\n");
6959 body
= get_loop_body (loop
);
6960 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6961 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6964 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
6966 /* For each ssa name determines whether it behaves as an induction variable
6968 if (!find_induction_variables (data
))
6971 /* Finds interesting uses (item 1). */
6972 find_interesting_uses (data
);
6973 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6976 /* Finds candidates for the induction variables (item 2). */
6977 find_iv_candidates (data
);
6979 /* Calculates the costs (item 3, part 1). */
6980 determine_iv_costs (data
);
6981 determine_use_iv_costs (data
);
6982 determine_set_costs (data
);
6984 /* Find the optimal set of induction variables (item 3, part 2). */
6985 iv_ca
= find_optimal_iv_set (data
);
6990 /* Create the new induction variables (item 4, part 1). */
6991 create_new_ivs (data
, iv_ca
);
6992 iv_ca_free (&iv_ca
);
6994 /* Rewrite the uses (item 4, part 2). */
6995 rewrite_uses (data
);
6997 /* Remove the ivs that are unused after rewriting. */
6998 remove_unused_ivs (data
);
7000 /* We have changed the structure of induction variables; it might happen
7001 that definitions in the scev database refer to some of them that were
7006 free_loop_data (data
);
7011 /* Main entry point. Optimizes induction variables in loops. */
7014 tree_ssa_iv_optimize (void)
7017 struct ivopts_data data
;
7019 tree_ssa_iv_optimize_init (&data
);
7021 /* Optimize the loops starting with the innermost ones. */
7022 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7025 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7027 tree_ssa_iv_optimize_loop (&data
, loop
);
7030 tree_ssa_iv_optimize_finalize (&data
);