gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "backend.h"
  68 #include "cfghooks.h"
  69 #include "tree.h"
  70 #include "gimple.h"
  71 #include "rtl.h"
  72 #include "ssa.h"
  73 #include "alias.h"
  74 #include "fold-const.h"
  75 #include "stor-layout.h"
  76 #include "tm_p.h"
  77 #include "gimple-pretty-print.h"
  78 #include "internal-fn.h"
  79 #include "tree-eh.h"
  80 #include "gimplify.h"
  81 #include "gimple-iterator.h"
  82 #include "gimplify-me.h"
  83 #include "cgraph.h"
  84 #include "tree-cfg.h"
  85 #include "tree-ssa-loop-ivopts.h"
  86 #include "tree-ssa-loop-manip.h"
  87 #include "tree-ssa-loop-niter.h"
  88 #include "tree-ssa-loop.h"
  89 #include "flags.h"
  90 #include "insn-config.h"
  91 #include "expmed.h"
  92 #include "dojump.h"
  93 #include "explow.h"
  94 #include "calls.h"
  95 #include "emit-rtl.h"
  96 #include "varasm.h"
  97 #include "stmt.h"
  98 #include "expr.h"
  99 #include "tree-dfa.h"
 100 #include "tree-ssa.h"
 101 #include "cfgloop.h"
 102 #include "tree-pass.h"
 103 #include "tree-chrec.h"
 104 #include "tree-scalar-evolution.h"
 105 #include "params.h"
 106 #include "langhooks.h"
 107 #include "tree-affine.h"
 108 #include "target.h"
 109 #include "tree-inline.h"
 110 #include "tree-ssa-propagate.h"
 111 #include "tree-ssa-address.h"
 112 #include "builtins.h"
 113 #include "tree-vectorizer.h"
 114
 115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 116    cost of different addressing modes.  This should be moved to a TBD
 117    interface between the GIMPLE and RTL worlds.  */
 118 #include "recog.h"
 119
 120 /* The infinite cost.  */
 121 #define INFTY 10000000
 122
 123 #define AVG_LOOP_NITER(LOOP) 5
 124
 125 /* Returns the expected number of loop iterations for LOOP.
 126    The average trip count is computed from profile data if it
 127    exists. */
 128
 129 static inline HOST_WIDE_INT
 130 avg_loop_niter (struct loop *loop)
 131 {
 132   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 133   if (niter == -1)
 134     return AVG_LOOP_NITER (loop);
 135
 136   return niter;
 137 }
 138
 139 /* Representation of the induction variable.  */
 140 struct iv
 141 {
 142   tree base;            /* Initial value of the iv.  */
 143   tree base_object;     /* A memory object to that the induction variable points.  */
 144   tree step;            /* Step of the iv (constant only).  */
 145   tree ssa_name;        /* The ssa name with the value.  */
 146   unsigned use_id;      /* The identifier in the use if it is the case.  */
 147   bool biv_p;           /* Is it a biv?  */
 148   bool have_use_for;    /* Do we already have a use for it?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150 };
 151
 152 /* Per-ssa version information (induction variable descriptions, etc.).  */
 153 struct version_info
 154 {
 155   tree name;            /* The ssa name.  */
 156   struct iv *iv;        /* Induction variable description.  */
 157   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 158                            an expression that is not an induction variable.  */
 159   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 160   unsigned inv_id;      /* Id of an invariant.  */
 161 };
 162
 163 /* Types of uses.  */
 164 enum use_type
 165 {
 166   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 167   USE_ADDRESS,          /* Use in an address.  */
 168   USE_COMPARE           /* Use is a compare.  */
 169 };
 170
 171 /* Cost of a computation.  */
 172 typedef struct
 173 {
 174   int cost;             /* The runtime cost.  */
 175   unsigned complexity;  /* The estimate of the complexity of the code for
 176                            the computation (in no concrete units --
 177                            complexity field should be larger for more
 178                            complex expressions and addressing modes).  */
 179 } comp_cost;
 180
 181 static const comp_cost no_cost = {0, 0};
 182 static const comp_cost infinite_cost = {INFTY, INFTY};
 183
 184 /* The candidate - cost pair.  */
 185 struct cost_pair
 186 {
 187   struct iv_cand *cand; /* The candidate.  */
 188   comp_cost cost;       /* The cost.  */
 189   bitmap depends_on;    /* The list of invariants that have to be
 190                            preserved.  */
 191   tree value;           /* For final value elimination, the expression for
 192                            the final value of the iv.  For iv elimination,
 193                            the new bound to compare with.  */
 194   enum tree_code comp;  /* For iv elimination, the comparison.  */
 195   int inv_expr_id;      /* Loop invariant expression id.  */
 196 };
 197
 198 /* Use.  */
 199 struct iv_use
 200 {
 201   unsigned id;          /* The id of the use.  */
 202   unsigned sub_id;      /* The id of the sub use.  */
 203   enum use_type type;   /* Type of the use.  */
 204   struct iv *iv;        /* The induction variable it is based on.  */
 205   gimple stmt;          /* Statement in that it occurs.  */
 206   tree *op_p;           /* The place where it occurs.  */
 207   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 208                            important ones.  */
 209
 210   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 211   struct cost_pair *cost_map;
 212                         /* The costs wrto the iv candidates.  */
 213
 214   struct iv_cand *selected;
 215                         /* The selected candidate.  */
 216
 217   struct iv_use *next;  /* The next sub use.  */
 218   tree addr_base;       /* Base address with const offset stripped.  */
 219   unsigned HOST_WIDE_INT addr_offset;
 220                         /* Const offset stripped from base address.  */
 221 };
 222
 223 /* The position where the iv is computed.  */
 224 enum iv_position
 225 {
 226   IP_NORMAL,            /* At the end, just before the exit condition.  */
 227   IP_END,               /* At the end of the latch block.  */
 228   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 229   IP_AFTER_USE,         /* Immediately after a specific use.  */
 230   IP_ORIGINAL           /* The original biv.  */
 231 };
 232
 233 /* The induction variable candidate.  */
 234 struct iv_cand
 235 {
 236   unsigned id;          /* The number of the candidate.  */
 237   bool important;       /* Whether this is an "important" candidate, i.e. such
 238                            that it should be considered by all uses.  */
 239   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 240   gimple incremented_at;/* For original biv, the statement where it is
 241                            incremented.  */
 242   tree var_before;      /* The variable used for it before increment.  */
 243   tree var_after;       /* The variable used for it after increment.  */
 244   struct iv *iv;        /* The value of the candidate.  NULL for
 245                            "pseudocandidate" used to indicate the possibility
 246                            to replace the final value of an iv by direct
 247                            computation of the value.  */
 248   unsigned cost;        /* Cost of the candidate.  */
 249   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 250   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 251                               where it is incremented.  */
 252   bitmap depends_on;    /* The list of invariants that are used in step of the
 253                            biv.  */
 254 };
 255
 256 /* Loop invariant expression hashtable entry.  */
 257 struct iv_inv_expr_ent
 258 {
 259   tree expr;
 260   int id;
 261   hashval_t hash;
 262 };
 263
 264 /* The data used by the induction variable optimizations.  */
 265
 266 typedef struct iv_use *iv_use_p;
 267
 268 typedef struct iv_cand *iv_cand_p;
 269
 270 /* Hashtable helpers.  */
 271
 272 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 273 {
 274   static inline hashval_t hash (const iv_inv_expr_ent *);
 275   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 276 };
 277
 278 /* Hash function for loop invariant expressions.  */
 279
 280 inline hashval_t
 281 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 282 {
 283   return expr->hash;
 284 }
 285
 286 /* Hash table equality function for expressions.  */
 287
 288 inline bool
 289 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 290                            const iv_inv_expr_ent *expr2)
 291 {
 292   return expr1->hash == expr2->hash
 293          && operand_equal_p (expr1->expr, expr2->expr, 0);
 294 }
 295
 296 struct ivopts_data
 297 {
 298   /* The currently optimized loop.  */
 299   struct loop *current_loop;
 300   source_location loop_loc;
 301
 302   /* Numbers of iterations for all exits of the current loop.  */
 303   hash_map<edge, tree_niter_desc *> *niters;
 304
 305   /* Number of registers used in it.  */
 306   unsigned regs_used;
 307
 308   /* The size of version_info array allocated.  */
 309   unsigned version_info_size;
 310
 311   /* The array of information for the ssa names.  */
 312   struct version_info *version_info;
 313
 314   /* The hashtable of loop invariant expressions created
 315      by ivopt.  */
 316   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 317
 318   /* Loop invariant expression id.  */
 319   int inv_expr_id;
 320
 321   /* The bitmap of indices in version_info whose value was changed.  */
 322   bitmap relevant;
 323
 324   /* The uses of induction variables.  */
 325   vec<iv_use_p> iv_uses;
 326
 327   /* The candidates.  */
 328   vec<iv_cand_p> iv_candidates;
 329
 330   /* A bitmap of important candidates.  */
 331   bitmap important_candidates;
 332
 333   /* Cache used by tree_to_aff_combination_expand.  */
 334   hash_map<tree, name_expansion *> *name_expansion_cache;
 335
 336   /* The maximum invariant id.  */
 337   unsigned max_inv_id;
 338
 339   /* Obstack for iv structure.  */
 340   struct obstack iv_obstack;
 341
 342   /* Whether to consider just related and important candidates when replacing a
 343      use.  */
 344   bool consider_all_candidates;
 345
 346   /* Are we optimizing for speed?  */
 347   bool speed;
 348
 349   /* Whether the loop body includes any function calls.  */
 350   bool body_includes_call;
 351
 352   /* Whether the loop body can only be exited via single exit.  */
 353   bool loop_single_exit_p;
 354 };
 355
 356 /* An assignment of iv candidates to uses.  */
 357
 358 struct iv_ca
 359 {
 360   /* The number of uses covered by the assignment.  */
 361   unsigned upto;
 362
 363   /* Number of uses that cannot be expressed by the candidates in the set.  */
 364   unsigned bad_uses;
 365
 366   /* Candidate assigned to a use, together with the related costs.  */
 367   struct cost_pair **cand_for_use;
 368
 369   /* Number of times each candidate is used.  */
 370   unsigned *n_cand_uses;
 371
 372   /* The candidates used.  */
 373   bitmap cands;
 374
 375   /* The number of candidates in the set.  */
 376   unsigned n_cands;
 377
 378   /* Total number of registers needed.  */
 379   unsigned n_regs;
 380
 381   /* Total cost of expressing uses.  */
 382   comp_cost cand_use_cost;
 383
 384   /* Total cost of candidates.  */
 385   unsigned cand_cost;
 386
 387   /* Number of times each invariant is used.  */
 388   unsigned *n_invariant_uses;
 389
 390   /* The array holding the number of uses of each loop
 391      invariant expressions created by ivopt.  */
 392   unsigned *used_inv_expr;
 393
 394   /* The number of created loop invariants.  */
 395   unsigned num_used_inv_expr;
 396
 397   /* Total cost of the assignment.  */
 398   comp_cost cost;
 399 };
 400
 401 /* Difference of two iv candidate assignments.  */
 402
 403 struct iv_ca_delta
 404 {
 405   /* Changed use.  */
 406   struct iv_use *use;
 407
 408   /* An old assignment (for rollback purposes).  */
 409   struct cost_pair *old_cp;
 410
 411   /* A new assignment.  */
 412   struct cost_pair *new_cp;
 413
 414   /* Next change in the list.  */
 415   struct iv_ca_delta *next_change;
 416 };
 417
 418 /* Bound on number of candidates below that all candidates are considered.  */
 419
 420 #define CONSIDER_ALL_CANDIDATES_BOUND \
 421   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 422
 423 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 424    optimizing such a loop would help, and it would take ages).  */
 425
 426 #define MAX_CONSIDERED_USES \
 427   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 428
 429 /* If there are at most this number of ivs in the set, try removing unnecessary
 430    ivs from the set always.  */
 431
 432 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 433   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 434
 435 /* The list of trees for that the decl_rtl field must be reset is stored
 436    here.  */
 437
 438 static vec<tree> decl_rtl_to_reset;
 439
 440 static comp_cost force_expr_to_var_cost (tree, bool);
 441
 442 /* Number of uses recorded in DATA.  */
 443
 444 static inline unsigned
 445 n_iv_uses (struct ivopts_data *data)
 446 {
 447   return data->iv_uses.length ();
 448 }
 449
 450 /* Ith use recorded in DATA.  */
 451
 452 static inline struct iv_use *
 453 iv_use (struct ivopts_data *data, unsigned i)
 454 {
 455   return data->iv_uses[i];
 456 }
 457
 458 /* Number of candidates recorded in DATA.  */
 459
 460 static inline unsigned
 461 n_iv_cands (struct ivopts_data *data)
 462 {
 463   return data->iv_candidates.length ();
 464 }
 465
 466 /* Ith candidate recorded in DATA.  */
 467
 468 static inline struct iv_cand *
 469 iv_cand (struct ivopts_data *data, unsigned i)
 470 {
 471   return data->iv_candidates[i];
 472 }
 473
 474 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 475
 476 edge
 477 single_dom_exit (struct loop *loop)
 478 {
 479   edge exit = single_exit (loop);
 480
 481   if (!exit)
 482     return NULL;
 483
 484   if (!just_once_each_iteration_p (loop, exit->src))
 485     return NULL;
 486
 487   return exit;
 488 }
 489
 490 /* Dumps information about the induction variable IV to FILE.  */
 491
 492 void
 493 dump_iv (FILE *file, struct iv *iv, bool dump_name)
 494 {
 495   if (iv->ssa_name && dump_name)
 496     {
 497       fprintf (file, "ssa name ");
 498       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 499       fprintf (file, "\n");
 500     }
 501
 502   fprintf (file, "  type ");
 503   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 504   fprintf (file, "\n");
 505
 506   if (iv->step)
 507     {
 508       fprintf (file, "  base ");
 509       print_generic_expr (file, iv->base, TDF_SLIM);
 510       fprintf (file, "\n");
 511
 512       fprintf (file, "  step ");
 513       print_generic_expr (file, iv->step, TDF_SLIM);
 514       fprintf (file, "\n");
 515     }
 516   else
 517     {
 518       fprintf (file, "  invariant ");
 519       print_generic_expr (file, iv->base, TDF_SLIM);
 520       fprintf (file, "\n");
 521     }
 522
 523   if (iv->base_object)
 524     {
 525       fprintf (file, "  base object ");
 526       print_generic_expr (file, iv->base_object, TDF_SLIM);
 527       fprintf (file, "\n");
 528     }
 529
 530   if (iv->biv_p)
 531     fprintf (file, "  is a biv\n");
 532 }
 533
 534 /* Dumps information about the USE to FILE.  */
 535
 536 void
 537 dump_use (FILE *file, struct iv_use *use)
 538 {
 539   fprintf (file, "use %d", use->id);
 540   if (use->sub_id)
 541     fprintf (file, ".%d", use->sub_id);
 542
 543   fprintf (file, "\n");
 544
 545   switch (use->type)
 546     {
 547     case USE_NONLINEAR_EXPR:
 548       fprintf (file, "  generic\n");
 549       break;
 550
 551     case USE_ADDRESS:
 552       fprintf (file, "  address\n");
 553       break;
 554
 555     case USE_COMPARE:
 556       fprintf (file, "  compare\n");
 557       break;
 558
 559     default:
 560       gcc_unreachable ();
 561     }
 562
 563   fprintf (file, "  in statement ");
 564   print_gimple_stmt (file, use->stmt, 0, 0);
 565   fprintf (file, "\n");
 566
 567   fprintf (file, "  at position ");
 568   if (use->op_p)
 569     print_generic_expr (file, *use->op_p, TDF_SLIM);
 570   fprintf (file, "\n");
 571
 572   dump_iv (file, use->iv, false);
 573
 574   if (use->related_cands)
 575     {
 576       fprintf (file, "  related candidates ");
 577       dump_bitmap (file, use->related_cands);
 578     }
 579 }
 580
 581 /* Dumps information about the uses to FILE.  */
 582
 583 void
 584 dump_uses (FILE *file, struct ivopts_data *data)
 585 {
 586   unsigned i;
 587   struct iv_use *use;
 588
 589   for (i = 0; i < n_iv_uses (data); i++)
 590     {
 591       use = iv_use (data, i);
 592       do
 593         {
 594           dump_use (file, use);
 595           use = use->next;
 596         }
 597       while (use);
 598       fprintf (file, "\n");
 599     }
 600 }
 601
 602 /* Dumps information about induction variable candidate CAND to FILE.  */
 603
 604 void
 605 dump_cand (FILE *file, struct iv_cand *cand)
 606 {
 607   struct iv *iv = cand->iv;
 608
 609   fprintf (file, "candidate %d%s\n",
 610            cand->id, cand->important ? " (important)" : "");
 611
 612   if (cand->depends_on)
 613     {
 614       fprintf (file, "  depends on ");
 615       dump_bitmap (file, cand->depends_on);
 616     }
 617
 618   if (!iv)
 619     {
 620       fprintf (file, "  final value replacement\n");
 621       return;
 622     }
 623
 624   if (cand->var_before)
 625     {
 626       fprintf (file, "  var_before ");
 627       print_generic_expr (file, cand->var_before, TDF_SLIM);
 628       fprintf (file, "\n");
 629     }
 630   if (cand->var_after)
 631     {
 632       fprintf (file, "  var_after ");
 633       print_generic_expr (file, cand->var_after, TDF_SLIM);
 634       fprintf (file, "\n");
 635     }
 636
 637   switch (cand->pos)
 638     {
 639     case IP_NORMAL:
 640       fprintf (file, "  incremented before exit test\n");
 641       break;
 642
 643     case IP_BEFORE_USE:
 644       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 645       break;
 646
 647     case IP_AFTER_USE:
 648       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 649       break;
 650
 651     case IP_END:
 652       fprintf (file, "  incremented at end\n");
 653       break;
 654
 655     case IP_ORIGINAL:
 656       fprintf (file, "  original biv\n");
 657       break;
 658     }
 659
 660   dump_iv (file, iv, false);
 661 }
 662
 663 /* Returns the info for ssa version VER.  */
 664
 665 static inline struct version_info *
 666 ver_info (struct ivopts_data *data, unsigned ver)
 667 {
 668   return data->version_info + ver;
 669 }
 670
 671 /* Returns the info for ssa name NAME.  */
 672
 673 static inline struct version_info *
 674 name_info (struct ivopts_data *data, tree name)
 675 {
 676   return ver_info (data, SSA_NAME_VERSION (name));
 677 }
 678
 679 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 680    emitted in LOOP.  */
 681
 682 static bool
 683 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 684 {
 685   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 686
 687   gcc_assert (bb);
 688
 689   if (sbb == loop->latch)
 690     return true;
 691
 692   if (sbb != bb)
 693     return false;
 694
 695   return stmt == last_stmt (bb);
 696 }
 697
 698 /* Returns true if STMT if after the place where the original induction
 699    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 700    if the positions are identical.  */
 701
 702 static bool
 703 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 704 {
 705   basic_block cand_bb = gimple_bb (cand->incremented_at);
 706   basic_block stmt_bb = gimple_bb (stmt);
 707
 708   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 709     return false;
 710
 711   if (stmt_bb != cand_bb)
 712     return true;
 713
 714   if (true_if_equal
 715       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 716     return true;
 717   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 718 }
 719
 720 /* Returns true if STMT if after the place where the induction variable
 721    CAND is incremented in LOOP.  */
 722
 723 static bool
 724 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 725 {
 726   switch (cand->pos)
 727     {
 728     case IP_END:
 729       return false;
 730
 731     case IP_NORMAL:
 732       return stmt_after_ip_normal_pos (loop, stmt);
 733
 734     case IP_ORIGINAL:
 735     case IP_AFTER_USE:
 736       return stmt_after_inc_pos (cand, stmt, false);
 737
 738     case IP_BEFORE_USE:
 739       return stmt_after_inc_pos (cand, stmt, true);
 740
 741     default:
 742       gcc_unreachable ();
 743     }
 744 }
 745
 746 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 747
 748 static bool
 749 abnormal_ssa_name_p (tree exp)
 750 {
 751   if (!exp)
 752     return false;
 753
 754   if (TREE_CODE (exp) != SSA_NAME)
 755     return false;
 756
 757   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 758 }
 759
 760 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 761    abnormal phi node.  Callback for for_each_index.  */
 762
 763 static bool
 764 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 765                                   void *data ATTRIBUTE_UNUSED)
 766 {
 767   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 768     {
 769       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 770         return false;
 771       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 772         return false;
 773     }
 774
 775   return !abnormal_ssa_name_p (*index);
 776 }
 777
 778 /* Returns true if EXPR contains a ssa name that occurs in an
 779    abnormal phi node.  */
 780
 781 bool
 782 contains_abnormal_ssa_name_p (tree expr)
 783 {
 784   enum tree_code code;
 785   enum tree_code_class codeclass;
 786
 787   if (!expr)
 788     return false;
 789
 790   code = TREE_CODE (expr);
 791   codeclass = TREE_CODE_CLASS (code);
 792
 793   if (code == SSA_NAME)
 794     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 795
 796   if (code == INTEGER_CST
 797       || is_gimple_min_invariant (expr))
 798     return false;
 799
 800   if (code == ADDR_EXPR)
 801     return !for_each_index (&TREE_OPERAND (expr, 0),
 802                             idx_contains_abnormal_ssa_name_p,
 803                             NULL);
 804
 805   if (code == COND_EXPR)
 806     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 807       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 808       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 809
 810   switch (codeclass)
 811     {
 812     case tcc_binary:
 813     case tcc_comparison:
 814       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 815         return true;
 816
 817       /* Fallthru.  */
 818     case tcc_unary:
 819       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 820         return true;
 821
 822       break;
 823
 824     default:
 825       gcc_unreachable ();
 826     }
 827
 828   return false;
 829 }
 830
 831 /*  Returns the structure describing number of iterations determined from
 832     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 833
 834 static struct tree_niter_desc *
 835 niter_for_exit (struct ivopts_data *data, edge exit)
 836 {
 837   struct tree_niter_desc *desc;
 838   tree_niter_desc **slot;
 839
 840   if (!data->niters)
 841     {
 842       data->niters = new hash_map<edge, tree_niter_desc *>;
 843       slot = NULL;
 844     }
 845   else
 846     slot = data->niters->get (exit);
 847
 848   if (!slot)
 849     {
 850       /* Try to determine number of iterations.  We cannot safely work with ssa
 851          names that appear in phi nodes on abnormal edges, so that we do not
 852          create overlapping life ranges for them (PR 27283).  */
 853       desc = XNEW (struct tree_niter_desc);
 854       if (!number_of_iterations_exit (data->current_loop,
 855                                       exit, desc, true)
 856           || contains_abnormal_ssa_name_p (desc->niter))
 857         {
 858           XDELETE (desc);
 859           desc = NULL;
 860         }
 861       data->niters->put (exit, desc);
 862     }
 863   else
 864     desc = *slot;
 865
 866   return desc;
 867 }
 868
 869 /* Returns the structure describing number of iterations determined from
 870    single dominating exit of DATA->current_loop, or NULL if something
 871    goes wrong.  */
 872
 873 static struct tree_niter_desc *
 874 niter_for_single_dom_exit (struct ivopts_data *data)
 875 {
 876   edge exit = single_dom_exit (data->current_loop);
 877
 878   if (!exit)
 879     return NULL;
 880
 881   return niter_for_exit (data, exit);
 882 }
 883
 884 /* Initializes data structures used by the iv optimization pass, stored
 885    in DATA.  */
 886
 887 static void
 888 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 889 {
 890   data->version_info_size = 2 * num_ssa_names;
 891   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 892   data->relevant = BITMAP_ALLOC (NULL);
 893   data->important_candidates = BITMAP_ALLOC (NULL);
 894   data->max_inv_id = 0;
 895   data->niters = NULL;
 896   data->iv_uses.create (20);
 897   data->iv_candidates.create (20);
 898   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 899   data->inv_expr_id = 0;
 900   data->name_expansion_cache = NULL;
 901   decl_rtl_to_reset.create (20);
 902   gcc_obstack_init (&data->iv_obstack);
 903 }
 904
 905 /* Returns a memory object to that EXPR points.  In case we are able to
 906    determine that it does not point to any such object, NULL is returned.  */
 907
 908 static tree
 909 determine_base_object (tree expr)
 910 {
 911   enum tree_code code = TREE_CODE (expr);
 912   tree base, obj;
 913
 914   /* If this is a pointer casted to any type, we need to determine
 915      the base object for the pointer; so handle conversions before
 916      throwing away non-pointer expressions.  */
 917   if (CONVERT_EXPR_P (expr))
 918     return determine_base_object (TREE_OPERAND (expr, 0));
 919
 920   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 921     return NULL_TREE;
 922
 923   switch (code)
 924     {
 925     case INTEGER_CST:
 926       return NULL_TREE;
 927
 928     case ADDR_EXPR:
 929       obj = TREE_OPERAND (expr, 0);
 930       base = get_base_address (obj);
 931
 932       if (!base)
 933         return expr;
 934
 935       if (TREE_CODE (base) == MEM_REF)
 936         return determine_base_object (TREE_OPERAND (base, 0));
 937
 938       return fold_convert (ptr_type_node,
 939                            build_fold_addr_expr (base));
 940
 941     case POINTER_PLUS_EXPR:
 942       return determine_base_object (TREE_OPERAND (expr, 0));
 943
 944     case PLUS_EXPR:
 945     case MINUS_EXPR:
 946       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 947       gcc_unreachable ();
 948
 949     default:
 950       return fold_convert (ptr_type_node, expr);
 951     }
 952 }
 953
 954 /* Return true if address expression with non-DECL_P operand appears
 955    in EXPR.  */
 956
 957 static bool
 958 contain_complex_addr_expr (tree expr)
 959 {
 960   bool res = false;
 961
 962   STRIP_NOPS (expr);
 963   switch (TREE_CODE (expr))
 964     {
 965     case POINTER_PLUS_EXPR:
 966     case PLUS_EXPR:
 967     case MINUS_EXPR:
 968       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 969       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 970       break;
 971
 972     case ADDR_EXPR:
 973       return (!DECL_P (TREE_OPERAND (expr, 0)));
 974
 975     default:
 976       return false;
 977     }
 978
 979   return res;
 980 }
 981
 982 /* Allocates an induction variable with given initial value BASE and step STEP
 983    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
 984
 985 static struct iv *
 986 alloc_iv (struct ivopts_data *data, tree base, tree step,
 987           bool no_overflow = false)
 988 {
 989   tree expr = base;
 990   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
 991                                               sizeof (struct iv));
 992   gcc_assert (step != NULL_TREE);
 993
 994   /* Lower address expression in base except ones with DECL_P as operand.
 995      By doing this:
 996        1) More accurate cost can be computed for address expressions;
 997        2) Duplicate candidates won't be created for bases in different
 998           forms, like &a[0] and &a.  */
 999   STRIP_NOPS (expr);
1000   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1001       || contain_complex_addr_expr (expr))
1002     {
1003       aff_tree comb;
1004       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1005       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1006     }
1007
1008   iv->base = base;
1009   iv->base_object = determine_base_object (base);
1010   iv->step = step;
1011   iv->biv_p = false;
1012   iv->have_use_for = false;
1013   iv->use_id = 0;
1014   iv->ssa_name = NULL_TREE;
1015   iv->no_overflow = no_overflow;
1016
1017   return iv;
1018 }
1019
1020 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1021    doesn't overflow.  */
1022
1023 static void
1024 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1025         bool no_overflow)
1026 {
1027   struct version_info *info = name_info (data, iv);
1028
1029   gcc_assert (!info->iv);
1030
1031   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1032   info->iv = alloc_iv (data, base, step, no_overflow);
1033   info->iv->ssa_name = iv;
1034 }
1035
1036 /* Finds induction variable declaration for VAR.  */
1037
1038 static struct iv *
1039 get_iv (struct ivopts_data *data, tree var)
1040 {
1041   basic_block bb;
1042   tree type = TREE_TYPE (var);
1043
1044   if (!POINTER_TYPE_P (type)
1045       && !INTEGRAL_TYPE_P (type))
1046     return NULL;
1047
1048   if (!name_info (data, var)->iv)
1049     {
1050       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1051
1052       if (!bb
1053           || !flow_bb_inside_loop_p (data->current_loop, bb))
1054         set_iv (data, var, var, build_int_cst (type, 0), true);
1055     }
1056
1057   return name_info (data, var)->iv;
1058 }
1059
1060 /* Return the first non-invariant ssa var found in EXPR.  */
1061
1062 static tree
1063 extract_single_var_from_expr (tree expr)
1064 {
1065   int i, n;
1066   tree tmp;
1067   enum tree_code code;
1068
1069   if (!expr || is_gimple_min_invariant (expr))
1070     return NULL;
1071
1072   code = TREE_CODE (expr);
1073   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1074     {
1075       n = TREE_OPERAND_LENGTH (expr);
1076       for (i = 0; i < n; i++)
1077         {
1078           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1079
1080           if (tmp)
1081             return tmp;
1082         }
1083     }
1084   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1085 }
1086
1087 /* Finds basic ivs.  */
1088
1089 static bool
1090 find_bivs (struct ivopts_data *data)
1091 {
1092   gphi *phi;
1093   affine_iv iv;
1094   tree step, type, base, stop;
1095   bool found = false;
1096   struct loop *loop = data->current_loop;
1097   gphi_iterator psi;
1098
1099   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1100     {
1101       phi = psi.phi ();
1102
1103       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1104         continue;
1105
1106       if (virtual_operand_p (PHI_RESULT (phi)))
1107         continue;
1108
1109       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1110         continue;
1111
1112       if (integer_zerop (iv.step))
1113         continue;
1114
1115       step = iv.step;
1116       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1117       /* Stop expanding iv base at the first ssa var referred by iv step.
1118          Ideally we should stop at any ssa var, because that's expensive
1119          and unusual to happen, we just do it on the first one.
1120
1121          See PR64705 for the rationale.  */
1122       stop = extract_single_var_from_expr (step);
1123       base = expand_simple_operations (base, stop);
1124       if (contains_abnormal_ssa_name_p (base)
1125           || contains_abnormal_ssa_name_p (step))
1126         continue;
1127
1128       type = TREE_TYPE (PHI_RESULT (phi));
1129       base = fold_convert (type, base);
1130       if (step)
1131         {
1132           if (POINTER_TYPE_P (type))
1133             step = convert_to_ptrofftype (step);
1134           else
1135             step = fold_convert (type, step);
1136         }
1137
1138       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1139       found = true;
1140     }
1141
1142   return found;
1143 }
1144
1145 /* Marks basic ivs.  */
1146
1147 static void
1148 mark_bivs (struct ivopts_data *data)
1149 {
1150   gphi *phi;
1151   gimple def;
1152   tree var;
1153   struct iv *iv, *incr_iv;
1154   struct loop *loop = data->current_loop;
1155   basic_block incr_bb;
1156   gphi_iterator psi;
1157
1158   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1159     {
1160       phi = psi.phi ();
1161
1162       iv = get_iv (data, PHI_RESULT (phi));
1163       if (!iv)
1164         continue;
1165
1166       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1167       def = SSA_NAME_DEF_STMT (var);
1168       /* Don't mark iv peeled from other one as biv.  */
1169       if (def
1170           && gimple_code (def) == GIMPLE_PHI
1171           && gimple_bb (def) == loop->header)
1172         continue;
1173
1174       incr_iv = get_iv (data, var);
1175       if (!incr_iv)
1176         continue;
1177
1178       /* If the increment is in the subloop, ignore it.  */
1179       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1180       if (incr_bb->loop_father != data->current_loop
1181           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1182         continue;
1183
1184       iv->biv_p = true;
1185       incr_iv->biv_p = true;
1186     }
1187 }
1188
1189 /* Checks whether STMT defines a linear induction variable and stores its
1190    parameters to IV.  */
1191
1192 static bool
1193 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1194 {
1195   tree lhs, stop;
1196   struct loop *loop = data->current_loop;
1197
1198   iv->base = NULL_TREE;
1199   iv->step = NULL_TREE;
1200
1201   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1202     return false;
1203
1204   lhs = gimple_assign_lhs (stmt);
1205   if (TREE_CODE (lhs) != SSA_NAME)
1206     return false;
1207
1208   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1209     return false;
1210
1211   /* Stop expanding iv base at the first ssa var referred by iv step.
1212      Ideally we should stop at any ssa var, because that's expensive
1213      and unusual to happen, we just do it on the first one.
1214
1215      See PR64705 for the rationale.  */
1216   stop = extract_single_var_from_expr (iv->step);
1217   iv->base = expand_simple_operations (iv->base, stop);
1218   if (contains_abnormal_ssa_name_p (iv->base)
1219       || contains_abnormal_ssa_name_p (iv->step))
1220     return false;
1221
1222   /* If STMT could throw, then do not consider STMT as defining a GIV.
1223      While this will suppress optimizations, we can not safely delete this
1224      GIV and associated statements, even if it appears it is not used.  */
1225   if (stmt_could_throw_p (stmt))
1226     return false;
1227
1228   return true;
1229 }
1230
1231 /* Finds general ivs in statement STMT.  */
1232
1233 static void
1234 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1235 {
1236   affine_iv iv;
1237
1238   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1239     return;
1240
1241   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1242 }
1243
1244 /* Finds general ivs in basic block BB.  */
1245
1246 static void
1247 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1248 {
1249   gimple_stmt_iterator bsi;
1250
1251   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1252     find_givs_in_stmt (data, gsi_stmt (bsi));
1253 }
1254
1255 /* Finds general ivs.  */
1256
1257 static void
1258 find_givs (struct ivopts_data *data)
1259 {
1260   struct loop *loop = data->current_loop;
1261   basic_block *body = get_loop_body_in_dom_order (loop);
1262   unsigned i;
1263
1264   for (i = 0; i < loop->num_nodes; i++)
1265     find_givs_in_bb (data, body[i]);
1266   free (body);
1267 }
1268
1269 /* For each ssa name defined in LOOP determines whether it is an induction
1270    variable and if so, its initial value and step.  */
1271
1272 static bool
1273 find_induction_variables (struct ivopts_data *data)
1274 {
1275   unsigned i;
1276   bitmap_iterator bi;
1277
1278   if (!find_bivs (data))
1279     return false;
1280
1281   find_givs (data);
1282   mark_bivs (data);
1283
1284   if (dump_file && (dump_flags & TDF_DETAILS))
1285     {
1286       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1287
1288       if (niter)
1289         {
1290           fprintf (dump_file, "  number of iterations ");
1291           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1292           if (!integer_zerop (niter->may_be_zero))
1293             {
1294               fprintf (dump_file, "; zero if ");
1295               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1296             }
1297           fprintf (dump_file, "\n\n");
1298         };
1299
1300       fprintf (dump_file, "Induction variables:\n\n");
1301
1302       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1303         {
1304           if (ver_info (data, i)->iv)
1305             dump_iv (dump_file, ver_info (data, i)->iv, true);
1306         }
1307     }
1308
1309   return true;
1310 }
1311
1312 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1313    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1314    is the const offset stripped from IV base.  For uses of other types,
1315    ADDR_BASE and ADDR_OFFSET are zero by default.  */
1316
1317 static struct iv_use *
1318 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1319             gimple stmt, enum use_type use_type, tree addr_base = NULL,
1320             unsigned HOST_WIDE_INT addr_offset = 0)
1321 {
1322   struct iv_use *use = XCNEW (struct iv_use);
1323
1324   use->id = n_iv_uses (data);
1325   use->sub_id = 0;
1326   use->type = use_type;
1327   use->iv = iv;
1328   use->stmt = stmt;
1329   use->op_p = use_p;
1330   use->related_cands = BITMAP_ALLOC (NULL);
1331   use->next = NULL;
1332   use->addr_base = addr_base;
1333   use->addr_offset = addr_offset;
1334
1335   data->iv_uses.safe_push (use);
1336
1337   return use;
1338 }
1339
1340 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1341    The sub use is recorded under the one whose use id is ID_GROUP.  */
1342
1343 static struct iv_use *
1344 record_sub_use (struct ivopts_data *data, tree *use_p,
1345                     struct iv *iv, gimple stmt, enum use_type use_type,
1346                     tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1347                     unsigned int id_group)
1348 {
1349   struct iv_use *use = XCNEW (struct iv_use);
1350   struct iv_use *group = iv_use (data, id_group);
1351
1352   use->id = group->id;
1353   use->sub_id = 0;
1354   use->type = use_type;
1355   use->iv = iv;
1356   use->stmt = stmt;
1357   use->op_p = use_p;
1358   use->related_cands = NULL;
1359   use->addr_base = addr_base;
1360   use->addr_offset = addr_offset;
1361
1362   /* Sub use list is maintained in offset ascending order.  */
1363   if (addr_offset <= group->addr_offset)
1364     {
1365       use->related_cands = group->related_cands;
1366       group->related_cands = NULL;
1367       use->next = group;
1368       data->iv_uses[id_group] = use;
1369     }
1370   else
1371     {
1372       struct iv_use *pre;
1373       do
1374         {
1375           pre = group;
1376           group = group->next;
1377         }
1378       while (group && addr_offset > group->addr_offset);
1379       use->next = pre->next;
1380       pre->next = use;
1381     }
1382
1383   return use;
1384 }
1385
1386 /* Checks whether OP is a loop-level invariant and if so, records it.
1387    NONLINEAR_USE is true if the invariant is used in a way we do not
1388    handle specially.  */
1389
1390 static void
1391 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1392 {
1393   basic_block bb;
1394   struct version_info *info;
1395
1396   if (TREE_CODE (op) != SSA_NAME
1397       || virtual_operand_p (op))
1398     return;
1399
1400   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1401   if (bb
1402       && flow_bb_inside_loop_p (data->current_loop, bb))
1403     return;
1404
1405   info = name_info (data, op);
1406   info->name = op;
1407   info->has_nonlin_use |= nonlinear_use;
1408   if (!info->inv_id)
1409     info->inv_id = ++data->max_inv_id;
1410   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1411 }
1412
1413 /* Checks whether the use OP is interesting and if so, records it.  */
1414
1415 static struct iv_use *
1416 find_interesting_uses_op (struct ivopts_data *data, tree op)
1417 {
1418   struct iv *iv;
1419   gimple stmt;
1420   struct iv_use *use;
1421
1422   if (TREE_CODE (op) != SSA_NAME)
1423     return NULL;
1424
1425   iv = get_iv (data, op);
1426   if (!iv)
1427     return NULL;
1428
1429   if (iv->have_use_for)
1430     {
1431       use = iv_use (data, iv->use_id);
1432
1433       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1434       return use;
1435     }
1436
1437   if (integer_zerop (iv->step))
1438     {
1439       record_invariant (data, op, true);
1440       return NULL;
1441     }
1442   iv->have_use_for = true;
1443
1444   stmt = SSA_NAME_DEF_STMT (op);
1445   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1446               || is_gimple_assign (stmt));
1447
1448   use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1449   iv->use_id = use->id;
1450
1451   return use;
1452 }
1453
1454 /* Given a condition in statement STMT, checks whether it is a compare
1455    of an induction variable and an invariant.  If this is the case,
1456    CONTROL_VAR is set to location of the iv, BOUND to the location of
1457    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1458    induction variable descriptions, and true is returned.  If this is not
1459    the case, CONTROL_VAR and BOUND are set to the arguments of the
1460    condition and false is returned.  */
1461
1462 static bool
1463 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1464                        tree **control_var, tree **bound,
1465                        struct iv **iv_var, struct iv **iv_bound)
1466 {
1467   /* The objects returned when COND has constant operands.  */
1468   static struct iv const_iv;
1469   static tree zero;
1470   tree *op0 = &zero, *op1 = &zero;
1471   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1472   bool ret = false;
1473
1474   if (gimple_code (stmt) == GIMPLE_COND)
1475     {
1476       gcond *cond_stmt = as_a <gcond *> (stmt);
1477       op0 = gimple_cond_lhs_ptr (cond_stmt);
1478       op1 = gimple_cond_rhs_ptr (cond_stmt);
1479     }
1480   else
1481     {
1482       op0 = gimple_assign_rhs1_ptr (stmt);
1483       op1 = gimple_assign_rhs2_ptr (stmt);
1484     }
1485
1486   zero = integer_zero_node;
1487   const_iv.step = integer_zero_node;
1488
1489   if (TREE_CODE (*op0) == SSA_NAME)
1490     iv0 = get_iv (data, *op0);
1491   if (TREE_CODE (*op1) == SSA_NAME)
1492     iv1 = get_iv (data, *op1);
1493
1494   /* Exactly one of the compared values must be an iv, and the other one must
1495      be an invariant.  */
1496   if (!iv0 || !iv1)
1497     goto end;
1498
1499   if (integer_zerop (iv0->step))
1500     {
1501       /* Control variable may be on the other side.  */
1502       std::swap (op0, op1);
1503       std::swap (iv0, iv1);
1504     }
1505   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1506
1507 end:
1508   if (control_var)
1509     *control_var = op0;
1510   if (iv_var)
1511     *iv_var = iv0;
1512   if (bound)
1513     *bound = op1;
1514   if (iv_bound)
1515     *iv_bound = iv1;
1516
1517   return ret;
1518 }
1519
1520 /* Checks whether the condition in STMT is interesting and if so,
1521    records it.  */
1522
1523 static void
1524 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1525 {
1526   tree *var_p, *bound_p;
1527   struct iv *var_iv;
1528
1529   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1530     {
1531       find_interesting_uses_op (data, *var_p);
1532       find_interesting_uses_op (data, *bound_p);
1533       return;
1534     }
1535
1536   record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1537 }
1538
1539 /* Returns the outermost loop EXPR is obviously invariant in
1540    relative to the loop LOOP, i.e. if all its operands are defined
1541    outside of the returned loop.  Returns NULL if EXPR is not
1542    even obviously invariant in LOOP.  */
1543
1544 struct loop *
1545 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1546 {
1547   basic_block def_bb;
1548   unsigned i, len;
1549
1550   if (is_gimple_min_invariant (expr))
1551     return current_loops->tree_root;
1552
1553   if (TREE_CODE (expr) == SSA_NAME)
1554     {
1555       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1556       if (def_bb)
1557         {
1558           if (flow_bb_inside_loop_p (loop, def_bb))
1559             return NULL;
1560           return superloop_at_depth (loop,
1561                                      loop_depth (def_bb->loop_father) + 1);
1562         }
1563
1564       return current_loops->tree_root;
1565     }
1566
1567   if (!EXPR_P (expr))
1568     return NULL;
1569
1570   unsigned maxdepth = 0;
1571   len = TREE_OPERAND_LENGTH (expr);
1572   for (i = 0; i < len; i++)
1573     {
1574       struct loop *ivloop;
1575       if (!TREE_OPERAND (expr, i))
1576         continue;
1577
1578       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1579       if (!ivloop)
1580         return NULL;
1581       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1582     }
1583
1584   return superloop_at_depth (loop, maxdepth);
1585 }
1586
1587 /* Returns true if expression EXPR is obviously invariant in LOOP,
1588    i.e. if all its operands are defined outside of the LOOP.  LOOP
1589    should not be the function body.  */
1590
1591 bool
1592 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1593 {
1594   basic_block def_bb;
1595   unsigned i, len;
1596
1597   gcc_assert (loop_depth (loop) > 0);
1598
1599   if (is_gimple_min_invariant (expr))
1600     return true;
1601
1602   if (TREE_CODE (expr) == SSA_NAME)
1603     {
1604       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1605       if (def_bb
1606           && flow_bb_inside_loop_p (loop, def_bb))
1607         return false;
1608
1609       return true;
1610     }
1611
1612   if (!EXPR_P (expr))
1613     return false;
1614
1615   len = TREE_OPERAND_LENGTH (expr);
1616   for (i = 0; i < len; i++)
1617     if (TREE_OPERAND (expr, i)
1618         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1619       return false;
1620
1621   return true;
1622 }
1623
1624 /* Cumulates the steps of indices into DATA and replaces their values with the
1625    initial ones.  Returns false when the value of the index cannot be determined.
1626    Callback for for_each_index.  */
1627
1628 struct ifs_ivopts_data
1629 {
1630   struct ivopts_data *ivopts_data;
1631   gimple stmt;
1632   tree step;
1633 };
1634
1635 static bool
1636 idx_find_step (tree base, tree *idx, void *data)
1637 {
1638   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1639   struct iv *iv;
1640   bool use_overflow_semantics = false;
1641   tree step, iv_base, iv_step, lbound, off;
1642   struct loop *loop = dta->ivopts_data->current_loop;
1643
1644   /* If base is a component ref, require that the offset of the reference
1645      be invariant.  */
1646   if (TREE_CODE (base) == COMPONENT_REF)
1647     {
1648       off = component_ref_field_offset (base);
1649       return expr_invariant_in_loop_p (loop, off);
1650     }
1651
1652   /* If base is array, first check whether we will be able to move the
1653      reference out of the loop (in order to take its address in strength
1654      reduction).  In order for this to work we need both lower bound
1655      and step to be loop invariants.  */
1656   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1657     {
1658       /* Moreover, for a range, the size needs to be invariant as well.  */
1659       if (TREE_CODE (base) == ARRAY_RANGE_REF
1660           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1661         return false;
1662
1663       step = array_ref_element_size (base);
1664       lbound = array_ref_low_bound (base);
1665
1666       if (!expr_invariant_in_loop_p (loop, step)
1667           || !expr_invariant_in_loop_p (loop, lbound))
1668         return false;
1669     }
1670
1671   if (TREE_CODE (*idx) != SSA_NAME)
1672     return true;
1673
1674   iv = get_iv (dta->ivopts_data, *idx);
1675   if (!iv)
1676     return false;
1677
1678   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1679           *&x[0], which is not folded and does not trigger the
1680           ARRAY_REF path below.  */
1681   *idx = iv->base;
1682
1683   if (integer_zerop (iv->step))
1684     return true;
1685
1686   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1687     {
1688       step = array_ref_element_size (base);
1689
1690       /* We only handle addresses whose step is an integer constant.  */
1691       if (TREE_CODE (step) != INTEGER_CST)
1692         return false;
1693     }
1694   else
1695     /* The step for pointer arithmetics already is 1 byte.  */
1696     step = size_one_node;
1697
1698   iv_base = iv->base;
1699   iv_step = iv->step;
1700   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1701     use_overflow_semantics = true;
1702
1703   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1704                             sizetype, &iv_base, &iv_step, dta->stmt,
1705                             use_overflow_semantics))
1706     {
1707       /* The index might wrap.  */
1708       return false;
1709     }
1710
1711   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1712   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1713
1714   return true;
1715 }
1716
1717 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1718    object is passed to it in DATA.  */
1719
1720 static bool
1721 idx_record_use (tree base, tree *idx,
1722                 void *vdata)
1723 {
1724   struct ivopts_data *data = (struct ivopts_data *) vdata;
1725   find_interesting_uses_op (data, *idx);
1726   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1727     {
1728       find_interesting_uses_op (data, array_ref_element_size (base));
1729       find_interesting_uses_op (data, array_ref_low_bound (base));
1730     }
1731   return true;
1732 }
1733
1734 /* If we can prove that TOP = cst * BOT for some constant cst,
1735    store cst to MUL and return true.  Otherwise return false.
1736    The returned value is always sign-extended, regardless of the
1737    signedness of TOP and BOT.  */
1738
1739 static bool
1740 constant_multiple_of (tree top, tree bot, widest_int *mul)
1741 {
1742   tree mby;
1743   enum tree_code code;
1744   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1745   widest_int res, p0, p1;
1746
1747   STRIP_NOPS (top);
1748   STRIP_NOPS (bot);
1749
1750   if (operand_equal_p (top, bot, 0))
1751     {
1752       *mul = 1;
1753       return true;
1754     }
1755
1756   code = TREE_CODE (top);
1757   switch (code)
1758     {
1759     case MULT_EXPR:
1760       mby = TREE_OPERAND (top, 1);
1761       if (TREE_CODE (mby) != INTEGER_CST)
1762         return false;
1763
1764       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1765         return false;
1766
1767       *mul = wi::sext (res * wi::to_widest (mby), precision);
1768       return true;
1769
1770     case PLUS_EXPR:
1771     case MINUS_EXPR:
1772       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1773           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1774         return false;
1775
1776       if (code == MINUS_EXPR)
1777         p1 = -p1;
1778       *mul = wi::sext (p0 + p1, precision);
1779       return true;
1780
1781     case INTEGER_CST:
1782       if (TREE_CODE (bot) != INTEGER_CST)
1783         return false;
1784
1785       p0 = widest_int::from (top, SIGNED);
1786       p1 = widest_int::from (bot, SIGNED);
1787       if (p1 == 0)
1788         return false;
1789       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1790       return res == 0;
1791
1792     default:
1793       return false;
1794     }
1795 }
1796
1797 /* Return true if memory reference REF with step STEP may be unaligned.  */
1798
1799 static bool
1800 may_be_unaligned_p (tree ref, tree step)
1801 {
1802   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1803      thus they are not misaligned.  */
1804   if (TREE_CODE (ref) == TARGET_MEM_REF)
1805     return false;
1806
1807   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1808   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1809     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1810
1811   unsigned HOST_WIDE_INT bitpos;
1812   unsigned int ref_align;
1813   get_object_alignment_1 (ref, &ref_align, &bitpos);
1814   if (ref_align < align
1815       || (bitpos % align) != 0
1816       || (bitpos % BITS_PER_UNIT) != 0)
1817     return true;
1818
1819   unsigned int trailing_zeros = tree_ctz (step);
1820   if (trailing_zeros < HOST_BITS_PER_INT
1821       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1822     return true;
1823
1824   return false;
1825 }
1826
1827 /* Return true if EXPR may be non-addressable.   */
1828
1829 bool
1830 may_be_nonaddressable_p (tree expr)
1831 {
1832   switch (TREE_CODE (expr))
1833     {
1834     case TARGET_MEM_REF:
1835       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1836          target, thus they are always addressable.  */
1837       return false;
1838
1839     case COMPONENT_REF:
1840       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1841              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1842
1843     case VIEW_CONVERT_EXPR:
1844       /* This kind of view-conversions may wrap non-addressable objects
1845          and make them look addressable.  After some processing the
1846          non-addressability may be uncovered again, causing ADDR_EXPRs
1847          of inappropriate objects to be built.  */
1848       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1849           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1850         return true;
1851
1852       /* ... fall through ... */
1853
1854     case ARRAY_REF:
1855     case ARRAY_RANGE_REF:
1856       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1857
1858     CASE_CONVERT:
1859       return true;
1860
1861     default:
1862       break;
1863     }
1864
1865   return false;
1866 }
1867
1868 static tree
1869 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1870
1871 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1872    If there is an existing use which has same stripped iv base and step,
1873    this function records this one as a sub use to that; otherwise records
1874    it as a normal one.  */
1875
1876 static struct iv_use *
1877 record_group_use (struct ivopts_data *data, tree *use_p,
1878                   struct iv *iv, gimple stmt, enum use_type use_type)
1879 {
1880   unsigned int i;
1881   struct iv_use *use;
1882   tree addr_base;
1883   unsigned HOST_WIDE_INT addr_offset;
1884
1885   /* Only support sub use for address type uses, that is, with base
1886      object.  */
1887   if (!iv->base_object)
1888     return record_use (data, use_p, iv, stmt, use_type);
1889
1890   addr_base = strip_offset (iv->base, &addr_offset);
1891   for (i = 0; i < n_iv_uses (data); i++)
1892     {
1893       use = iv_use (data, i);
1894       if (use->type != USE_ADDRESS || !use->iv->base_object)
1895         continue;
1896
1897       /* Check if it has the same stripped base and step.  */
1898       if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1899           && operand_equal_p (iv->step, use->iv->step, 0)
1900           && operand_equal_p (addr_base, use->addr_base, 0))
1901         break;
1902     }
1903
1904   if (i == n_iv_uses (data))
1905     return record_use (data, use_p, iv, stmt,
1906                        use_type, addr_base, addr_offset);
1907   else
1908     return record_sub_use (data, use_p, iv, stmt,
1909                            use_type, addr_base, addr_offset, i);
1910 }
1911
1912 /* Finds addresses in *OP_P inside STMT.  */
1913
1914 static void
1915 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1916 {
1917   tree base = *op_p, step = size_zero_node;
1918   struct iv *civ;
1919   struct ifs_ivopts_data ifs_ivopts_data;
1920
1921   /* Do not play with volatile memory references.  A bit too conservative,
1922      perhaps, but safe.  */
1923   if (gimple_has_volatile_ops (stmt))
1924     goto fail;
1925
1926   /* Ignore bitfields for now.  Not really something terribly complicated
1927      to handle.  TODO.  */
1928   if (TREE_CODE (base) == BIT_FIELD_REF)
1929     goto fail;
1930
1931   base = unshare_expr (base);
1932
1933   if (TREE_CODE (base) == TARGET_MEM_REF)
1934     {
1935       tree type = build_pointer_type (TREE_TYPE (base));
1936       tree astep;
1937
1938       if (TMR_BASE (base)
1939           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1940         {
1941           civ = get_iv (data, TMR_BASE (base));
1942           if (!civ)
1943             goto fail;
1944
1945           TMR_BASE (base) = civ->base;
1946           step = civ->step;
1947         }
1948       if (TMR_INDEX2 (base)
1949           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1950         {
1951           civ = get_iv (data, TMR_INDEX2 (base));
1952           if (!civ)
1953             goto fail;
1954
1955           TMR_INDEX2 (base) = civ->base;
1956           step = civ->step;
1957         }
1958       if (TMR_INDEX (base)
1959           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1960         {
1961           civ = get_iv (data, TMR_INDEX (base));
1962           if (!civ)
1963             goto fail;
1964
1965           TMR_INDEX (base) = civ->base;
1966           astep = civ->step;
1967
1968           if (astep)
1969             {
1970               if (TMR_STEP (base))
1971                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1972
1973               step = fold_build2 (PLUS_EXPR, type, step, astep);
1974             }
1975         }
1976
1977       if (integer_zerop (step))
1978         goto fail;
1979       base = tree_mem_ref_addr (type, base);
1980     }
1981   else
1982     {
1983       ifs_ivopts_data.ivopts_data = data;
1984       ifs_ivopts_data.stmt = stmt;
1985       ifs_ivopts_data.step = size_zero_node;
1986       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1987           || integer_zerop (ifs_ivopts_data.step))
1988         goto fail;
1989       step = ifs_ivopts_data.step;
1990
1991       /* Check that the base expression is addressable.  This needs
1992          to be done after substituting bases of IVs into it.  */
1993       if (may_be_nonaddressable_p (base))
1994         goto fail;
1995
1996       /* Moreover, on strict alignment platforms, check that it is
1997          sufficiently aligned.  */
1998       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1999         goto fail;
2000
2001       base = build_fold_addr_expr (base);
2002
2003       /* Substituting bases of IVs into the base expression might
2004          have caused folding opportunities.  */
2005       if (TREE_CODE (base) == ADDR_EXPR)
2006         {
2007           tree *ref = &TREE_OPERAND (base, 0);
2008           while (handled_component_p (*ref))
2009             ref = &TREE_OPERAND (*ref, 0);
2010           if (TREE_CODE (*ref) == MEM_REF)
2011             {
2012               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2013                                       TREE_OPERAND (*ref, 0),
2014                                       TREE_OPERAND (*ref, 1));
2015               if (tem)
2016                 *ref = tem;
2017             }
2018         }
2019     }
2020
2021   civ = alloc_iv (data, base, step);
2022   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2023   return;
2024
2025 fail:
2026   for_each_index (op_p, idx_record_use, data);
2027 }
2028
2029 /* Finds and records invariants used in STMT.  */
2030
2031 static void
2032 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2033 {
2034   ssa_op_iter iter;
2035   use_operand_p use_p;
2036   tree op;
2037
2038   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2039     {
2040       op = USE_FROM_PTR (use_p);
2041       record_invariant (data, op, false);
2042     }
2043 }
2044
2045 /* Finds interesting uses of induction variables in the statement STMT.  */
2046
2047 static void
2048 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2049 {
2050   struct iv *iv;
2051   tree op, *lhs, *rhs;
2052   ssa_op_iter iter;
2053   use_operand_p use_p;
2054   enum tree_code code;
2055
2056   find_invariants_stmt (data, stmt);
2057
2058   if (gimple_code (stmt) == GIMPLE_COND)
2059     {
2060       find_interesting_uses_cond (data, stmt);
2061       return;
2062     }
2063
2064   if (is_gimple_assign (stmt))
2065     {
2066       lhs = gimple_assign_lhs_ptr (stmt);
2067       rhs = gimple_assign_rhs1_ptr (stmt);
2068
2069       if (TREE_CODE (*lhs) == SSA_NAME)
2070         {
2071           /* If the statement defines an induction variable, the uses are not
2072              interesting by themselves.  */
2073
2074           iv = get_iv (data, *lhs);
2075
2076           if (iv && !integer_zerop (iv->step))
2077             return;
2078         }
2079
2080       code = gimple_assign_rhs_code (stmt);
2081       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2082           && (REFERENCE_CLASS_P (*rhs)
2083               || is_gimple_val (*rhs)))
2084         {
2085           if (REFERENCE_CLASS_P (*rhs))
2086             find_interesting_uses_address (data, stmt, rhs);
2087           else
2088             find_interesting_uses_op (data, *rhs);
2089
2090           if (REFERENCE_CLASS_P (*lhs))
2091             find_interesting_uses_address (data, stmt, lhs);
2092           return;
2093         }
2094       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2095         {
2096           find_interesting_uses_cond (data, stmt);
2097           return;
2098         }
2099
2100       /* TODO -- we should also handle address uses of type
2101
2102          memory = call (whatever);
2103
2104          and
2105
2106          call (memory).  */
2107     }
2108
2109   if (gimple_code (stmt) == GIMPLE_PHI
2110       && gimple_bb (stmt) == data->current_loop->header)
2111     {
2112       iv = get_iv (data, PHI_RESULT (stmt));
2113
2114       if (iv && !integer_zerop (iv->step))
2115         return;
2116     }
2117
2118   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2119     {
2120       op = USE_FROM_PTR (use_p);
2121
2122       if (TREE_CODE (op) != SSA_NAME)
2123         continue;
2124
2125       iv = get_iv (data, op);
2126       if (!iv)
2127         continue;
2128
2129       find_interesting_uses_op (data, op);
2130     }
2131 }
2132
2133 /* Finds interesting uses of induction variables outside of loops
2134    on loop exit edge EXIT.  */
2135
2136 static void
2137 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2138 {
2139   gphi *phi;
2140   gphi_iterator psi;
2141   tree def;
2142
2143   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2144     {
2145       phi = psi.phi ();
2146       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2147       if (!virtual_operand_p (def))
2148         find_interesting_uses_op (data, def);
2149     }
2150 }
2151
2152 /* Finds uses of the induction variables that are interesting.  */
2153
2154 static void
2155 find_interesting_uses (struct ivopts_data *data)
2156 {
2157   basic_block bb;
2158   gimple_stmt_iterator bsi;
2159   basic_block *body = get_loop_body (data->current_loop);
2160   unsigned i;
2161   struct version_info *info;
2162   edge e;
2163
2164   if (dump_file && (dump_flags & TDF_DETAILS))
2165     fprintf (dump_file, "Uses:\n\n");
2166
2167   for (i = 0; i < data->current_loop->num_nodes; i++)
2168     {
2169       edge_iterator ei;
2170       bb = body[i];
2171
2172       FOR_EACH_EDGE (e, ei, bb->succs)
2173         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2174             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2175           find_interesting_uses_outside (data, e);
2176
2177       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2178         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2179       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2180         if (!is_gimple_debug (gsi_stmt (bsi)))
2181           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2182     }
2183
2184   if (dump_file && (dump_flags & TDF_DETAILS))
2185     {
2186       bitmap_iterator bi;
2187
2188       fprintf (dump_file, "\n");
2189
2190       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2191         {
2192           info = ver_info (data, i);
2193           if (info->inv_id)
2194             {
2195               fprintf (dump_file, "  ");
2196               print_generic_expr (dump_file, info->name, TDF_SLIM);
2197               fprintf (dump_file, " is invariant (%d)%s\n",
2198                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2199             }
2200         }
2201
2202       fprintf (dump_file, "\n");
2203     }
2204
2205   free (body);
2206 }
2207
2208 /* Compute maximum offset of [base + offset] addressing mode
2209    for memory reference represented by USE.  */
2210
2211 static HOST_WIDE_INT
2212 compute_max_addr_offset (struct iv_use *use)
2213 {
2214   int width;
2215   rtx reg, addr;
2216   HOST_WIDE_INT i, off;
2217   unsigned list_index, num;
2218   addr_space_t as;
2219   machine_mode mem_mode, addr_mode;
2220   static vec<HOST_WIDE_INT> max_offset_list;
2221
2222   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2223   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2224
2225   num = max_offset_list.length ();
2226   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2227   if (list_index >= num)
2228     {
2229       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2230       for (; num < max_offset_list.length (); num++)
2231         max_offset_list[num] = -1;
2232     }
2233
2234   off = max_offset_list[list_index];
2235   if (off != -1)
2236     return off;
2237
2238   addr_mode = targetm.addr_space.address_mode (as);
2239   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2240   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2241
2242   width = GET_MODE_BITSIZE (addr_mode) - 1;
2243   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2244     width = HOST_BITS_PER_WIDE_INT - 1;
2245
2246   for (i = width; i > 0; i--)
2247     {
2248       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2249       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2250       if (memory_address_addr_space_p (mem_mode, addr, as))
2251         break;
2252
2253       /* For some strict-alignment targets, the offset must be naturally
2254          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2255       off = ((unsigned HOST_WIDE_INT) 1 << i);
2256       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2257         {
2258           off -= GET_MODE_SIZE (mem_mode);
2259           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2260           if (memory_address_addr_space_p (mem_mode, addr, as))
2261             break;
2262         }
2263     }
2264   if (i == 0)
2265     off = 0;
2266
2267   max_offset_list[list_index] = off;
2268   return off;
2269 }
2270
2271 /* Check if all small groups should be split.  Return true if and
2272    only if:
2273
2274      1) At least one groups contain two uses with different offsets.
2275      2) No group contains more than two uses with different offsets.
2276
2277    Return false otherwise.  We want to split such groups because:
2278
2279      1) Small groups don't have much benefit and may interfer with
2280         general candidate selection.
2281      2) Size for problem with only small groups is usually small and
2282         general algorithm can handle it well.
2283
2284    TODO -- Above claim may not hold when auto increment is supported.  */
2285
2286 static bool
2287 split_all_small_groups (struct ivopts_data *data)
2288 {
2289   bool split_p = false;
2290   unsigned int i, n, distinct;
2291   struct iv_use *pre, *use;
2292
2293   n = n_iv_uses (data);
2294   for (i = 0; i < n; i++)
2295     {
2296       use = iv_use (data, i);
2297       if (!use->next)
2298         continue;
2299
2300       distinct = 1;
2301       gcc_assert (use->type == USE_ADDRESS);
2302       for (pre = use, use = use->next; use; pre = use, use = use->next)
2303         {
2304           if (pre->addr_offset != use->addr_offset)
2305             distinct++;
2306
2307           if (distinct > 2)
2308             return false;
2309         }
2310       if (distinct == 2)
2311         split_p = true;
2312     }
2313
2314   return split_p;
2315 }
2316
2317 /* For each group of address type uses, this function further groups
2318    these uses according to the maximum offset supported by target's
2319    [base + offset] addressing mode.  */
2320
2321 static void
2322 group_address_uses (struct ivopts_data *data)
2323 {
2324   HOST_WIDE_INT max_offset = -1;
2325   unsigned int i, n, sub_id;
2326   struct iv_use *pre, *use;
2327   unsigned HOST_WIDE_INT addr_offset_first;
2328
2329   /* Reset max offset to split all small groups.  */
2330   if (split_all_small_groups (data))
2331     max_offset = 0;
2332
2333   n = n_iv_uses (data);
2334   for (i = 0; i < n; i++)
2335     {
2336       use = iv_use (data, i);
2337       if (!use->next)
2338         continue;
2339
2340       gcc_assert (use->type == USE_ADDRESS);
2341       if (max_offset != 0)
2342         max_offset = compute_max_addr_offset (use);
2343
2344       while (use)
2345         {
2346           sub_id = 0;
2347           addr_offset_first = use->addr_offset;
2348           /* Only uses with offset that can fit in offset part against
2349              the first use can be grouped together.  */
2350           for (pre = use, use = use->next;
2351                use && (use->addr_offset - addr_offset_first
2352                        <= (unsigned HOST_WIDE_INT) max_offset);
2353                pre = use, use = use->next)
2354             {
2355               use->id = pre->id;
2356               use->sub_id = ++sub_id;
2357             }
2358
2359           /* Break the list and create new group.  */
2360           if (use)
2361             {
2362               pre->next = NULL;
2363               use->id = n_iv_uses (data);
2364               use->related_cands = BITMAP_ALLOC (NULL);
2365               data->iv_uses.safe_push (use);
2366             }
2367         }
2368     }
2369
2370   if (dump_file && (dump_flags & TDF_DETAILS))
2371     dump_uses (dump_file, data);
2372 }
2373
2374 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2375    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2376    we are at the top-level of the processed address.  */
2377
2378 static tree
2379 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2380                 HOST_WIDE_INT *offset)
2381 {
2382   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2383   enum tree_code code;
2384   tree type, orig_type = TREE_TYPE (expr);
2385   HOST_WIDE_INT off0, off1, st;
2386   tree orig_expr = expr;
2387
2388   STRIP_NOPS (expr);
2389
2390   type = TREE_TYPE (expr);
2391   code = TREE_CODE (expr);
2392   *offset = 0;
2393
2394   switch (code)
2395     {
2396     case INTEGER_CST:
2397       if (!cst_and_fits_in_hwi (expr)
2398           || integer_zerop (expr))
2399         return orig_expr;
2400
2401       *offset = int_cst_value (expr);
2402       return build_int_cst (orig_type, 0);
2403
2404     case POINTER_PLUS_EXPR:
2405     case PLUS_EXPR:
2406     case MINUS_EXPR:
2407       op0 = TREE_OPERAND (expr, 0);
2408       op1 = TREE_OPERAND (expr, 1);
2409
2410       op0 = strip_offset_1 (op0, false, false, &off0);
2411       op1 = strip_offset_1 (op1, false, false, &off1);
2412
2413       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2414       if (op0 == TREE_OPERAND (expr, 0)
2415           && op1 == TREE_OPERAND (expr, 1))
2416         return orig_expr;
2417
2418       if (integer_zerop (op1))
2419         expr = op0;
2420       else if (integer_zerop (op0))
2421         {
2422           if (code == MINUS_EXPR)
2423             expr = fold_build1 (NEGATE_EXPR, type, op1);
2424           else
2425             expr = op1;
2426         }
2427       else
2428         expr = fold_build2 (code, type, op0, op1);
2429
2430       return fold_convert (orig_type, expr);
2431
2432     case MULT_EXPR:
2433       op1 = TREE_OPERAND (expr, 1);
2434       if (!cst_and_fits_in_hwi (op1))
2435         return orig_expr;
2436
2437       op0 = TREE_OPERAND (expr, 0);
2438       op0 = strip_offset_1 (op0, false, false, &off0);
2439       if (op0 == TREE_OPERAND (expr, 0))
2440         return orig_expr;
2441
2442       *offset = off0 * int_cst_value (op1);
2443       if (integer_zerop (op0))
2444         expr = op0;
2445       else
2446         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2447
2448       return fold_convert (orig_type, expr);
2449
2450     case ARRAY_REF:
2451     case ARRAY_RANGE_REF:
2452       if (!inside_addr)
2453         return orig_expr;
2454
2455       step = array_ref_element_size (expr);
2456       if (!cst_and_fits_in_hwi (step))
2457         break;
2458
2459       st = int_cst_value (step);
2460       op1 = TREE_OPERAND (expr, 1);
2461       op1 = strip_offset_1 (op1, false, false, &off1);
2462       *offset = off1 * st;
2463
2464       if (top_compref
2465           && integer_zerop (op1))
2466         {
2467           /* Strip the component reference completely.  */
2468           op0 = TREE_OPERAND (expr, 0);
2469           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2470           *offset += off0;
2471           return op0;
2472         }
2473       break;
2474
2475     case COMPONENT_REF:
2476       {
2477         tree field;
2478
2479         if (!inside_addr)
2480           return orig_expr;
2481
2482         tmp = component_ref_field_offset (expr);
2483         field = TREE_OPERAND (expr, 1);
2484         if (top_compref
2485             && cst_and_fits_in_hwi (tmp)
2486             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2487           {
2488             HOST_WIDE_INT boffset, abs_off;
2489
2490             /* Strip the component reference completely.  */
2491             op0 = TREE_OPERAND (expr, 0);
2492             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2493             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2494             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2495             if (boffset < 0)
2496               abs_off = -abs_off;
2497
2498             *offset = off0 + int_cst_value (tmp) + abs_off;
2499             return op0;
2500           }
2501       }
2502       break;
2503
2504     case ADDR_EXPR:
2505       op0 = TREE_OPERAND (expr, 0);
2506       op0 = strip_offset_1 (op0, true, true, &off0);
2507       *offset += off0;
2508
2509       if (op0 == TREE_OPERAND (expr, 0))
2510         return orig_expr;
2511
2512       expr = build_fold_addr_expr (op0);
2513       return fold_convert (orig_type, expr);
2514
2515     case MEM_REF:
2516       /* ???  Offset operand?  */
2517       inside_addr = false;
2518       break;
2519
2520     default:
2521       return orig_expr;
2522     }
2523
2524   /* Default handling of expressions for that we want to recurse into
2525      the first operand.  */
2526   op0 = TREE_OPERAND (expr, 0);
2527   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2528   *offset += off0;
2529
2530   if (op0 == TREE_OPERAND (expr, 0)
2531       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2532     return orig_expr;
2533
2534   expr = copy_node (expr);
2535   TREE_OPERAND (expr, 0) = op0;
2536   if (op1)
2537     TREE_OPERAND (expr, 1) = op1;
2538
2539   /* Inside address, we might strip the top level component references,
2540      thus changing type of the expression.  Handling of ADDR_EXPR
2541      will fix that.  */
2542   expr = fold_convert (orig_type, expr);
2543
2544   return expr;
2545 }
2546
2547 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2548
2549 static tree
2550 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2551 {
2552   HOST_WIDE_INT off;
2553   tree core = strip_offset_1 (expr, false, false, &off);
2554   *offset = off;
2555   return core;
2556 }
2557
2558 /* Returns variant of TYPE that can be used as base for different uses.
2559    We return unsigned type with the same precision, which avoids problems
2560    with overflows.  */
2561
2562 static tree
2563 generic_type_for (tree type)
2564 {
2565   if (POINTER_TYPE_P (type))
2566     return unsigned_type_for (type);
2567
2568   if (TYPE_UNSIGNED (type))
2569     return type;
2570
2571   return unsigned_type_for (type);
2572 }
2573
2574 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2575    the bitmap to that we should store it.  */
2576
2577 static struct ivopts_data *fd_ivopts_data;
2578 static tree
2579 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2580 {
2581   bitmap *depends_on = (bitmap *) data;
2582   struct version_info *info;
2583
2584   if (TREE_CODE (*expr_p) != SSA_NAME)
2585     return NULL_TREE;
2586   info = name_info (fd_ivopts_data, *expr_p);
2587
2588   if (!info->inv_id || info->has_nonlin_use)
2589     return NULL_TREE;
2590
2591   if (!*depends_on)
2592     *depends_on = BITMAP_ALLOC (NULL);
2593   bitmap_set_bit (*depends_on, info->inv_id);
2594
2595   return NULL_TREE;
2596 }
2597
2598 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2599    position to POS.  If USE is not NULL, the candidate is set as related to
2600    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2601    replacement of the final value of the iv by a direct computation.  */
2602
2603 static struct iv_cand *
2604 add_candidate_1 (struct ivopts_data *data,
2605                  tree base, tree step, bool important, enum iv_position pos,
2606                  struct iv_use *use, gimple incremented_at)
2607 {
2608   unsigned i;
2609   struct iv_cand *cand = NULL;
2610   tree type, orig_type;
2611
2612   /* For non-original variables, make sure their values are computed in a type
2613      that does not invoke undefined behavior on overflows (since in general,
2614      we cannot prove that these induction variables are non-wrapping).  */
2615   if (pos != IP_ORIGINAL)
2616     {
2617       orig_type = TREE_TYPE (base);
2618       type = generic_type_for (orig_type);
2619       if (type != orig_type)
2620         {
2621           base = fold_convert (type, base);
2622           step = fold_convert (type, step);
2623         }
2624     }
2625
2626   for (i = 0; i < n_iv_cands (data); i++)
2627     {
2628       cand = iv_cand (data, i);
2629
2630       if (cand->pos != pos)
2631         continue;
2632
2633       if (cand->incremented_at != incremented_at
2634           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2635               && cand->ainc_use != use))
2636         continue;
2637
2638       if (!cand->iv)
2639         {
2640           if (!base && !step)
2641             break;
2642
2643           continue;
2644         }
2645
2646       if (!base && !step)
2647         continue;
2648
2649       if (operand_equal_p (base, cand->iv->base, 0)
2650           && operand_equal_p (step, cand->iv->step, 0)
2651           && (TYPE_PRECISION (TREE_TYPE (base))
2652               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2653         break;
2654     }
2655
2656   if (i == n_iv_cands (data))
2657     {
2658       cand = XCNEW (struct iv_cand);
2659       cand->id = i;
2660
2661       if (!base && !step)
2662         cand->iv = NULL;
2663       else
2664         cand->iv = alloc_iv (data, base, step);
2665
2666       cand->pos = pos;
2667       if (pos != IP_ORIGINAL && cand->iv)
2668         {
2669           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2670           cand->var_after = cand->var_before;
2671         }
2672       cand->important = important;
2673       cand->incremented_at = incremented_at;
2674       data->iv_candidates.safe_push (cand);
2675
2676       if (step
2677           && TREE_CODE (step) != INTEGER_CST)
2678         {
2679           fd_ivopts_data = data;
2680           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2681         }
2682
2683       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2684         cand->ainc_use = use;
2685       else
2686         cand->ainc_use = NULL;
2687
2688       if (dump_file && (dump_flags & TDF_DETAILS))
2689         dump_cand (dump_file, cand);
2690     }
2691
2692   if (important && !cand->important)
2693     {
2694       cand->important = true;
2695       if (dump_file && (dump_flags & TDF_DETAILS))
2696         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2697     }
2698
2699   if (use)
2700     {
2701       bitmap_set_bit (use->related_cands, i);
2702       if (dump_file && (dump_flags & TDF_DETAILS))
2703         fprintf (dump_file, "Candidate %d is related to use %d\n",
2704                  cand->id, use->id);
2705     }
2706
2707   return cand;
2708 }
2709
2710 /* Returns true if incrementing the induction variable at the end of the LOOP
2711    is allowed.
2712
2713    The purpose is to avoid splitting latch edge with a biv increment, thus
2714    creating a jump, possibly confusing other optimization passes and leaving
2715    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2716    is not available (so we do not have a better alternative), or if the latch
2717    edge is already nonempty.  */
2718
2719 static bool
2720 allow_ip_end_pos_p (struct loop *loop)
2721 {
2722   if (!ip_normal_pos (loop))
2723     return true;
2724
2725   if (!empty_block_p (ip_end_pos (loop)))
2726     return true;
2727
2728   return false;
2729 }
2730
2731 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2732    Important field is set to IMPORTANT.  */
2733
2734 static void
2735 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2736                         bool important, struct iv_use *use)
2737 {
2738   basic_block use_bb = gimple_bb (use->stmt);
2739   machine_mode mem_mode;
2740   unsigned HOST_WIDE_INT cstepi;
2741
2742   /* If we insert the increment in any position other than the standard
2743      ones, we must ensure that it is incremented once per iteration.
2744      It must not be in an inner nested loop, or one side of an if
2745      statement.  */
2746   if (use_bb->loop_father != data->current_loop
2747       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2748       || stmt_could_throw_p (use->stmt)
2749       || !cst_and_fits_in_hwi (step))
2750     return;
2751
2752   cstepi = int_cst_value (step);
2753
2754   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2755   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2756         || USE_STORE_PRE_INCREMENT (mem_mode))
2757        && GET_MODE_SIZE (mem_mode) == cstepi)
2758       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2759            || USE_STORE_PRE_DECREMENT (mem_mode))
2760           && GET_MODE_SIZE (mem_mode) == -cstepi))
2761     {
2762       enum tree_code code = MINUS_EXPR;
2763       tree new_base;
2764       tree new_step = step;
2765
2766       if (POINTER_TYPE_P (TREE_TYPE (base)))
2767         {
2768           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2769           code = POINTER_PLUS_EXPR;
2770         }
2771       else
2772         new_step = fold_convert (TREE_TYPE (base), new_step);
2773       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2774       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2775                        use->stmt);
2776     }
2777   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2778         || USE_STORE_POST_INCREMENT (mem_mode))
2779        && GET_MODE_SIZE (mem_mode) == cstepi)
2780       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2781            || USE_STORE_POST_DECREMENT (mem_mode))
2782           && GET_MODE_SIZE (mem_mode) == -cstepi))
2783     {
2784       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2785                        use->stmt);
2786     }
2787 }
2788
2789 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2790    position to POS.  If USE is not NULL, the candidate is set as related to
2791    it.  The candidate computation is scheduled before exit condition and at
2792    the end of loop.  */
2793
2794 static void
2795 add_candidate (struct ivopts_data *data,
2796                tree base, tree step, bool important, struct iv_use *use)
2797 {
2798   gcc_assert (use == NULL || use->sub_id == 0);
2799
2800   if (ip_normal_pos (data->current_loop))
2801     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2802   if (ip_end_pos (data->current_loop)
2803       && allow_ip_end_pos_p (data->current_loop))
2804     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2805 }
2806
2807 /* Adds standard iv candidates.  */
2808
2809 static void
2810 add_standard_iv_candidates (struct ivopts_data *data)
2811 {
2812   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2813
2814   /* The same for a double-integer type if it is still fast enough.  */
2815   if (TYPE_PRECISION
2816         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2817       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2818     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2819                    build_int_cst (long_integer_type_node, 1), true, NULL);
2820
2821   /* The same for a double-integer type if it is still fast enough.  */
2822   if (TYPE_PRECISION
2823         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2824       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2825     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2826                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2827 }
2828
2829
2830 /* Adds candidates bases on the old induction variable IV.  */
2831
2832 static void
2833 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2834 {
2835   gimple phi;
2836   tree def;
2837   struct iv_cand *cand;
2838
2839   add_candidate (data, iv->base, iv->step, true, NULL);
2840
2841   /* The same, but with initial value zero.  */
2842   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2843     add_candidate (data, size_int (0), iv->step, true, NULL);
2844   else
2845     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2846                    iv->step, true, NULL);
2847
2848   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2849   if (gimple_code (phi) == GIMPLE_PHI)
2850     {
2851       /* Additionally record the possibility of leaving the original iv
2852          untouched.  */
2853       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2854       /* Don't add candidate if it's from another PHI node because
2855          it's an affine iv appearing in the form of PEELED_CHREC.  */
2856       phi = SSA_NAME_DEF_STMT (def);
2857       if (gimple_code (phi) != GIMPLE_PHI)
2858         {
2859           cand = add_candidate_1 (data,
2860                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2861                                   SSA_NAME_DEF_STMT (def));
2862           cand->var_before = iv->ssa_name;
2863           cand->var_after = def;
2864         }
2865       else
2866         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2867     }
2868 }
2869
2870 /* Adds candidates based on the old induction variables.  */
2871
2872 static void
2873 add_iv_candidate_for_bivs (struct ivopts_data *data)
2874 {
2875   unsigned i;
2876   struct iv *iv;
2877   bitmap_iterator bi;
2878
2879   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2880     {
2881       iv = ver_info (data, i)->iv;
2882       if (iv && iv->biv_p && !integer_zerop (iv->step))
2883         add_iv_candidate_for_biv (data, iv);
2884     }
2885 }
2886
2887 /* Adds candidates based on the value of USE's iv.  */
2888
2889 static void
2890 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
2891 {
2892   unsigned HOST_WIDE_INT offset;
2893   tree base;
2894   tree basetype;
2895   struct iv *iv = use->iv;
2896
2897   add_candidate (data, iv->base, iv->step, false, use);
2898
2899   /* The same, but with initial value zero.  Make such variable important,
2900      since it is generic enough so that possibly many uses may be based
2901      on it.  */
2902   basetype = TREE_TYPE (iv->base);
2903   if (POINTER_TYPE_P (basetype))
2904     basetype = sizetype;
2905   add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
2906
2907   /* Third, try removing the constant offset.  Make sure to even
2908      add a candidate for &a[0] vs. (T *)&a.  */
2909   base = strip_offset (iv->base, &offset);
2910   if (offset || base != iv->base)
2911     add_candidate (data, base, iv->step, false, use);
2912
2913   /* At last, add auto-incremental candidates.  Make such variables
2914      important since other iv uses with same base object may be based
2915      on it.  */
2916   if (use != NULL && use->type == USE_ADDRESS)
2917     add_autoinc_candidates (data, iv->base, iv->step, true, use);
2918 }
2919
2920 /* Adds candidates based on the uses.  */
2921
2922 static void
2923 add_iv_candidate_for_uses (struct ivopts_data *data)
2924 {
2925   unsigned i;
2926
2927   for (i = 0; i < n_iv_uses (data); i++)
2928     {
2929       struct iv_use *use = iv_use (data, i);
2930
2931       if (!use)
2932         continue;
2933
2934       switch (use->type)
2935         {
2936         case USE_NONLINEAR_EXPR:
2937         case USE_COMPARE:
2938         case USE_ADDRESS:
2939           /* Just add the ivs based on the value of the iv used here.  */
2940           add_iv_candidate_for_use (data, use);
2941           break;
2942
2943         default:
2944           gcc_unreachable ();
2945         }
2946     }
2947 }
2948
2949 /* Record important candidates and add them to related_cands bitmaps
2950    if needed.  */
2951
2952 static void
2953 record_important_candidates (struct ivopts_data *data)
2954 {
2955   unsigned i;
2956   struct iv_use *use;
2957
2958   for (i = 0; i < n_iv_cands (data); i++)
2959     {
2960       struct iv_cand *cand = iv_cand (data, i);
2961
2962       if (cand->important)
2963         bitmap_set_bit (data->important_candidates, i);
2964     }
2965
2966   data->consider_all_candidates = (n_iv_cands (data)
2967                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2968
2969   if (data->consider_all_candidates)
2970     {
2971       /* We will not need "related_cands" bitmaps in this case,
2972          so release them to decrease peak memory consumption.  */
2973       for (i = 0; i < n_iv_uses (data); i++)
2974         {
2975           use = iv_use (data, i);
2976           BITMAP_FREE (use->related_cands);
2977         }
2978     }
2979   else
2980     {
2981       /* Add important candidates to the related_cands bitmaps.  */
2982       for (i = 0; i < n_iv_uses (data); i++)
2983         bitmap_ior_into (iv_use (data, i)->related_cands,
2984                          data->important_candidates);
2985     }
2986 }
2987
2988 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2989    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2990    we allocate a simple list to every use.  */
2991
2992 static void
2993 alloc_use_cost_map (struct ivopts_data *data)
2994 {
2995   unsigned i, size, s;
2996
2997   for (i = 0; i < n_iv_uses (data); i++)
2998     {
2999       struct iv_use *use = iv_use (data, i);
3000
3001       if (data->consider_all_candidates)
3002         size = n_iv_cands (data);
3003       else
3004         {
3005           s = bitmap_count_bits (use->related_cands);
3006
3007           /* Round up to the power of two, so that moduling by it is fast.  */
3008           size = s ? (1 << ceil_log2 (s)) : 1;
3009         }
3010
3011       use->n_map_members = size;
3012       use->cost_map = XCNEWVEC (struct cost_pair, size);
3013     }
3014 }
3015
3016 /* Returns description of computation cost of expression whose runtime
3017    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
3018
3019 static comp_cost
3020 new_cost (unsigned runtime, unsigned complexity)
3021 {
3022   comp_cost cost;
3023
3024   cost.cost = runtime;
3025   cost.complexity = complexity;
3026
3027   return cost;
3028 }
3029
3030 /* Returns true if COST is infinite.  */
3031
3032 static bool
3033 infinite_cost_p (comp_cost cost)
3034 {
3035   return cost.cost == INFTY;
3036 }
3037
3038 /* Adds costs COST1 and COST2.  */
3039
3040 static comp_cost
3041 add_costs (comp_cost cost1, comp_cost cost2)
3042 {
3043   if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3044     return infinite_cost;
3045
3046   cost1.cost += cost2.cost;
3047   cost1.complexity += cost2.complexity;
3048
3049   return cost1;
3050 }
3051 /* Subtracts costs COST1 and COST2.  */
3052
3053 static comp_cost
3054 sub_costs (comp_cost cost1, comp_cost cost2)
3055 {
3056   cost1.cost -= cost2.cost;
3057   cost1.complexity -= cost2.complexity;
3058
3059   return cost1;
3060 }
3061
3062 /* Returns a negative number if COST1 < COST2, a positive number if
3063    COST1 > COST2, and 0 if COST1 = COST2.  */
3064
3065 static int
3066 compare_costs (comp_cost cost1, comp_cost cost2)
3067 {
3068   if (cost1.cost == cost2.cost)
3069     return cost1.complexity - cost2.complexity;
3070
3071   return cost1.cost - cost2.cost;
3072 }
3073
3074 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3075    on invariants DEPENDS_ON and that the value used in expressing it
3076    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3077
3078 static void
3079 set_use_iv_cost (struct ivopts_data *data,
3080                  struct iv_use *use, struct iv_cand *cand,
3081                  comp_cost cost, bitmap depends_on, tree value,
3082                  enum tree_code comp, int inv_expr_id)
3083 {
3084   unsigned i, s;
3085
3086   if (infinite_cost_p (cost))
3087     {
3088       BITMAP_FREE (depends_on);
3089       return;
3090     }
3091
3092   if (data->consider_all_candidates)
3093     {
3094       use->cost_map[cand->id].cand = cand;
3095       use->cost_map[cand->id].cost = cost;
3096       use->cost_map[cand->id].depends_on = depends_on;
3097       use->cost_map[cand->id].value = value;
3098       use->cost_map[cand->id].comp = comp;
3099       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3100       return;
3101     }
3102
3103   /* n_map_members is a power of two, so this computes modulo.  */
3104   s = cand->id & (use->n_map_members - 1);
3105   for (i = s; i < use->n_map_members; i++)
3106     if (!use->cost_map[i].cand)
3107       goto found;
3108   for (i = 0; i < s; i++)
3109     if (!use->cost_map[i].cand)
3110       goto found;
3111
3112   gcc_unreachable ();
3113
3114 found:
3115   use->cost_map[i].cand = cand;
3116   use->cost_map[i].cost = cost;
3117   use->cost_map[i].depends_on = depends_on;
3118   use->cost_map[i].value = value;
3119   use->cost_map[i].comp = comp;
3120   use->cost_map[i].inv_expr_id = inv_expr_id;
3121 }
3122
3123 /* Gets cost of (USE, CANDIDATE) pair.  */
3124
3125 static struct cost_pair *
3126 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3127                  struct iv_cand *cand)
3128 {
3129   unsigned i, s;
3130   struct cost_pair *ret;
3131
3132   if (!cand)
3133     return NULL;
3134
3135   if (data->consider_all_candidates)
3136     {
3137       ret = use->cost_map + cand->id;
3138       if (!ret->cand)
3139         return NULL;
3140
3141       return ret;
3142     }
3143
3144   /* n_map_members is a power of two, so this computes modulo.  */
3145   s = cand->id & (use->n_map_members - 1);
3146   for (i = s; i < use->n_map_members; i++)
3147     if (use->cost_map[i].cand == cand)
3148       return use->cost_map + i;
3149     else if (use->cost_map[i].cand == NULL)
3150       return NULL;
3151   for (i = 0; i < s; i++)
3152     if (use->cost_map[i].cand == cand)
3153       return use->cost_map + i;
3154     else if (use->cost_map[i].cand == NULL)
3155       return NULL;
3156
3157   return NULL;
3158 }
3159
3160 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3161 static rtx
3162 produce_memory_decl_rtl (tree obj, int *regno)
3163 {
3164   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3165   machine_mode address_mode = targetm.addr_space.address_mode (as);
3166   rtx x;
3167
3168   gcc_assert (obj);
3169   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3170     {
3171       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3172       x = gen_rtx_SYMBOL_REF (address_mode, name);
3173       SET_SYMBOL_REF_DECL (x, obj);
3174       x = gen_rtx_MEM (DECL_MODE (obj), x);
3175       set_mem_addr_space (x, as);
3176       targetm.encode_section_info (obj, x, true);
3177     }
3178   else
3179     {
3180       x = gen_raw_REG (address_mode, (*regno)++);
3181       x = gen_rtx_MEM (DECL_MODE (obj), x);
3182       set_mem_addr_space (x, as);
3183     }
3184
3185   return x;
3186 }
3187
3188 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3189    walk_tree.  DATA contains the actual fake register number.  */
3190
3191 static tree
3192 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3193 {
3194   tree obj = NULL_TREE;
3195   rtx x = NULL_RTX;
3196   int *regno = (int *) data;
3197
3198   switch (TREE_CODE (*expr_p))
3199     {
3200     case ADDR_EXPR:
3201       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3202            handled_component_p (*expr_p);
3203            expr_p = &TREE_OPERAND (*expr_p, 0))
3204         continue;
3205       obj = *expr_p;
3206       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3207         x = produce_memory_decl_rtl (obj, regno);
3208       break;
3209
3210     case SSA_NAME:
3211       *ws = 0;
3212       obj = SSA_NAME_VAR (*expr_p);
3213       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3214       if (!obj)
3215         return NULL_TREE;
3216       if (!DECL_RTL_SET_P (obj))
3217         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3218       break;
3219
3220     case VAR_DECL:
3221     case PARM_DECL:
3222     case RESULT_DECL:
3223       *ws = 0;
3224       obj = *expr_p;
3225
3226       if (DECL_RTL_SET_P (obj))
3227         break;
3228
3229       if (DECL_MODE (obj) == BLKmode)
3230         x = produce_memory_decl_rtl (obj, regno);
3231       else
3232         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3233
3234       break;
3235
3236     default:
3237       break;
3238     }
3239
3240   if (x)
3241     {
3242       decl_rtl_to_reset.safe_push (obj);
3243       SET_DECL_RTL (obj, x);
3244     }
3245
3246   return NULL_TREE;
3247 }
3248
3249 /* Determines cost of the computation of EXPR.  */
3250
3251 static unsigned
3252 computation_cost (tree expr, bool speed)
3253 {
3254   rtx_insn *seq;
3255   rtx rslt;
3256   tree type = TREE_TYPE (expr);
3257   unsigned cost;
3258   /* Avoid using hard regs in ways which may be unsupported.  */
3259   int regno = LAST_VIRTUAL_REGISTER + 1;
3260   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3261   enum node_frequency real_frequency = node->frequency;
3262
3263   node->frequency = NODE_FREQUENCY_NORMAL;
3264   crtl->maybe_hot_insn_p = speed;
3265   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3266   start_sequence ();
3267   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3268   seq = get_insns ();
3269   end_sequence ();
3270   default_rtl_profile ();
3271   node->frequency = real_frequency;
3272
3273   cost = seq_cost (seq, speed);
3274   if (MEM_P (rslt))
3275     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3276                           TYPE_ADDR_SPACE (type), speed);
3277   else if (!REG_P (rslt))
3278     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3279
3280   return cost;
3281 }
3282
3283 /* Returns variable containing the value of candidate CAND at statement AT.  */
3284
3285 static tree
3286 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3287 {
3288   if (stmt_after_increment (loop, cand, stmt))
3289     return cand->var_after;
3290   else
3291     return cand->var_before;
3292 }
3293
3294 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3295    same precision that is at least as wide as the precision of TYPE, stores
3296    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3297    type of A and B.  */
3298
3299 static tree
3300 determine_common_wider_type (tree *a, tree *b)
3301 {
3302   tree wider_type = NULL;
3303   tree suba, subb;
3304   tree atype = TREE_TYPE (*a);
3305
3306   if (CONVERT_EXPR_P (*a))
3307     {
3308       suba = TREE_OPERAND (*a, 0);
3309       wider_type = TREE_TYPE (suba);
3310       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3311         return atype;
3312     }
3313   else
3314     return atype;
3315
3316   if (CONVERT_EXPR_P (*b))
3317     {
3318       subb = TREE_OPERAND (*b, 0);
3319       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3320         return atype;
3321     }
3322   else
3323     return atype;
3324
3325   *a = suba;
3326   *b = subb;
3327   return wider_type;
3328 }
3329
3330 /* Determines the expression by that USE is expressed from induction variable
3331    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3332    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3333
3334 static bool
3335 get_computation_aff (struct loop *loop,
3336                      struct iv_use *use, struct iv_cand *cand, gimple at,
3337                      struct aff_tree *aff)
3338 {
3339   tree ubase = use->iv->base;
3340   tree ustep = use->iv->step;
3341   tree cbase = cand->iv->base;
3342   tree cstep = cand->iv->step, cstep_common;
3343   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3344   tree common_type, var;
3345   tree uutype;
3346   aff_tree cbase_aff, var_aff;
3347   widest_int rat;
3348
3349   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3350     {
3351       /* We do not have a precision to express the values of use.  */
3352       return false;
3353     }
3354
3355   var = var_at_stmt (loop, cand, at);
3356   uutype = unsigned_type_for (utype);
3357
3358   /* If the conversion is not noop, perform it.  */
3359   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3360     {
3361       cstep = fold_convert (uutype, cstep);
3362       cbase = fold_convert (uutype, cbase);
3363       var = fold_convert (uutype, var);
3364     }
3365
3366   if (!constant_multiple_of (ustep, cstep, &rat))
3367     return false;
3368
3369   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3370      type, we achieve better folding by computing their difference in this
3371      wider type, and cast the result to UUTYPE.  We do not need to worry about
3372      overflows, as all the arithmetics will in the end be performed in UUTYPE
3373      anyway.  */
3374   common_type = determine_common_wider_type (&ubase, &cbase);
3375
3376   /* use = ubase - ratio * cbase + ratio * var.  */
3377   tree_to_aff_combination (ubase, common_type, aff);
3378   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3379   tree_to_aff_combination (var, uutype, &var_aff);
3380
3381   /* We need to shift the value if we are after the increment.  */
3382   if (stmt_after_increment (loop, cand, at))
3383     {
3384       aff_tree cstep_aff;
3385
3386       if (common_type != uutype)
3387         cstep_common = fold_convert (common_type, cstep);
3388       else
3389         cstep_common = cstep;
3390
3391       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3392       aff_combination_add (&cbase_aff, &cstep_aff);
3393     }
3394
3395   aff_combination_scale (&cbase_aff, -rat);
3396   aff_combination_add (aff, &cbase_aff);
3397   if (common_type != uutype)
3398     aff_combination_convert (aff, uutype);
3399
3400   aff_combination_scale (&var_aff, rat);
3401   aff_combination_add (aff, &var_aff);
3402
3403   return true;
3404 }
3405
3406 /* Return the type of USE.  */
3407
3408 static tree
3409 get_use_type (struct iv_use *use)
3410 {
3411   tree base_type = TREE_TYPE (use->iv->base);
3412   tree type;
3413
3414   if (use->type == USE_ADDRESS)
3415     {
3416       /* The base_type may be a void pointer.  Create a pointer type based on
3417          the mem_ref instead.  */
3418       type = build_pointer_type (TREE_TYPE (*use->op_p));
3419       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3420                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3421     }
3422   else
3423     type = base_type;
3424
3425   return type;
3426 }
3427
3428 /* Determines the expression by that USE is expressed from induction variable
3429    CAND at statement AT in LOOP.  The computation is unshared.  */
3430
3431 static tree
3432 get_computation_at (struct loop *loop,
3433                     struct iv_use *use, struct iv_cand *cand, gimple at)
3434 {
3435   aff_tree aff;
3436   tree type = get_use_type (use);
3437
3438   if (!get_computation_aff (loop, use, cand, at, &aff))
3439     return NULL_TREE;
3440   unshare_aff_combination (&aff);
3441   return fold_convert (type, aff_combination_to_tree (&aff));
3442 }
3443
3444 /* Determines the expression by that USE is expressed from induction variable
3445    CAND in LOOP.  The computation is unshared.  */
3446
3447 static tree
3448 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3449 {
3450   return get_computation_at (loop, use, cand, use->stmt);
3451 }
3452
3453 /* Adjust the cost COST for being in loop setup rather than loop body.
3454    If we're optimizing for space, the loop setup overhead is constant;
3455    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3456 static unsigned
3457 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3458 {
3459   if (cost == INFTY)
3460     return cost;
3461   else if (optimize_loop_for_speed_p (data->current_loop))
3462     return cost / avg_loop_niter (data->current_loop);
3463   else
3464     return cost;
3465 }
3466
3467 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3468    validity for a memory reference accessing memory of mode MODE in
3469    address space AS.  */
3470
3471
3472 bool
3473 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3474                                  addr_space_t as)
3475 {
3476 #define MAX_RATIO 128
3477   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3478   static vec<sbitmap> valid_mult_list;
3479   sbitmap valid_mult;
3480
3481   if (data_index >= valid_mult_list.length ())
3482     valid_mult_list.safe_grow_cleared (data_index + 1);
3483
3484   valid_mult = valid_mult_list[data_index];
3485   if (!valid_mult)
3486     {
3487       machine_mode address_mode = targetm.addr_space.address_mode (as);
3488       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3489       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3490       rtx addr, scaled;
3491       HOST_WIDE_INT i;
3492
3493       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3494       bitmap_clear (valid_mult);
3495       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3496       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3497       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3498         {
3499           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3500           if (memory_address_addr_space_p (mode, addr, as)
3501               || memory_address_addr_space_p (mode, scaled, as))
3502             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3503         }
3504
3505       if (dump_file && (dump_flags & TDF_DETAILS))
3506         {
3507           fprintf (dump_file, "  allowed multipliers:");
3508           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3509             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3510               fprintf (dump_file, " %d", (int) i);
3511           fprintf (dump_file, "\n");
3512           fprintf (dump_file, "\n");
3513         }
3514
3515       valid_mult_list[data_index] = valid_mult;
3516     }
3517
3518   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3519     return false;
3520
3521   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3522 }
3523
3524 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3525    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3526    variable is omitted.  Compute the cost for a memory reference that accesses
3527    a memory location of mode MEM_MODE in address space AS.
3528
3529    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3530    size of MEM_MODE / RATIO) is available.  To make this determination, we
3531    look at the size of the increment to be made, which is given in CSTEP.
3532    CSTEP may be zero if the step is unknown.
3533    STMT_AFTER_INC is true iff the statement we're looking at is after the
3534    increment of the original biv.
3535
3536    TODO -- there must be some better way.  This all is quite crude.  */
3537
3538 enum ainc_type
3539 {
3540   AINC_PRE_INC,         /* Pre increment.  */
3541   AINC_PRE_DEC,         /* Pre decrement.  */
3542   AINC_POST_INC,        /* Post increment.  */
3543   AINC_POST_DEC,        /* Post decrement.  */
3544   AINC_NONE             /* Also the number of auto increment types.  */
3545 };
3546
3547 typedef struct address_cost_data_s
3548 {
3549   HOST_WIDE_INT min_offset, max_offset;
3550   unsigned costs[2][2][2][2];
3551   unsigned ainc_costs[AINC_NONE];
3552 } *address_cost_data;
3553
3554
3555 static comp_cost
3556 get_address_cost (bool symbol_present, bool var_present,
3557                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3558                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3559                   addr_space_t as, bool speed,
3560                   bool stmt_after_inc, bool *may_autoinc)
3561 {
3562   machine_mode address_mode = targetm.addr_space.address_mode (as);
3563   static vec<address_cost_data> address_cost_data_list;
3564   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3565   address_cost_data data;
3566   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3567   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3568   unsigned cost, acost, complexity;
3569   enum ainc_type autoinc_type;
3570   bool offset_p, ratio_p, autoinc;
3571   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3572   unsigned HOST_WIDE_INT mask;
3573   unsigned bits;
3574
3575   if (data_index >= address_cost_data_list.length ())
3576     address_cost_data_list.safe_grow_cleared (data_index + 1);
3577
3578   data = address_cost_data_list[data_index];
3579   if (!data)
3580     {
3581       HOST_WIDE_INT i;
3582       HOST_WIDE_INT rat, off = 0;
3583       int old_cse_not_expected, width;
3584       unsigned sym_p, var_p, off_p, rat_p, add_c;
3585       rtx_insn *seq;
3586       rtx addr, base;
3587       rtx reg0, reg1;
3588
3589       data = (address_cost_data) xcalloc (1, sizeof (*data));
3590
3591       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3592
3593       width = GET_MODE_BITSIZE (address_mode) - 1;
3594       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3595         width = HOST_BITS_PER_WIDE_INT - 1;
3596       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3597
3598       for (i = width; i >= 0; i--)
3599         {
3600           off = -((unsigned HOST_WIDE_INT) 1 << i);
3601           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3602           if (memory_address_addr_space_p (mem_mode, addr, as))
3603             break;
3604         }
3605       data->min_offset = (i == -1? 0 : off);
3606
3607       for (i = width; i >= 0; i--)
3608         {
3609           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3610           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3611           if (memory_address_addr_space_p (mem_mode, addr, as))
3612             break;
3613           /* For some strict-alignment targets, the offset must be naturally
3614              aligned.  Try an aligned offset if mem_mode is not QImode.  */
3615           off = mem_mode != QImode
3616                 ? ((unsigned HOST_WIDE_INT) 1 << i)
3617                     - GET_MODE_SIZE (mem_mode)
3618                 : 0;
3619           if (off > 0)
3620             {
3621               XEXP (addr, 1) = gen_int_mode (off, address_mode);
3622               if (memory_address_addr_space_p (mem_mode, addr, as))
3623                 break;
3624             }
3625         }
3626       if (i == -1)
3627         off = 0;
3628       data->max_offset = off;
3629
3630       if (dump_file && (dump_flags & TDF_DETAILS))
3631         {
3632           fprintf (dump_file, "get_address_cost:\n");
3633           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3634                    GET_MODE_NAME (mem_mode),
3635                    data->min_offset);
3636           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3637                    GET_MODE_NAME (mem_mode),
3638                    data->max_offset);
3639         }
3640
3641       rat = 1;
3642       for (i = 2; i <= MAX_RATIO; i++)
3643         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3644           {
3645             rat = i;
3646             break;
3647           }
3648
3649       /* Compute the cost of various addressing modes.  */
3650       acost = 0;
3651       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3652       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3653
3654       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3655           || USE_STORE_PRE_DECREMENT (mem_mode))
3656         {
3657           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3658           has_predec[mem_mode]
3659             = memory_address_addr_space_p (mem_mode, addr, as);
3660
3661           if (has_predec[mem_mode])
3662             data->ainc_costs[AINC_PRE_DEC]
3663               = address_cost (addr, mem_mode, as, speed);
3664         }
3665       if (USE_LOAD_POST_DECREMENT (mem_mode)
3666           || USE_STORE_POST_DECREMENT (mem_mode))
3667         {
3668           addr = gen_rtx_POST_DEC (address_mode, reg0);
3669           has_postdec[mem_mode]
3670             = memory_address_addr_space_p (mem_mode, addr, as);
3671
3672           if (has_postdec[mem_mode])
3673             data->ainc_costs[AINC_POST_DEC]
3674               = address_cost (addr, mem_mode, as, speed);
3675         }
3676       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3677           || USE_STORE_PRE_DECREMENT (mem_mode))
3678         {
3679           addr = gen_rtx_PRE_INC (address_mode, reg0);
3680           has_preinc[mem_mode]
3681             = memory_address_addr_space_p (mem_mode, addr, as);
3682
3683           if (has_preinc[mem_mode])
3684             data->ainc_costs[AINC_PRE_INC]
3685               = address_cost (addr, mem_mode, as, speed);
3686         }
3687       if (USE_LOAD_POST_INCREMENT (mem_mode)
3688           || USE_STORE_POST_INCREMENT (mem_mode))
3689         {
3690           addr = gen_rtx_POST_INC (address_mode, reg0);
3691           has_postinc[mem_mode]
3692             = memory_address_addr_space_p (mem_mode, addr, as);
3693
3694           if (has_postinc[mem_mode])
3695             data->ainc_costs[AINC_POST_INC]
3696               = address_cost (addr, mem_mode, as, speed);
3697         }
3698       for (i = 0; i < 16; i++)
3699         {
3700           sym_p = i & 1;
3701           var_p = (i >> 1) & 1;
3702           off_p = (i >> 2) & 1;
3703           rat_p = (i >> 3) & 1;
3704
3705           addr = reg0;
3706           if (rat_p)
3707             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3708                                    gen_int_mode (rat, address_mode));
3709
3710           if (var_p)
3711             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3712
3713           if (sym_p)
3714             {
3715               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3716               /* ??? We can run into trouble with some backends by presenting
3717                  it with symbols which haven't been properly passed through
3718                  targetm.encode_section_info.  By setting the local bit, we
3719                  enhance the probability of things working.  */
3720               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3721
3722               if (off_p)
3723                 base = gen_rtx_fmt_e (CONST, address_mode,
3724                                       gen_rtx_fmt_ee
3725                                         (PLUS, address_mode, base,
3726                                          gen_int_mode (off, address_mode)));
3727             }
3728           else if (off_p)
3729             base = gen_int_mode (off, address_mode);
3730           else
3731             base = NULL_RTX;
3732
3733           if (base)
3734             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3735
3736           start_sequence ();
3737           /* To avoid splitting addressing modes, pretend that no cse will
3738              follow.  */
3739           old_cse_not_expected = cse_not_expected;
3740           cse_not_expected = true;
3741           addr = memory_address_addr_space (mem_mode, addr, as);
3742           cse_not_expected = old_cse_not_expected;
3743           seq = get_insns ();
3744           end_sequence ();
3745
3746           acost = seq_cost (seq, speed);
3747           acost += address_cost (addr, mem_mode, as, speed);
3748
3749           if (!acost)
3750             acost = 1;
3751           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3752         }
3753
3754       /* On some targets, it is quite expensive to load symbol to a register,
3755          which makes addresses that contain symbols look much more expensive.
3756          However, the symbol will have to be loaded in any case before the
3757          loop (and quite likely we have it in register already), so it does not
3758          make much sense to penalize them too heavily.  So make some final
3759          tweaks for the SYMBOL_PRESENT modes:
3760
3761          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3762          var is cheaper, use this mode with small penalty.
3763          If VAR_PRESENT is true, try whether the mode with
3764          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3765          if this is the case, use it.  */
3766       add_c = add_cost (speed, address_mode);
3767       for (i = 0; i < 8; i++)
3768         {
3769           var_p = i & 1;
3770           off_p = (i >> 1) & 1;
3771           rat_p = (i >> 2) & 1;
3772
3773           acost = data->costs[0][1][off_p][rat_p] + 1;
3774           if (var_p)
3775             acost += add_c;
3776
3777           if (acost < data->costs[1][var_p][off_p][rat_p])
3778             data->costs[1][var_p][off_p][rat_p] = acost;
3779         }
3780
3781       if (dump_file && (dump_flags & TDF_DETAILS))
3782         {
3783           fprintf (dump_file, "Address costs:\n");
3784
3785           for (i = 0; i < 16; i++)
3786             {
3787               sym_p = i & 1;
3788               var_p = (i >> 1) & 1;
3789               off_p = (i >> 2) & 1;
3790               rat_p = (i >> 3) & 1;
3791
3792               fprintf (dump_file, "  ");
3793               if (sym_p)
3794                 fprintf (dump_file, "sym + ");
3795               if (var_p)
3796                 fprintf (dump_file, "var + ");
3797               if (off_p)
3798                 fprintf (dump_file, "cst + ");
3799               if (rat_p)
3800                 fprintf (dump_file, "rat * ");
3801
3802               acost = data->costs[sym_p][var_p][off_p][rat_p];
3803               fprintf (dump_file, "index costs %d\n", acost);
3804             }
3805           if (has_predec[mem_mode] || has_postdec[mem_mode]
3806               || has_preinc[mem_mode] || has_postinc[mem_mode])
3807             fprintf (dump_file, "  May include autoinc/dec\n");
3808           fprintf (dump_file, "\n");
3809         }
3810
3811       address_cost_data_list[data_index] = data;
3812     }
3813
3814   bits = GET_MODE_BITSIZE (address_mode);
3815   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3816   offset &= mask;
3817   if ((offset >> (bits - 1) & 1))
3818     offset |= ~mask;
3819   s_offset = offset;
3820
3821   autoinc = false;
3822   autoinc_type = AINC_NONE;
3823   msize = GET_MODE_SIZE (mem_mode);
3824   autoinc_offset = offset;
3825   if (stmt_after_inc)
3826     autoinc_offset += ratio * cstep;
3827   if (symbol_present || var_present || ratio != 1)
3828     autoinc = false;
3829   else
3830     {
3831       if (has_postinc[mem_mode] && autoinc_offset == 0
3832           && msize == cstep)
3833         autoinc_type = AINC_POST_INC;
3834       else if (has_postdec[mem_mode] && autoinc_offset == 0
3835                && msize == -cstep)
3836         autoinc_type = AINC_POST_DEC;
3837       else if (has_preinc[mem_mode] && autoinc_offset == msize
3838                && msize == cstep)
3839         autoinc_type = AINC_PRE_INC;
3840       else if (has_predec[mem_mode] && autoinc_offset == -msize
3841                && msize == -cstep)
3842         autoinc_type = AINC_PRE_DEC;
3843
3844       if (autoinc_type != AINC_NONE)
3845         autoinc = true;
3846     }
3847
3848   cost = 0;
3849   offset_p = (s_offset != 0
3850               && data->min_offset <= s_offset
3851               && s_offset <= data->max_offset);
3852   ratio_p = (ratio != 1
3853              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3854
3855   if (ratio != 1 && !ratio_p)
3856     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3857
3858   if (s_offset && !offset_p && !symbol_present)
3859     cost += add_cost (speed, address_mode);
3860
3861   if (may_autoinc)
3862     *may_autoinc = autoinc;
3863   if (autoinc)
3864     acost = data->ainc_costs[autoinc_type];
3865   else
3866     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3867   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3868   return new_cost (cost + acost, complexity);
3869 }
3870
3871  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3872     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3873     calculating the operands of EXPR.  Returns true if successful, and returns
3874     the cost in COST.  */
3875
3876 static bool
3877 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3878                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3879 {
3880   comp_cost res;
3881   tree op1 = TREE_OPERAND (expr, 1);
3882   tree cst = TREE_OPERAND (mult, 1);
3883   tree multop = TREE_OPERAND (mult, 0);
3884   int m = exact_log2 (int_cst_value (cst));
3885   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3886   int as_cost, sa_cost;
3887   bool mult_in_op1;
3888
3889   if (!(m >= 0 && m < maxm))
3890     return false;
3891
3892   mult_in_op1 = operand_equal_p (op1, mult, 0);
3893
3894   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3895
3896   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3897      use that in preference to a shift insn followed by an add insn.  */
3898   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3899              ? shiftadd_cost (speed, mode, m)
3900              : (mult_in_op1
3901                 ? shiftsub1_cost (speed, mode, m)
3902                 : shiftsub0_cost (speed, mode, m)));
3903
3904   res = new_cost (MIN (as_cost, sa_cost), 0);
3905   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3906
3907   STRIP_NOPS (multop);
3908   if (!is_gimple_val (multop))
3909     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3910
3911   *cost = res;
3912   return true;
3913 }
3914
3915 /* Estimates cost of forcing expression EXPR into a variable.  */
3916
3917 static comp_cost
3918 force_expr_to_var_cost (tree expr, bool speed)
3919 {
3920   static bool costs_initialized = false;
3921   static unsigned integer_cost [2];
3922   static unsigned symbol_cost [2];
3923   static unsigned address_cost [2];
3924   tree op0, op1;
3925   comp_cost cost0, cost1, cost;
3926   machine_mode mode;
3927
3928   if (!costs_initialized)
3929     {
3930       tree type = build_pointer_type (integer_type_node);
3931       tree var, addr;
3932       rtx x;
3933       int i;
3934
3935       var = create_tmp_var_raw (integer_type_node, "test_var");
3936       TREE_STATIC (var) = 1;
3937       x = produce_memory_decl_rtl (var, NULL);
3938       SET_DECL_RTL (var, x);
3939
3940       addr = build1 (ADDR_EXPR, type, var);
3941
3942
3943       for (i = 0; i < 2; i++)
3944         {
3945           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3946                                                              2000), i);
3947
3948           symbol_cost[i] = computation_cost (addr, i) + 1;
3949
3950           address_cost[i]
3951             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3952           if (dump_file && (dump_flags & TDF_DETAILS))
3953             {
3954               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3955               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3956               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3957               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3958               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3959               fprintf (dump_file, "\n");
3960             }
3961         }
3962
3963       costs_initialized = true;
3964     }
3965
3966   STRIP_NOPS (expr);
3967
3968   if (SSA_VAR_P (expr))
3969     return no_cost;
3970
3971   if (is_gimple_min_invariant (expr))
3972     {
3973       if (TREE_CODE (expr) == INTEGER_CST)
3974         return new_cost (integer_cost [speed], 0);
3975
3976       if (TREE_CODE (expr) == ADDR_EXPR)
3977         {
3978           tree obj = TREE_OPERAND (expr, 0);
3979
3980           if (TREE_CODE (obj) == VAR_DECL
3981               || TREE_CODE (obj) == PARM_DECL
3982               || TREE_CODE (obj) == RESULT_DECL)
3983             return new_cost (symbol_cost [speed], 0);
3984         }
3985
3986       return new_cost (address_cost [speed], 0);
3987     }
3988
3989   switch (TREE_CODE (expr))
3990     {
3991     case POINTER_PLUS_EXPR:
3992     case PLUS_EXPR:
3993     case MINUS_EXPR:
3994     case MULT_EXPR:
3995       op0 = TREE_OPERAND (expr, 0);
3996       op1 = TREE_OPERAND (expr, 1);
3997       STRIP_NOPS (op0);
3998       STRIP_NOPS (op1);
3999       break;
4000
4001     CASE_CONVERT:
4002     case NEGATE_EXPR:
4003       op0 = TREE_OPERAND (expr, 0);
4004       STRIP_NOPS (op0);
4005       op1 = NULL_TREE;
4006       break;
4007
4008     default:
4009       /* Just an arbitrary value, FIXME.  */
4010       return new_cost (target_spill_cost[speed], 0);
4011     }
4012
4013   if (op0 == NULL_TREE
4014       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4015     cost0 = no_cost;
4016   else
4017     cost0 = force_expr_to_var_cost (op0, speed);
4018
4019   if (op1 == NULL_TREE
4020       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4021     cost1 = no_cost;
4022   else
4023     cost1 = force_expr_to_var_cost (op1, speed);
4024
4025   mode = TYPE_MODE (TREE_TYPE (expr));
4026   switch (TREE_CODE (expr))
4027     {
4028     case POINTER_PLUS_EXPR:
4029     case PLUS_EXPR:
4030     case MINUS_EXPR:
4031     case NEGATE_EXPR:
4032       cost = new_cost (add_cost (speed, mode), 0);
4033       if (TREE_CODE (expr) != NEGATE_EXPR)
4034         {
4035           tree mult = NULL_TREE;
4036           comp_cost sa_cost;
4037           if (TREE_CODE (op1) == MULT_EXPR)
4038             mult = op1;
4039           else if (TREE_CODE (op0) == MULT_EXPR)
4040             mult = op0;
4041
4042           if (mult != NULL_TREE
4043               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4044               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4045                                     speed, &sa_cost))
4046             return sa_cost;
4047         }
4048       break;
4049
4050     CASE_CONVERT:
4051       {
4052         tree inner_mode, outer_mode;
4053         outer_mode = TREE_TYPE (expr);
4054         inner_mode = TREE_TYPE (op0);
4055         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4056                                        TYPE_MODE (inner_mode), speed), 0);
4057       }
4058       break;
4059
4060     case MULT_EXPR:
4061       if (cst_and_fits_in_hwi (op0))
4062         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4063                                              mode, speed), 0);
4064       else if (cst_and_fits_in_hwi (op1))
4065         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4066                                              mode, speed), 0);
4067       else
4068         return new_cost (target_spill_cost [speed], 0);
4069       break;
4070
4071     default:
4072       gcc_unreachable ();
4073     }
4074
4075   cost = add_costs (cost, cost0);
4076   cost = add_costs (cost, cost1);
4077
4078   /* Bound the cost by target_spill_cost.  The parts of complicated
4079      computations often are either loop invariant or at least can
4080      be shared between several iv uses, so letting this grow without
4081      limits would not give reasonable results.  */
4082   if (cost.cost > (int) target_spill_cost [speed])
4083     cost.cost = target_spill_cost [speed];
4084
4085   return cost;
4086 }
4087
4088 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4089    invariants the computation depends on.  */
4090
4091 static comp_cost
4092 force_var_cost (struct ivopts_data *data,
4093                 tree expr, bitmap *depends_on)
4094 {
4095   if (depends_on)
4096     {
4097       fd_ivopts_data = data;
4098       walk_tree (&expr, find_depends, depends_on, NULL);
4099     }
4100
4101   return force_expr_to_var_cost (expr, data->speed);
4102 }
4103
4104 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4105    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4106    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4107    invariants the computation depends on.  */
4108
4109 static comp_cost
4110 split_address_cost (struct ivopts_data *data,
4111                     tree addr, bool *symbol_present, bool *var_present,
4112                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4113 {
4114   tree core;
4115   HOST_WIDE_INT bitsize;
4116   HOST_WIDE_INT bitpos;
4117   tree toffset;
4118   machine_mode mode;
4119   int unsignedp, volatilep;
4120
4121   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4122                               &unsignedp, &volatilep, false);
4123
4124   if (toffset != 0
4125       || bitpos % BITS_PER_UNIT != 0
4126       || TREE_CODE (core) != VAR_DECL)
4127     {
4128       *symbol_present = false;
4129       *var_present = true;
4130       fd_ivopts_data = data;
4131       walk_tree (&addr, find_depends, depends_on, NULL);
4132       return new_cost (target_spill_cost[data->speed], 0);
4133     }
4134
4135   *offset += bitpos / BITS_PER_UNIT;
4136   if (TREE_STATIC (core)
4137       || DECL_EXTERNAL (core))
4138     {
4139       *symbol_present = true;
4140       *var_present = false;
4141       return no_cost;
4142     }
4143
4144   *symbol_present = false;
4145   *var_present = true;
4146   return no_cost;
4147 }
4148
4149 /* Estimates cost of expressing difference of addresses E1 - E2 as
4150    var + symbol + offset.  The value of offset is added to OFFSET,
4151    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4152    part is missing.  DEPENDS_ON is a set of the invariants the computation
4153    depends on.  */
4154
4155 static comp_cost
4156 ptr_difference_cost (struct ivopts_data *data,
4157                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4158                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4159 {
4160   HOST_WIDE_INT diff = 0;
4161   aff_tree aff_e1, aff_e2;
4162   tree type;
4163
4164   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4165
4166   if (ptr_difference_const (e1, e2, &diff))
4167     {
4168       *offset += diff;
4169       *symbol_present = false;
4170       *var_present = false;
4171       return no_cost;
4172     }
4173
4174   if (integer_zerop (e2))
4175     return split_address_cost (data, TREE_OPERAND (e1, 0),
4176                                symbol_present, var_present, offset, depends_on);
4177
4178   *symbol_present = false;
4179   *var_present = true;
4180
4181   type = signed_type_for (TREE_TYPE (e1));
4182   tree_to_aff_combination (e1, type, &aff_e1);
4183   tree_to_aff_combination (e2, type, &aff_e2);
4184   aff_combination_scale (&aff_e2, -1);
4185   aff_combination_add (&aff_e1, &aff_e2);
4186
4187   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4188 }
4189
4190 /* Estimates cost of expressing difference E1 - E2 as
4191    var + symbol + offset.  The value of offset is added to OFFSET,
4192    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4193    part is missing.  DEPENDS_ON is a set of the invariants the computation
4194    depends on.  */
4195
4196 static comp_cost
4197 difference_cost (struct ivopts_data *data,
4198                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4199                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4200 {
4201   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4202   unsigned HOST_WIDE_INT off1, off2;
4203   aff_tree aff_e1, aff_e2;
4204   tree type;
4205
4206   e1 = strip_offset (e1, &off1);
4207   e2 = strip_offset (e2, &off2);
4208   *offset += off1 - off2;
4209
4210   STRIP_NOPS (e1);
4211   STRIP_NOPS (e2);
4212
4213   if (TREE_CODE (e1) == ADDR_EXPR)
4214     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4215                                 offset, depends_on);
4216   *symbol_present = false;
4217
4218   if (operand_equal_p (e1, e2, 0))
4219     {
4220       *var_present = false;
4221       return no_cost;
4222     }
4223
4224   *var_present = true;
4225
4226   if (integer_zerop (e2))
4227     return force_var_cost (data, e1, depends_on);
4228
4229   if (integer_zerop (e1))
4230     {
4231       comp_cost cost = force_var_cost (data, e2, depends_on);
4232       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4233       return cost;
4234     }
4235
4236   type = signed_type_for (TREE_TYPE (e1));
4237   tree_to_aff_combination (e1, type, &aff_e1);
4238   tree_to_aff_combination (e2, type, &aff_e2);
4239   aff_combination_scale (&aff_e2, -1);
4240   aff_combination_add (&aff_e1, &aff_e2);
4241
4242   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4243 }
4244
4245 /* Returns true if AFF1 and AFF2 are identical.  */
4246
4247 static bool
4248 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4249 {
4250   unsigned i;
4251
4252   if (aff1->n != aff2->n)
4253     return false;
4254
4255   for (i = 0; i < aff1->n; i++)
4256     {
4257       if (aff1->elts[i].coef != aff2->elts[i].coef)
4258         return false;
4259
4260       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4261         return false;
4262     }
4263   return true;
4264 }
4265
4266 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4267
4268 static int
4269 get_expr_id (struct ivopts_data *data, tree expr)
4270 {
4271   struct iv_inv_expr_ent ent;
4272   struct iv_inv_expr_ent **slot;
4273
4274   ent.expr = expr;
4275   ent.hash = iterative_hash_expr (expr, 0);
4276   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4277   if (*slot)
4278     return (*slot)->id;
4279
4280   *slot = XNEW (struct iv_inv_expr_ent);
4281   (*slot)->expr = expr;
4282   (*slot)->hash = ent.hash;
4283   (*slot)->id = data->inv_expr_id++;
4284   return (*slot)->id;
4285 }
4286
4287 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4288    requires a new compiler generated temporary.  Returns -1 otherwise.
4289    ADDRESS_P is a flag indicating if the expression is for address
4290    computation.  */
4291
4292 static int
4293 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4294                             tree cbase, HOST_WIDE_INT ratio,
4295                             bool address_p)
4296 {
4297   aff_tree ubase_aff, cbase_aff;
4298   tree expr, ub, cb;
4299
4300   STRIP_NOPS (ubase);
4301   STRIP_NOPS (cbase);
4302   ub = ubase;
4303   cb = cbase;
4304
4305   if ((TREE_CODE (ubase) == INTEGER_CST)
4306       && (TREE_CODE (cbase) == INTEGER_CST))
4307     return -1;
4308
4309   /* Strips the constant part. */
4310   if (TREE_CODE (ubase) == PLUS_EXPR
4311       || TREE_CODE (ubase) == MINUS_EXPR
4312       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4313     {
4314       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4315         ubase = TREE_OPERAND (ubase, 0);
4316     }
4317
4318   /* Strips the constant part. */
4319   if (TREE_CODE (cbase) == PLUS_EXPR
4320       || TREE_CODE (cbase) == MINUS_EXPR
4321       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4322     {
4323       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4324         cbase = TREE_OPERAND (cbase, 0);
4325     }
4326
4327   if (address_p)
4328     {
4329       if (((TREE_CODE (ubase) == SSA_NAME)
4330            || (TREE_CODE (ubase) == ADDR_EXPR
4331                && is_gimple_min_invariant (ubase)))
4332           && (TREE_CODE (cbase) == INTEGER_CST))
4333         return -1;
4334
4335       if (((TREE_CODE (cbase) == SSA_NAME)
4336            || (TREE_CODE (cbase) == ADDR_EXPR
4337                && is_gimple_min_invariant (cbase)))
4338           && (TREE_CODE (ubase) == INTEGER_CST))
4339         return -1;
4340     }
4341
4342   if (ratio == 1)
4343     {
4344       if (operand_equal_p (ubase, cbase, 0))
4345         return -1;
4346
4347       if (TREE_CODE (ubase) == ADDR_EXPR
4348           && TREE_CODE (cbase) == ADDR_EXPR)
4349         {
4350           tree usym, csym;
4351
4352           usym = TREE_OPERAND (ubase, 0);
4353           csym = TREE_OPERAND (cbase, 0);
4354           if (TREE_CODE (usym) == ARRAY_REF)
4355             {
4356               tree ind = TREE_OPERAND (usym, 1);
4357               if (TREE_CODE (ind) == INTEGER_CST
4358                   && tree_fits_shwi_p (ind)
4359                   && tree_to_shwi (ind) == 0)
4360                 usym = TREE_OPERAND (usym, 0);
4361             }
4362           if (TREE_CODE (csym) == ARRAY_REF)
4363             {
4364               tree ind = TREE_OPERAND (csym, 1);
4365               if (TREE_CODE (ind) == INTEGER_CST
4366                   && tree_fits_shwi_p (ind)
4367                   && tree_to_shwi (ind) == 0)
4368                 csym = TREE_OPERAND (csym, 0);
4369             }
4370           if (operand_equal_p (usym, csym, 0))
4371             return -1;
4372         }
4373       /* Now do more complex comparison  */
4374       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4375       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4376       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4377         return -1;
4378     }
4379
4380   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4381   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4382
4383   aff_combination_scale (&cbase_aff, -1 * ratio);
4384   aff_combination_add (&ubase_aff, &cbase_aff);
4385   expr = aff_combination_to_tree (&ubase_aff);
4386   return get_expr_id (data, expr);
4387 }
4388
4389
4390
4391 /* Determines the cost of the computation by that USE is expressed
4392    from induction variable CAND.  If ADDRESS_P is true, we just need
4393    to create an address from it, otherwise we want to get it into
4394    register.  A set of invariants we depend on is stored in
4395    DEPENDS_ON.  AT is the statement at that the value is computed.
4396    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4397    addressing is likely.  */
4398
4399 static comp_cost
4400 get_computation_cost_at (struct ivopts_data *data,
4401                          struct iv_use *use, struct iv_cand *cand,
4402                          bool address_p, bitmap *depends_on, gimple at,
4403                          bool *can_autoinc,
4404                          int *inv_expr_id)
4405 {
4406   tree ubase = use->iv->base, ustep = use->iv->step;
4407   tree cbase, cstep;
4408   tree utype = TREE_TYPE (ubase), ctype;
4409   unsigned HOST_WIDE_INT cstepi, offset = 0;
4410   HOST_WIDE_INT ratio, aratio;
4411   bool var_present, symbol_present, stmt_is_after_inc;
4412   comp_cost cost;
4413   widest_int rat;
4414   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4415   machine_mode mem_mode = (address_p
4416                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4417                                 : VOIDmode);
4418
4419   *depends_on = NULL;
4420
4421   /* Only consider real candidates.  */
4422   if (!cand->iv)
4423     return infinite_cost;
4424
4425   cbase = cand->iv->base;
4426   cstep = cand->iv->step;
4427   ctype = TREE_TYPE (cbase);
4428
4429   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4430     {
4431       /* We do not have a precision to express the values of use.  */
4432       return infinite_cost;
4433     }
4434
4435   if (address_p
4436       || (use->iv->base_object
4437           && cand->iv->base_object
4438           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4439           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4440     {
4441       /* Do not try to express address of an object with computation based
4442          on address of a different object.  This may cause problems in rtl
4443          level alias analysis (that does not expect this to be happening,
4444          as this is illegal in C), and would be unlikely to be useful
4445          anyway.  */
4446       if (use->iv->base_object
4447           && cand->iv->base_object
4448           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4449         return infinite_cost;
4450     }
4451
4452   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4453     {
4454       /* TODO -- add direct handling of this case.  */
4455       goto fallback;
4456     }
4457
4458   /* CSTEPI is removed from the offset in case statement is after the
4459      increment.  If the step is not constant, we use zero instead.
4460      This is a bit imprecise (there is the extra addition), but
4461      redundancy elimination is likely to transform the code so that
4462      it uses value of the variable before increment anyway,
4463      so it is not that much unrealistic.  */
4464   if (cst_and_fits_in_hwi (cstep))
4465     cstepi = int_cst_value (cstep);
4466   else
4467     cstepi = 0;
4468
4469   if (!constant_multiple_of (ustep, cstep, &rat))
4470     return infinite_cost;
4471
4472   if (wi::fits_shwi_p (rat))
4473     ratio = rat.to_shwi ();
4474   else
4475     return infinite_cost;
4476
4477   STRIP_NOPS (cbase);
4478   ctype = TREE_TYPE (cbase);
4479
4480   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4481
4482   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4483      or ratio == 1, it is better to handle this like
4484
4485      ubase - ratio * cbase + ratio * var
4486
4487      (also holds in the case ratio == -1, TODO.  */
4488
4489   if (cst_and_fits_in_hwi (cbase))
4490     {
4491       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4492       cost = difference_cost (data,
4493                               ubase, build_int_cst (utype, 0),
4494                               &symbol_present, &var_present, &offset,
4495                               depends_on);
4496       cost.cost /= avg_loop_niter (data->current_loop);
4497     }
4498   else if (ratio == 1)
4499     {
4500       tree real_cbase = cbase;
4501
4502       /* Check to see if any adjustment is needed.  */
4503       if (cstepi == 0 && stmt_is_after_inc)
4504         {
4505           aff_tree real_cbase_aff;
4506           aff_tree cstep_aff;
4507
4508           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4509                                    &real_cbase_aff);
4510           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4511
4512           aff_combination_add (&real_cbase_aff, &cstep_aff);
4513           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4514         }
4515
4516       cost = difference_cost (data,
4517                               ubase, real_cbase,
4518                               &symbol_present, &var_present, &offset,
4519                               depends_on);
4520       cost.cost /= avg_loop_niter (data->current_loop);
4521     }
4522   else if (address_p
4523            && !POINTER_TYPE_P (ctype)
4524            && multiplier_allowed_in_address_p
4525                 (ratio, mem_mode,
4526                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4527     {
4528       cbase
4529         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4530       cost = difference_cost (data,
4531                               ubase, cbase,
4532                               &symbol_present, &var_present, &offset,
4533                               depends_on);
4534       cost.cost /= avg_loop_niter (data->current_loop);
4535     }
4536   else
4537     {
4538       cost = force_var_cost (data, cbase, depends_on);
4539       cost = add_costs (cost,
4540                         difference_cost (data,
4541                                          ubase, build_int_cst (utype, 0),
4542                                          &symbol_present, &var_present,
4543                                          &offset, depends_on));
4544       cost.cost /= avg_loop_niter (data->current_loop);
4545       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4546     }
4547
4548   /* Set of invariants depended on by sub use has already been computed
4549      for the first use in the group.  */
4550   if (use->sub_id)
4551     {
4552       cost.cost = 0;
4553       if (depends_on && *depends_on)
4554         bitmap_clear (*depends_on);
4555     }
4556   else if (inv_expr_id)
4557     {
4558       *inv_expr_id =
4559           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4560       /* Clear depends on.  */
4561       if (*inv_expr_id != -1 && depends_on && *depends_on)
4562         bitmap_clear (*depends_on);
4563     }
4564
4565   /* If we are after the increment, the value of the candidate is higher by
4566      one iteration.  */
4567   if (stmt_is_after_inc)
4568     offset -= ratio * cstepi;
4569
4570   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4571      (symbol/var1/const parts may be omitted).  If we are looking for an
4572      address, find the cost of addressing this.  */
4573   if (address_p)
4574     return add_costs (cost,
4575                       get_address_cost (symbol_present, var_present,
4576                                         offset, ratio, cstepi,
4577                                         mem_mode,
4578                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4579                                         speed, stmt_is_after_inc,
4580                                         can_autoinc));
4581
4582   /* Otherwise estimate the costs for computing the expression.  */
4583   if (!symbol_present && !var_present && !offset)
4584     {
4585       if (ratio != 1)
4586         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4587       return cost;
4588     }
4589
4590   /* Symbol + offset should be compile-time computable so consider that they
4591       are added once to the variable, if present.  */
4592   if (var_present && (symbol_present || offset))
4593     cost.cost += adjust_setup_cost (data,
4594                                     add_cost (speed, TYPE_MODE (ctype)));
4595
4596   /* Having offset does not affect runtime cost in case it is added to
4597      symbol, but it increases complexity.  */
4598   if (offset)
4599     cost.complexity++;
4600
4601   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4602
4603   aratio = ratio > 0 ? ratio : -ratio;
4604   if (aratio != 1)
4605     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4606   return cost;
4607
4608 fallback:
4609   if (can_autoinc)
4610     *can_autoinc = false;
4611
4612   {
4613     /* Just get the expression, expand it and measure the cost.  */
4614     tree comp = get_computation_at (data->current_loop, use, cand, at);
4615
4616     if (!comp)
4617       return infinite_cost;
4618
4619     if (address_p)
4620       comp = build_simple_mem_ref (comp);
4621
4622     return new_cost (computation_cost (comp, speed), 0);
4623   }
4624 }
4625
4626 /* Determines the cost of the computation by that USE is expressed
4627    from induction variable CAND.  If ADDRESS_P is true, we just need
4628    to create an address from it, otherwise we want to get it into
4629    register.  A set of invariants we depend on is stored in
4630    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4631    autoinc addressing is likely.  */
4632
4633 static comp_cost
4634 get_computation_cost (struct ivopts_data *data,
4635                       struct iv_use *use, struct iv_cand *cand,
4636                       bool address_p, bitmap *depends_on,
4637                       bool *can_autoinc, int *inv_expr_id)
4638 {
4639   return get_computation_cost_at (data,
4640                                   use, cand, address_p, depends_on, use->stmt,
4641                                   can_autoinc, inv_expr_id);
4642 }
4643
4644 /* Determines cost of basing replacement of USE on CAND in a generic
4645    expression.  */
4646
4647 static bool
4648 determine_use_iv_cost_generic (struct ivopts_data *data,
4649                                struct iv_use *use, struct iv_cand *cand)
4650 {
4651   bitmap depends_on;
4652   comp_cost cost;
4653   int inv_expr_id = -1;
4654
4655   /* The simple case first -- if we need to express value of the preserved
4656      original biv, the cost is 0.  This also prevents us from counting the
4657      cost of increment twice -- once at this use and once in the cost of
4658      the candidate.  */
4659   if (cand->pos == IP_ORIGINAL
4660       && cand->incremented_at == use->stmt)
4661     {
4662       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4663                        ERROR_MARK, -1);
4664       return true;
4665     }
4666
4667   cost = get_computation_cost (data, use, cand, false, &depends_on,
4668                                NULL, &inv_expr_id);
4669
4670   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4671                    inv_expr_id);
4672
4673   return !infinite_cost_p (cost);
4674 }
4675
4676 /* Determines cost of basing replacement of USE on CAND in an address.  */
4677
4678 static bool
4679 determine_use_iv_cost_address (struct ivopts_data *data,
4680                                struct iv_use *use, struct iv_cand *cand)
4681 {
4682   bitmap depends_on;
4683   bool can_autoinc;
4684   int inv_expr_id = -1;
4685   struct iv_use *sub_use;
4686   comp_cost sub_cost;
4687   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4688                                          &can_autoinc, &inv_expr_id);
4689
4690   if (cand->ainc_use == use)
4691     {
4692       if (can_autoinc)
4693         cost.cost -= cand->cost_step;
4694       /* If we generated the candidate solely for exploiting autoincrement
4695          opportunities, and it turns out it can't be used, set the cost to
4696          infinity to make sure we ignore it.  */
4697       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4698         cost = infinite_cost;
4699     }
4700   for (sub_use = use->next;
4701        sub_use && !infinite_cost_p (cost);
4702        sub_use = sub_use->next)
4703     {
4704        sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4705                                         &can_autoinc, &inv_expr_id);
4706        cost = add_costs (cost, sub_cost);
4707     }
4708
4709   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4710                    inv_expr_id);
4711
4712   return !infinite_cost_p (cost);
4713 }
4714
4715 /* Computes value of candidate CAND at position AT in iteration NITER, and
4716    stores it to VAL.  */
4717
4718 static void
4719 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4720                aff_tree *val)
4721 {
4722   aff_tree step, delta, nit;
4723   struct iv *iv = cand->iv;
4724   tree type = TREE_TYPE (iv->base);
4725   tree steptype = type;
4726   if (POINTER_TYPE_P (type))
4727     steptype = sizetype;
4728   steptype = unsigned_type_for (type);
4729
4730   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4731   aff_combination_convert (&step, steptype);
4732   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4733   aff_combination_convert (&nit, steptype);
4734   aff_combination_mult (&nit, &step, &delta);
4735   if (stmt_after_increment (loop, cand, at))
4736     aff_combination_add (&delta, &step);
4737
4738   tree_to_aff_combination (iv->base, type, val);
4739   if (!POINTER_TYPE_P (type))
4740     aff_combination_convert (val, steptype);
4741   aff_combination_add (val, &delta);
4742 }
4743
4744 /* Returns period of induction variable iv.  */
4745
4746 static tree
4747 iv_period (struct iv *iv)
4748 {
4749   tree step = iv->step, period, type;
4750   tree pow2div;
4751
4752   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4753
4754   type = unsigned_type_for (TREE_TYPE (step));
4755   /* Period of the iv is lcm (step, type_range)/step -1,
4756      i.e., N*type_range/step - 1. Since type range is power
4757      of two, N == (step >> num_of_ending_zeros_binary (step),
4758      so the final result is
4759
4760        (type_range >> num_of_ending_zeros_binary (step)) - 1
4761
4762   */
4763   pow2div = num_ending_zeros (step);
4764
4765   period = build_low_bits_mask (type,
4766                                 (TYPE_PRECISION (type)
4767                                  - tree_to_uhwi (pow2div)));
4768
4769   return period;
4770 }
4771
4772 /* Returns the comparison operator used when eliminating the iv USE.  */
4773
4774 static enum tree_code
4775 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4776 {
4777   struct loop *loop = data->current_loop;
4778   basic_block ex_bb;
4779   edge exit;
4780
4781   ex_bb = gimple_bb (use->stmt);
4782   exit = EDGE_SUCC (ex_bb, 0);
4783   if (flow_bb_inside_loop_p (loop, exit->dest))
4784     exit = EDGE_SUCC (ex_bb, 1);
4785
4786   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4787 }
4788
4789 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4790    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4791    calculation is performed in non-wrapping type.
4792
4793    TODO: More generally, we could test for the situation that
4794          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4795          This would require knowing the sign of OFFSET.  */
4796
4797 static bool
4798 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4799 {
4800   enum tree_code code;
4801   tree e1, e2;
4802   aff_tree aff_e1, aff_e2, aff_offset;
4803
4804   if (!nowrap_type_p (TREE_TYPE (base)))
4805     return false;
4806
4807   base = expand_simple_operations (base);
4808
4809   if (TREE_CODE (base) == SSA_NAME)
4810     {
4811       gimple stmt = SSA_NAME_DEF_STMT (base);
4812
4813       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4814         return false;
4815
4816       code = gimple_assign_rhs_code (stmt);
4817       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4818         return false;
4819
4820       e1 = gimple_assign_rhs1 (stmt);
4821       e2 = gimple_assign_rhs2 (stmt);
4822     }
4823   else
4824     {
4825       code = TREE_CODE (base);
4826       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4827         return false;
4828       e1 = TREE_OPERAND (base, 0);
4829       e2 = TREE_OPERAND (base, 1);
4830     }
4831
4832   /* Use affine expansion as deeper inspection to prove the equality.  */
4833   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4834                                   &aff_e2, &data->name_expansion_cache);
4835   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4836                                   &aff_offset, &data->name_expansion_cache);
4837   aff_combination_scale (&aff_offset, -1);
4838   switch (code)
4839     {
4840     case PLUS_EXPR:
4841       aff_combination_add (&aff_e2, &aff_offset);
4842       if (aff_combination_zero_p (&aff_e2))
4843         return true;
4844
4845       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4846                                       &aff_e1, &data->name_expansion_cache);
4847       aff_combination_add (&aff_e1, &aff_offset);
4848       return aff_combination_zero_p (&aff_e1);
4849
4850     case POINTER_PLUS_EXPR:
4851       aff_combination_add (&aff_e2, &aff_offset);
4852       return aff_combination_zero_p (&aff_e2);
4853
4854     default:
4855       return false;
4856     }
4857 }
4858
4859 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4860    comparison with CAND.  NITER describes the number of iterations of
4861    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4862
4863    We aim to handle the following situation:
4864
4865    sometype *base, *p;
4866    int a, b, i;
4867
4868    i = a;
4869    p = p_0 = base + a;
4870
4871    do
4872      {
4873        bla (*p);
4874        p++;
4875        i++;
4876      }
4877    while (i < b);
4878
4879    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4880    We aim to optimize this to
4881
4882    p = p_0 = base + a;
4883    do
4884      {
4885        bla (*p);
4886        p++;
4887      }
4888    while (p < p_0 - a + b);
4889
4890    This preserves the correctness, since the pointer arithmetics does not
4891    overflow.  More precisely:
4892
4893    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4894       overflow in computing it or the values of p.
4895    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4896       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4897
4898 static bool
4899 iv_elimination_compare_lt (struct ivopts_data *data,
4900                            struct iv_cand *cand, enum tree_code *comp_p,
4901                            struct tree_niter_desc *niter)
4902 {
4903   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4904   struct aff_tree nit, tmpa, tmpb;
4905   enum tree_code comp;
4906   HOST_WIDE_INT step;
4907
4908   /* We need to know that the candidate induction variable does not overflow.
4909      While more complex analysis may be used to prove this, for now just
4910      check that the variable appears in the original program and that it
4911      is computed in a type that guarantees no overflows.  */
4912   cand_type = TREE_TYPE (cand->iv->base);
4913   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4914     return false;
4915
4916   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4917      the calculation of the BOUND could overflow, making the comparison
4918      invalid.  */
4919   if (!data->loop_single_exit_p)
4920     return false;
4921
4922   /* We need to be able to decide whether candidate is increasing or decreasing
4923      in order to choose the right comparison operator.  */
4924   if (!cst_and_fits_in_hwi (cand->iv->step))
4925     return false;
4926   step = int_cst_value (cand->iv->step);
4927
4928   /* Check that the number of iterations matches the expected pattern:
4929      a + 1 > b ? 0 : b - a - 1.  */
4930   mbz = niter->may_be_zero;
4931   if (TREE_CODE (mbz) == GT_EXPR)
4932     {
4933       /* Handle a + 1 > b.  */
4934       tree op0 = TREE_OPERAND (mbz, 0);
4935       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4936         {
4937           a = TREE_OPERAND (op0, 0);
4938           b = TREE_OPERAND (mbz, 1);
4939         }
4940       else
4941         return false;
4942     }
4943   else if (TREE_CODE (mbz) == LT_EXPR)
4944     {
4945       tree op1 = TREE_OPERAND (mbz, 1);
4946
4947       /* Handle b < a + 1.  */
4948       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4949         {
4950           a = TREE_OPERAND (op1, 0);
4951           b = TREE_OPERAND (mbz, 0);
4952         }
4953       else
4954         return false;
4955     }
4956   else
4957     return false;
4958
4959   /* Expected number of iterations is B - A - 1.  Check that it matches
4960      the actual number, i.e., that B - A - NITER = 1.  */
4961   tree_to_aff_combination (niter->niter, nit_type, &nit);
4962   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4963   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4964   aff_combination_scale (&nit, -1);
4965   aff_combination_scale (&tmpa, -1);
4966   aff_combination_add (&tmpb, &tmpa);
4967   aff_combination_add (&tmpb, &nit);
4968   if (tmpb.n != 0 || tmpb.offset != 1)
4969     return false;
4970
4971   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4972      overflow.  */
4973   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4974                         cand->iv->step,
4975                         fold_convert (TREE_TYPE (cand->iv->step), a));
4976   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4977     return false;
4978
4979   /* Determine the new comparison operator.  */
4980   comp = step < 0 ? GT_EXPR : LT_EXPR;
4981   if (*comp_p == NE_EXPR)
4982     *comp_p = comp;
4983   else if (*comp_p == EQ_EXPR)
4984     *comp_p = invert_tree_comparison (comp, false);
4985   else
4986     gcc_unreachable ();
4987
4988   return true;
4989 }
4990
4991 /* Check whether it is possible to express the condition in USE by comparison
4992    of candidate CAND.  If so, store the value compared with to BOUND, and the
4993    comparison operator to COMP.  */
4994
4995 static bool
4996 may_eliminate_iv (struct ivopts_data *data,
4997                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4998                   enum tree_code *comp)
4999 {
5000   basic_block ex_bb;
5001   edge exit;
5002   tree period;
5003   struct loop *loop = data->current_loop;
5004   aff_tree bnd;
5005   struct tree_niter_desc *desc = NULL;
5006
5007   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5008     return false;
5009
5010   /* For now works only for exits that dominate the loop latch.
5011      TODO: extend to other conditions inside loop body.  */
5012   ex_bb = gimple_bb (use->stmt);
5013   if (use->stmt != last_stmt (ex_bb)
5014       || gimple_code (use->stmt) != GIMPLE_COND
5015       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5016     return false;
5017
5018   exit = EDGE_SUCC (ex_bb, 0);
5019   if (flow_bb_inside_loop_p (loop, exit->dest))
5020     exit = EDGE_SUCC (ex_bb, 1);
5021   if (flow_bb_inside_loop_p (loop, exit->dest))
5022     return false;
5023
5024   desc = niter_for_exit (data, exit);
5025   if (!desc)
5026     return false;
5027
5028   /* Determine whether we can use the variable to test the exit condition.
5029      This is the case iff the period of the induction variable is greater
5030      than the number of iterations for which the exit condition is true.  */
5031   period = iv_period (cand->iv);
5032
5033   /* If the number of iterations is constant, compare against it directly.  */
5034   if (TREE_CODE (desc->niter) == INTEGER_CST)
5035     {
5036       /* See cand_value_at.  */
5037       if (stmt_after_increment (loop, cand, use->stmt))
5038         {
5039           if (!tree_int_cst_lt (desc->niter, period))
5040             return false;
5041         }
5042       else
5043         {
5044           if (tree_int_cst_lt (period, desc->niter))
5045             return false;
5046         }
5047     }
5048
5049   /* If not, and if this is the only possible exit of the loop, see whether
5050      we can get a conservative estimate on the number of iterations of the
5051      entire loop and compare against that instead.  */
5052   else
5053     {
5054       widest_int period_value, max_niter;
5055
5056       max_niter = desc->max;
5057       if (stmt_after_increment (loop, cand, use->stmt))
5058         max_niter += 1;
5059       period_value = wi::to_widest (period);
5060       if (wi::gtu_p (max_niter, period_value))
5061         {
5062           /* See if we can take advantage of inferred loop bound information.  */
5063           if (data->loop_single_exit_p)
5064             {
5065               if (!max_loop_iterations (loop, &max_niter))
5066                 return false;
5067               /* The loop bound is already adjusted by adding 1.  */
5068               if (wi::gtu_p (max_niter, period_value))
5069                 return false;
5070             }
5071           else
5072             return false;
5073         }
5074     }
5075
5076   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5077
5078   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5079                          aff_combination_to_tree (&bnd));
5080   *comp = iv_elimination_compare (data, use);
5081
5082   /* It is unlikely that computing the number of iterations using division
5083      would be more profitable than keeping the original induction variable.  */
5084   if (expression_expensive_p (*bound))
5085     return false;
5086
5087   /* Sometimes, it is possible to handle the situation that the number of
5088      iterations may be zero unless additional assumtions by using <
5089      instead of != in the exit condition.
5090
5091      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5092            base the exit condition on it.  However, that is often too
5093            expensive.  */
5094   if (!integer_zerop (desc->may_be_zero))
5095     return iv_elimination_compare_lt (data, cand, comp, desc);
5096
5097   return true;
5098 }
5099
5100  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5101     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5102
5103 static int
5104 parm_decl_cost (struct ivopts_data *data, tree bound)
5105 {
5106   tree sbound = bound;
5107   STRIP_NOPS (sbound);
5108
5109   if (TREE_CODE (sbound) == SSA_NAME
5110       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5111       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5112       && data->body_includes_call)
5113     return COSTS_N_INSNS (1);
5114
5115   return 0;
5116 }
5117
5118 /* Determines cost of basing replacement of USE on CAND in a condition.  */
5119
5120 static bool
5121 determine_use_iv_cost_condition (struct ivopts_data *data,
5122                                  struct iv_use *use, struct iv_cand *cand)
5123 {
5124   tree bound = NULL_TREE;
5125   struct iv *cmp_iv;
5126   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5127   comp_cost elim_cost, express_cost, cost, bound_cost;
5128   bool ok;
5129   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5130   tree *control_var, *bound_cst;
5131   enum tree_code comp = ERROR_MARK;
5132
5133   /* Only consider real candidates.  */
5134   if (!cand->iv)
5135     {
5136       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5137                        ERROR_MARK, -1);
5138       return false;
5139     }
5140
5141   /* Try iv elimination.  */
5142   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5143     {
5144       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5145       if (elim_cost.cost == 0)
5146         elim_cost.cost = parm_decl_cost (data, bound);
5147       else if (TREE_CODE (bound) == INTEGER_CST)
5148         elim_cost.cost = 0;
5149       /* If we replace a loop condition 'i < n' with 'p < base + n',
5150          depends_on_elim will have 'base' and 'n' set, which implies
5151          that both 'base' and 'n' will be live during the loop.  More likely,
5152          'base + n' will be loop invariant, resulting in only one live value
5153          during the loop.  So in that case we clear depends_on_elim and set
5154         elim_inv_expr_id instead.  */
5155       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5156         {
5157           elim_inv_expr_id = get_expr_id (data, bound);
5158           bitmap_clear (depends_on_elim);
5159         }
5160       /* The bound is a loop invariant, so it will be only computed
5161          once.  */
5162       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5163     }
5164   else
5165     elim_cost = infinite_cost;
5166
5167   /* Try expressing the original giv.  If it is compared with an invariant,
5168      note that we cannot get rid of it.  */
5169   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5170                               NULL, &cmp_iv);
5171   gcc_assert (ok);
5172
5173   /* When the condition is a comparison of the candidate IV against
5174      zero, prefer this IV.
5175
5176      TODO: The constant that we're subtracting from the cost should
5177      be target-dependent.  This information should be added to the
5178      target costs for each backend.  */
5179   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5180       && integer_zerop (*bound_cst)
5181       && (operand_equal_p (*control_var, cand->var_after, 0)
5182           || operand_equal_p (*control_var, cand->var_before, 0)))
5183     elim_cost.cost -= 1;
5184
5185   express_cost = get_computation_cost (data, use, cand, false,
5186                                        &depends_on_express, NULL,
5187                                        &express_inv_expr_id);
5188   fd_ivopts_data = data;
5189   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5190
5191   /* Count the cost of the original bound as well.  */
5192   bound_cost = force_var_cost (data, *bound_cst, NULL);
5193   if (bound_cost.cost == 0)
5194     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5195   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5196     bound_cost.cost = 0;
5197   express_cost.cost += bound_cost.cost;
5198
5199   /* Choose the better approach, preferring the eliminated IV. */
5200   if (compare_costs (elim_cost, express_cost) <= 0)
5201     {
5202       cost = elim_cost;
5203       depends_on = depends_on_elim;
5204       depends_on_elim = NULL;
5205       inv_expr_id = elim_inv_expr_id;
5206     }
5207   else
5208     {
5209       cost = express_cost;
5210       depends_on = depends_on_express;
5211       depends_on_express = NULL;
5212       bound = NULL_TREE;
5213       comp = ERROR_MARK;
5214       inv_expr_id = express_inv_expr_id;
5215     }
5216
5217   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5218
5219   if (depends_on_elim)
5220     BITMAP_FREE (depends_on_elim);
5221   if (depends_on_express)
5222     BITMAP_FREE (depends_on_express);
5223
5224   return !infinite_cost_p (cost);
5225 }
5226
5227 /* Determines cost of basing replacement of USE on CAND.  Returns false
5228    if USE cannot be based on CAND.  */
5229
5230 static bool
5231 determine_use_iv_cost (struct ivopts_data *data,
5232                        struct iv_use *use, struct iv_cand *cand)
5233 {
5234   switch (use->type)
5235     {
5236     case USE_NONLINEAR_EXPR:
5237       return determine_use_iv_cost_generic (data, use, cand);
5238
5239     case USE_ADDRESS:
5240       return determine_use_iv_cost_address (data, use, cand);
5241
5242     case USE_COMPARE:
5243       return determine_use_iv_cost_condition (data, use, cand);
5244
5245     default:
5246       gcc_unreachable ();
5247     }
5248 }
5249
5250 /* Return true if get_computation_cost indicates that autoincrement is
5251    a possibility for the pair of USE and CAND, false otherwise.  */
5252
5253 static bool
5254 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5255                            struct iv_cand *cand)
5256 {
5257   bitmap depends_on;
5258   bool can_autoinc;
5259   comp_cost cost;
5260
5261   if (use->type != USE_ADDRESS)
5262     return false;
5263
5264   cost = get_computation_cost (data, use, cand, true, &depends_on,
5265                                &can_autoinc, NULL);
5266
5267   BITMAP_FREE (depends_on);
5268
5269   return !infinite_cost_p (cost) && can_autoinc;
5270 }
5271
5272 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5273    use that allows autoincrement, and set their AINC_USE if possible.  */
5274
5275 static void
5276 set_autoinc_for_original_candidates (struct ivopts_data *data)
5277 {
5278   unsigned i, j;
5279
5280   for (i = 0; i < n_iv_cands (data); i++)
5281     {
5282       struct iv_cand *cand = iv_cand (data, i);
5283       struct iv_use *closest_before = NULL;
5284       struct iv_use *closest_after = NULL;
5285       if (cand->pos != IP_ORIGINAL)
5286         continue;
5287
5288       for (j = 0; j < n_iv_uses (data); j++)
5289         {
5290           struct iv_use *use = iv_use (data, j);
5291           unsigned uid = gimple_uid (use->stmt);
5292
5293           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5294             continue;
5295
5296           if (uid < gimple_uid (cand->incremented_at)
5297               && (closest_before == NULL
5298                   || uid > gimple_uid (closest_before->stmt)))
5299             closest_before = use;
5300
5301           if (uid > gimple_uid (cand->incremented_at)
5302               && (closest_after == NULL
5303                   || uid < gimple_uid (closest_after->stmt)))
5304             closest_after = use;
5305         }
5306
5307       if (closest_before != NULL
5308           && autoinc_possible_for_pair (data, closest_before, cand))
5309         cand->ainc_use = closest_before;
5310       else if (closest_after != NULL
5311                && autoinc_possible_for_pair (data, closest_after, cand))
5312         cand->ainc_use = closest_after;
5313     }
5314 }
5315
5316 /* Finds the candidates for the induction variables.  */
5317
5318 static void
5319 find_iv_candidates (struct ivopts_data *data)
5320 {
5321   /* Add commonly used ivs.  */
5322   add_standard_iv_candidates (data);
5323
5324   /* Add old induction variables.  */
5325   add_iv_candidate_for_bivs (data);
5326
5327   /* Add induction variables derived from uses.  */
5328   add_iv_candidate_for_uses (data);
5329
5330   set_autoinc_for_original_candidates (data);
5331
5332   /* Record the important candidates.  */
5333   record_important_candidates (data);
5334 }
5335
5336 /* Determines costs of basing the use of the iv on an iv candidate.  */
5337
5338 static void
5339 determine_use_iv_costs (struct ivopts_data *data)
5340 {
5341   unsigned i, j;
5342   struct iv_use *use;
5343   struct iv_cand *cand;
5344   bitmap to_clear = BITMAP_ALLOC (NULL);
5345
5346   alloc_use_cost_map (data);
5347
5348   for (i = 0; i < n_iv_uses (data); i++)
5349     {
5350       use = iv_use (data, i);
5351
5352       if (data->consider_all_candidates)
5353         {
5354           for (j = 0; j < n_iv_cands (data); j++)
5355             {
5356               cand = iv_cand (data, j);
5357               determine_use_iv_cost (data, use, cand);
5358             }
5359         }
5360       else
5361         {
5362           bitmap_iterator bi;
5363
5364           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5365             {
5366               cand = iv_cand (data, j);
5367               if (!determine_use_iv_cost (data, use, cand))
5368                 bitmap_set_bit (to_clear, j);
5369             }
5370
5371           /* Remove the candidates for that the cost is infinite from
5372              the list of related candidates.  */
5373           bitmap_and_compl_into (use->related_cands, to_clear);
5374           bitmap_clear (to_clear);
5375         }
5376     }
5377
5378   BITMAP_FREE (to_clear);
5379
5380   if (dump_file && (dump_flags & TDF_DETAILS))
5381     {
5382       fprintf (dump_file, "Use-candidate costs:\n");
5383
5384       for (i = 0; i < n_iv_uses (data); i++)
5385         {
5386           use = iv_use (data, i);
5387
5388           fprintf (dump_file, "Use %d:\n", i);
5389           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5390           for (j = 0; j < use->n_map_members; j++)
5391             {
5392               if (!use->cost_map[j].cand
5393                   || infinite_cost_p (use->cost_map[j].cost))
5394                 continue;
5395
5396               fprintf (dump_file, "  %d\t%d\t%d\t",
5397                        use->cost_map[j].cand->id,
5398                        use->cost_map[j].cost.cost,
5399                        use->cost_map[j].cost.complexity);
5400               if (use->cost_map[j].depends_on)
5401                 bitmap_print (dump_file,
5402                               use->cost_map[j].depends_on, "","");
5403               if (use->cost_map[j].inv_expr_id != -1)
5404                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5405               fprintf (dump_file, "\n");
5406             }
5407
5408           fprintf (dump_file, "\n");
5409         }
5410       fprintf (dump_file, "\n");
5411     }
5412 }
5413
5414 /* Determines cost of the candidate CAND.  */
5415
5416 static void
5417 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5418 {
5419   comp_cost cost_base;
5420   unsigned cost, cost_step;
5421   tree base;
5422
5423   if (!cand->iv)
5424     {
5425       cand->cost = 0;
5426       return;
5427     }
5428
5429   /* There are two costs associated with the candidate -- its increment
5430      and its initialization.  The second is almost negligible for any loop
5431      that rolls enough, so we take it just very little into account.  */
5432
5433   base = cand->iv->base;
5434   cost_base = force_var_cost (data, base, NULL);
5435   /* It will be exceptional that the iv register happens to be initialized with
5436      the proper value at no cost.  In general, there will at least be a regcopy
5437      or a const set.  */
5438   if (cost_base.cost == 0)
5439     cost_base.cost = COSTS_N_INSNS (1);
5440   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5441
5442   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5443
5444   /* Prefer the original ivs unless we may gain something by replacing it.
5445      The reason is to make debugging simpler; so this is not relevant for
5446      artificial ivs created by other optimization passes.  */
5447   if (cand->pos != IP_ORIGINAL
5448       || !SSA_NAME_VAR (cand->var_before)
5449       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5450     cost++;
5451
5452   /* Prefer not to insert statements into latch unless there are some
5453      already (so that we do not create unnecessary jumps).  */
5454   if (cand->pos == IP_END
5455       && empty_block_p (ip_end_pos (data->current_loop)))
5456     cost++;
5457
5458   cand->cost = cost;
5459   cand->cost_step = cost_step;
5460 }
5461
5462 /* Determines costs of computation of the candidates.  */
5463
5464 static void
5465 determine_iv_costs (struct ivopts_data *data)
5466 {
5467   unsigned i;
5468
5469   if (dump_file && (dump_flags & TDF_DETAILS))
5470     {
5471       fprintf (dump_file, "Candidate costs:\n");
5472       fprintf (dump_file, "  cand\tcost\n");
5473     }
5474
5475   for (i = 0; i < n_iv_cands (data); i++)
5476     {
5477       struct iv_cand *cand = iv_cand (data, i);
5478
5479       determine_iv_cost (data, cand);
5480
5481       if (dump_file && (dump_flags & TDF_DETAILS))
5482         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5483     }
5484
5485   if (dump_file && (dump_flags & TDF_DETAILS))
5486     fprintf (dump_file, "\n");
5487 }
5488
5489 /* Calculates cost for having SIZE induction variables.  */
5490
5491 static unsigned
5492 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5493 {
5494   /* We add size to the cost, so that we prefer eliminating ivs
5495      if possible.  */
5496   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5497                                             data->body_includes_call);
5498 }
5499
5500 /* For each size of the induction variable set determine the penalty.  */
5501
5502 static void
5503 determine_set_costs (struct ivopts_data *data)
5504 {
5505   unsigned j, n;
5506   gphi *phi;
5507   gphi_iterator psi;
5508   tree op;
5509   struct loop *loop = data->current_loop;
5510   bitmap_iterator bi;
5511
5512   if (dump_file && (dump_flags & TDF_DETAILS))
5513     {
5514       fprintf (dump_file, "Global costs:\n");
5515       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5516       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5517       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5518       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5519     }
5520
5521   n = 0;
5522   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5523     {
5524       phi = psi.phi ();
5525       op = PHI_RESULT (phi);
5526
5527       if (virtual_operand_p (op))
5528         continue;
5529
5530       if (get_iv (data, op))
5531         continue;
5532
5533       n++;
5534     }
5535
5536   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5537     {
5538       struct version_info *info = ver_info (data, j);
5539
5540       if (info->inv_id && info->has_nonlin_use)
5541         n++;
5542     }
5543
5544   data->regs_used = n;
5545   if (dump_file && (dump_flags & TDF_DETAILS))
5546     fprintf (dump_file, "  regs_used %d\n", n);
5547
5548   if (dump_file && (dump_flags & TDF_DETAILS))
5549     {
5550       fprintf (dump_file, "  cost for size:\n");
5551       fprintf (dump_file, "  ivs\tcost\n");
5552       for (j = 0; j <= 2 * target_avail_regs; j++)
5553         fprintf (dump_file, "  %d\t%d\n", j,
5554                  ivopts_global_cost_for_size (data, j));
5555       fprintf (dump_file, "\n");
5556     }
5557 }
5558
5559 /* Returns true if A is a cheaper cost pair than B.  */
5560
5561 static bool
5562 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5563 {
5564   int cmp;
5565
5566   if (!a)
5567     return false;
5568
5569   if (!b)
5570     return true;
5571
5572   cmp = compare_costs (a->cost, b->cost);
5573   if (cmp < 0)
5574     return true;
5575
5576   if (cmp > 0)
5577     return false;
5578
5579   /* In case the costs are the same, prefer the cheaper candidate.  */
5580   if (a->cand->cost < b->cand->cost)
5581     return true;
5582
5583   return false;
5584 }
5585
5586
5587 /* Returns candidate by that USE is expressed in IVS.  */
5588
5589 static struct cost_pair *
5590 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5591 {
5592   return ivs->cand_for_use[use->id];
5593 }
5594
5595 /* Computes the cost field of IVS structure.  */
5596
5597 static void
5598 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5599 {
5600   comp_cost cost = ivs->cand_use_cost;
5601
5602   cost.cost += ivs->cand_cost;
5603
5604   cost.cost += ivopts_global_cost_for_size (data,
5605                                             ivs->n_regs + ivs->num_used_inv_expr);
5606
5607   ivs->cost = cost;
5608 }
5609
5610 /* Remove invariants in set INVS to set IVS.  */
5611
5612 static void
5613 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5614 {
5615   bitmap_iterator bi;
5616   unsigned iid;
5617
5618   if (!invs)
5619     return;
5620
5621   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5622     {
5623       ivs->n_invariant_uses[iid]--;
5624       if (ivs->n_invariant_uses[iid] == 0)
5625         ivs->n_regs--;
5626     }
5627 }
5628
5629 /* Set USE not to be expressed by any candidate in IVS.  */
5630
5631 static void
5632 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5633                  struct iv_use *use)
5634 {
5635   unsigned uid = use->id, cid;
5636   struct cost_pair *cp;
5637
5638   cp = ivs->cand_for_use[uid];
5639   if (!cp)
5640     return;
5641   cid = cp->cand->id;
5642
5643   ivs->bad_uses++;
5644   ivs->cand_for_use[uid] = NULL;
5645   ivs->n_cand_uses[cid]--;
5646
5647   if (ivs->n_cand_uses[cid] == 0)
5648     {
5649       bitmap_clear_bit (ivs->cands, cid);
5650       /* Do not count the pseudocandidates.  */
5651       if (cp->cand->iv)
5652         ivs->n_regs--;
5653       ivs->n_cands--;
5654       ivs->cand_cost -= cp->cand->cost;
5655
5656       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5657     }
5658
5659   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5660
5661   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5662
5663   if (cp->inv_expr_id != -1)
5664     {
5665       ivs->used_inv_expr[cp->inv_expr_id]--;
5666       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5667         ivs->num_used_inv_expr--;
5668     }
5669   iv_ca_recount_cost (data, ivs);
5670 }
5671
5672 /* Add invariants in set INVS to set IVS.  */
5673
5674 static void
5675 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5676 {
5677   bitmap_iterator bi;
5678   unsigned iid;
5679
5680   if (!invs)
5681     return;
5682
5683   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5684     {
5685       ivs->n_invariant_uses[iid]++;
5686       if (ivs->n_invariant_uses[iid] == 1)
5687         ivs->n_regs++;
5688     }
5689 }
5690
5691 /* Set cost pair for USE in set IVS to CP.  */
5692
5693 static void
5694 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5695               struct iv_use *use, struct cost_pair *cp)
5696 {
5697   unsigned uid = use->id, cid;
5698
5699   if (ivs->cand_for_use[uid] == cp)
5700     return;
5701
5702   if (ivs->cand_for_use[uid])
5703     iv_ca_set_no_cp (data, ivs, use);
5704
5705   if (cp)
5706     {
5707       cid = cp->cand->id;
5708
5709       ivs->bad_uses--;
5710       ivs->cand_for_use[uid] = cp;
5711       ivs->n_cand_uses[cid]++;
5712       if (ivs->n_cand_uses[cid] == 1)
5713         {
5714           bitmap_set_bit (ivs->cands, cid);
5715           /* Do not count the pseudocandidates.  */
5716           if (cp->cand->iv)
5717             ivs->n_regs++;
5718           ivs->n_cands++;
5719           ivs->cand_cost += cp->cand->cost;
5720
5721           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5722         }
5723
5724       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5725       iv_ca_set_add_invariants (ivs, cp->depends_on);
5726
5727       if (cp->inv_expr_id != -1)
5728         {
5729           ivs->used_inv_expr[cp->inv_expr_id]++;
5730           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5731             ivs->num_used_inv_expr++;
5732         }
5733       iv_ca_recount_cost (data, ivs);
5734     }
5735 }
5736
5737 /* Extend set IVS by expressing USE by some of the candidates in it
5738    if possible.  Consider all important candidates if candidates in
5739    set IVS don't give any result.  */
5740
5741 static void
5742 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5743                struct iv_use *use)
5744 {
5745   struct cost_pair *best_cp = NULL, *cp;
5746   bitmap_iterator bi;
5747   unsigned i;
5748   struct iv_cand *cand;
5749
5750   gcc_assert (ivs->upto >= use->id);
5751   ivs->upto++;
5752   ivs->bad_uses++;
5753
5754   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5755     {
5756       cand = iv_cand (data, i);
5757       cp = get_use_iv_cost (data, use, cand);
5758       if (cheaper_cost_pair (cp, best_cp))
5759         best_cp = cp;
5760     }
5761
5762   if (best_cp == NULL)
5763     {
5764       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5765         {
5766           cand = iv_cand (data, i);
5767           cp = get_use_iv_cost (data, use, cand);
5768           if (cheaper_cost_pair (cp, best_cp))
5769             best_cp = cp;
5770         }
5771     }
5772
5773   iv_ca_set_cp (data, ivs, use, best_cp);
5774 }
5775
5776 /* Get cost for assignment IVS.  */
5777
5778 static comp_cost
5779 iv_ca_cost (struct iv_ca *ivs)
5780 {
5781   /* This was a conditional expression but it triggered a bug in
5782      Sun C 5.5.  */
5783   if (ivs->bad_uses)
5784     return infinite_cost;
5785   else
5786     return ivs->cost;
5787 }
5788
5789 /* Returns true if all dependences of CP are among invariants in IVS.  */
5790
5791 static bool
5792 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5793 {
5794   unsigned i;
5795   bitmap_iterator bi;
5796
5797   if (!cp->depends_on)
5798     return true;
5799
5800   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5801     {
5802       if (ivs->n_invariant_uses[i] == 0)
5803         return false;
5804     }
5805
5806   return true;
5807 }
5808
5809 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5810    it before NEXT_CHANGE.  */
5811
5812 static struct iv_ca_delta *
5813 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5814                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5815 {
5816   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5817
5818   change->use = use;
5819   change->old_cp = old_cp;
5820   change->new_cp = new_cp;
5821   change->next_change = next_change;
5822
5823   return change;
5824 }
5825
5826 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5827    are rewritten.  */
5828
5829 static struct iv_ca_delta *
5830 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5831 {
5832   struct iv_ca_delta *last;
5833
5834   if (!l2)
5835     return l1;
5836
5837   if (!l1)
5838     return l2;
5839
5840   for (last = l1; last->next_change; last = last->next_change)
5841     continue;
5842   last->next_change = l2;
5843
5844   return l1;
5845 }
5846
5847 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5848
5849 static struct iv_ca_delta *
5850 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5851 {
5852   struct iv_ca_delta *act, *next, *prev = NULL;
5853
5854   for (act = delta; act; act = next)
5855     {
5856       next = act->next_change;
5857       act->next_change = prev;
5858       prev = act;
5859
5860       std::swap (act->old_cp, act->new_cp);
5861     }
5862
5863   return prev;
5864 }
5865
5866 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5867    reverted instead.  */
5868
5869 static void
5870 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5871                     struct iv_ca_delta *delta, bool forward)
5872 {
5873   struct cost_pair *from, *to;
5874   struct iv_ca_delta *act;
5875
5876   if (!forward)
5877     delta = iv_ca_delta_reverse (delta);
5878
5879   for (act = delta; act; act = act->next_change)
5880     {
5881       from = act->old_cp;
5882       to = act->new_cp;
5883       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5884       iv_ca_set_cp (data, ivs, act->use, to);
5885     }
5886
5887   if (!forward)
5888     iv_ca_delta_reverse (delta);
5889 }
5890
5891 /* Returns true if CAND is used in IVS.  */
5892
5893 static bool
5894 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5895 {
5896   return ivs->n_cand_uses[cand->id] > 0;
5897 }
5898
5899 /* Returns number of induction variable candidates in the set IVS.  */
5900
5901 static unsigned
5902 iv_ca_n_cands (struct iv_ca *ivs)
5903 {
5904   return ivs->n_cands;
5905 }
5906
5907 /* Free the list of changes DELTA.  */
5908
5909 static void
5910 iv_ca_delta_free (struct iv_ca_delta **delta)
5911 {
5912   struct iv_ca_delta *act, *next;
5913
5914   for (act = *delta; act; act = next)
5915     {
5916       next = act->next_change;
5917       free (act);
5918     }
5919
5920   *delta = NULL;
5921 }
5922
5923 /* Allocates new iv candidates assignment.  */
5924
5925 static struct iv_ca *
5926 iv_ca_new (struct ivopts_data *data)
5927 {
5928   struct iv_ca *nw = XNEW (struct iv_ca);
5929
5930   nw->upto = 0;
5931   nw->bad_uses = 0;
5932   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5933   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5934   nw->cands = BITMAP_ALLOC (NULL);
5935   nw->n_cands = 0;
5936   nw->n_regs = 0;
5937   nw->cand_use_cost = no_cost;
5938   nw->cand_cost = 0;
5939   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5940   nw->cost = no_cost;
5941   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5942   nw->num_used_inv_expr = 0;
5943
5944   return nw;
5945 }
5946
5947 /* Free memory occupied by the set IVS.  */
5948
5949 static void
5950 iv_ca_free (struct iv_ca **ivs)
5951 {
5952   free ((*ivs)->cand_for_use);
5953   free ((*ivs)->n_cand_uses);
5954   BITMAP_FREE ((*ivs)->cands);
5955   free ((*ivs)->n_invariant_uses);
5956   free ((*ivs)->used_inv_expr);
5957   free (*ivs);
5958   *ivs = NULL;
5959 }
5960
5961 /* Dumps IVS to FILE.  */
5962
5963 static void
5964 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5965 {
5966   const char *pref = "  invariants ";
5967   unsigned i;
5968   comp_cost cost = iv_ca_cost (ivs);
5969
5970   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5971   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5972            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5973   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5974
5975    for (i = 0; i < ivs->upto; i++)
5976     {
5977       struct iv_use *use = iv_use (data, i);
5978       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5979       if (cp)
5980         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5981                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5982       else
5983         fprintf (file, "   use:%d --> ??\n", use->id);
5984     }
5985
5986   for (i = 1; i <= data->max_inv_id; i++)
5987     if (ivs->n_invariant_uses[i])
5988       {
5989         fprintf (file, "%s%d", pref, i);
5990         pref = ", ";
5991       }
5992   fprintf (file, "\n\n");
5993 }
5994
5995 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5996    new set, and store differences in DELTA.  Number of induction variables
5997    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5998    the function will try to find a solution with mimimal iv candidates.  */
5999
6000 static comp_cost
6001 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6002               struct iv_cand *cand, struct iv_ca_delta **delta,
6003               unsigned *n_ivs, bool min_ncand)
6004 {
6005   unsigned i;
6006   comp_cost cost;
6007   struct iv_use *use;
6008   struct cost_pair *old_cp, *new_cp;
6009
6010   *delta = NULL;
6011   for (i = 0; i < ivs->upto; i++)
6012     {
6013       use = iv_use (data, i);
6014       old_cp = iv_ca_cand_for_use (ivs, use);
6015
6016       if (old_cp
6017           && old_cp->cand == cand)
6018         continue;
6019
6020       new_cp = get_use_iv_cost (data, use, cand);
6021       if (!new_cp)
6022         continue;
6023
6024       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6025         continue;
6026
6027       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6028         continue;
6029
6030       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6031     }
6032
6033   iv_ca_delta_commit (data, ivs, *delta, true);
6034   cost = iv_ca_cost (ivs);
6035   if (n_ivs)
6036     *n_ivs = iv_ca_n_cands (ivs);
6037   iv_ca_delta_commit (data, ivs, *delta, false);
6038
6039   return cost;
6040 }
6041
6042 /* Try narrowing set IVS by removing CAND.  Return the cost of
6043    the new set and store the differences in DELTA.  START is
6044    the candidate with which we start narrowing.  */
6045
6046 static comp_cost
6047 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6048               struct iv_cand *cand, struct iv_cand *start,
6049               struct iv_ca_delta **delta)
6050 {
6051   unsigned i, ci;
6052   struct iv_use *use;
6053   struct cost_pair *old_cp, *new_cp, *cp;
6054   bitmap_iterator bi;
6055   struct iv_cand *cnd;
6056   comp_cost cost, best_cost, acost;
6057
6058   *delta = NULL;
6059   for (i = 0; i < n_iv_uses (data); i++)
6060     {
6061       use = iv_use (data, i);
6062
6063       old_cp = iv_ca_cand_for_use (ivs, use);
6064       if (old_cp->cand != cand)
6065         continue;
6066
6067       best_cost = iv_ca_cost (ivs);
6068       /* Start narrowing with START.  */
6069       new_cp = get_use_iv_cost (data, use, start);
6070
6071       if (data->consider_all_candidates)
6072         {
6073           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6074             {
6075               if (ci == cand->id || (start && ci == start->id))
6076                 continue;
6077
6078               cnd = iv_cand (data, ci);
6079
6080               cp = get_use_iv_cost (data, use, cnd);
6081               if (!cp)
6082                 continue;
6083
6084               iv_ca_set_cp (data, ivs, use, cp);
6085               acost = iv_ca_cost (ivs);
6086
6087               if (compare_costs (acost, best_cost) < 0)
6088                 {
6089                   best_cost = acost;
6090                   new_cp = cp;
6091                 }
6092             }
6093         }
6094       else
6095         {
6096           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6097             {
6098               if (ci == cand->id || (start && ci == start->id))
6099                 continue;
6100
6101               cnd = iv_cand (data, ci);
6102
6103               cp = get_use_iv_cost (data, use, cnd);
6104               if (!cp)
6105                 continue;
6106
6107               iv_ca_set_cp (data, ivs, use, cp);
6108               acost = iv_ca_cost (ivs);
6109
6110               if (compare_costs (acost, best_cost) < 0)
6111                 {
6112                   best_cost = acost;
6113                   new_cp = cp;
6114                 }
6115             }
6116         }
6117       /* Restore to old cp for use.  */
6118       iv_ca_set_cp (data, ivs, use, old_cp);
6119
6120       if (!new_cp)
6121         {
6122           iv_ca_delta_free (delta);
6123           return infinite_cost;
6124         }
6125
6126       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6127     }
6128
6129   iv_ca_delta_commit (data, ivs, *delta, true);
6130   cost = iv_ca_cost (ivs);
6131   iv_ca_delta_commit (data, ivs, *delta, false);
6132
6133   return cost;
6134 }
6135
6136 /* Try optimizing the set of candidates IVS by removing candidates different
6137    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6138    differences in DELTA.  */
6139
6140 static comp_cost
6141 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6142              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6143 {
6144   bitmap_iterator bi;
6145   struct iv_ca_delta *act_delta, *best_delta;
6146   unsigned i;
6147   comp_cost best_cost, acost;
6148   struct iv_cand *cand;
6149
6150   best_delta = NULL;
6151   best_cost = iv_ca_cost (ivs);
6152
6153   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6154     {
6155       cand = iv_cand (data, i);
6156
6157       if (cand == except_cand)
6158         continue;
6159
6160       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6161
6162       if (compare_costs (acost, best_cost) < 0)
6163         {
6164           best_cost = acost;
6165           iv_ca_delta_free (&best_delta);
6166           best_delta = act_delta;
6167         }
6168       else
6169         iv_ca_delta_free (&act_delta);
6170     }
6171
6172   if (!best_delta)
6173     {
6174       *delta = NULL;
6175       return best_cost;
6176     }
6177
6178   /* Recurse to possibly remove other unnecessary ivs.  */
6179   iv_ca_delta_commit (data, ivs, best_delta, true);
6180   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6181   iv_ca_delta_commit (data, ivs, best_delta, false);
6182   *delta = iv_ca_delta_join (best_delta, *delta);
6183   return best_cost;
6184 }
6185
6186 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6187    cheaper local cost for USE than BEST_CP.  Return pointer to
6188    the corresponding cost_pair, otherwise just return BEST_CP.  */
6189
6190 static struct cost_pair*
6191 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6192                         unsigned int cand_idx, struct iv_cand *old_cand,
6193                         struct cost_pair *best_cp)
6194 {
6195   struct iv_cand *cand;
6196   struct cost_pair *cp;
6197
6198   gcc_assert (old_cand != NULL && best_cp != NULL);
6199   if (cand_idx == old_cand->id)
6200     return best_cp;
6201
6202   cand = iv_cand (data, cand_idx);
6203   cp = get_use_iv_cost (data, use, cand);
6204   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6205     return cp;
6206
6207   return best_cp;
6208 }
6209
6210 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6211    which are used by more than one iv uses.  For each of those candidates,
6212    this function tries to represent iv uses under that candidate using
6213    other ones with lower local cost, then tries to prune the new set.
6214    If the new set has lower cost, It returns the new cost after recording
6215    candidate replacement in list DELTA.  */
6216
6217 static comp_cost
6218 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6219                struct iv_ca_delta **delta)
6220 {
6221   bitmap_iterator bi, bj;
6222   unsigned int i, j, k;
6223   struct iv_use *use;
6224   struct iv_cand *cand;
6225   comp_cost orig_cost, acost;
6226   struct iv_ca_delta *act_delta, *tmp_delta;
6227   struct cost_pair *old_cp, *best_cp = NULL;
6228
6229   *delta = NULL;
6230   orig_cost = iv_ca_cost (ivs);
6231
6232   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6233     {
6234       if (ivs->n_cand_uses[i] == 1
6235           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6236         continue;
6237
6238       cand = iv_cand (data, i);
6239
6240       act_delta = NULL;
6241       /*  Represent uses under current candidate using other ones with
6242           lower local cost.  */
6243       for (j = 0; j < ivs->upto; j++)
6244         {
6245           use = iv_use (data, j);
6246           old_cp = iv_ca_cand_for_use (ivs, use);
6247
6248           if (old_cp->cand != cand)
6249             continue;
6250
6251           best_cp = old_cp;
6252           if (data->consider_all_candidates)
6253             for (k = 0; k < n_iv_cands (data); k++)
6254               best_cp = cheaper_cost_with_cand (data, use, k,
6255                                                 old_cp->cand, best_cp);
6256           else
6257             EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6258               best_cp = cheaper_cost_with_cand (data, use, k,
6259                                                 old_cp->cand, best_cp);
6260
6261           if (best_cp == old_cp)
6262             continue;
6263
6264           act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6265         }
6266       /* No need for further prune.  */
6267       if (!act_delta)
6268         continue;
6269
6270       /* Prune the new candidate set.  */
6271       iv_ca_delta_commit (data, ivs, act_delta, true);
6272       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6273       iv_ca_delta_commit (data, ivs, act_delta, false);
6274       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6275
6276       if (compare_costs (acost, orig_cost) < 0)
6277         {
6278           *delta = act_delta;
6279           return acost;
6280         }
6281       else
6282         iv_ca_delta_free (&act_delta);
6283     }
6284
6285   return orig_cost;
6286 }
6287
6288 /* Tries to extend the sets IVS in the best possible way in order
6289    to express the USE.  If ORIGINALP is true, prefer candidates from
6290    the original set of IVs, otherwise favor important candidates not
6291    based on any memory object.  */
6292
6293 static bool
6294 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6295                   struct iv_use *use, bool originalp)
6296 {
6297   comp_cost best_cost, act_cost;
6298   unsigned i;
6299   bitmap_iterator bi;
6300   struct iv_cand *cand;
6301   struct iv_ca_delta *best_delta = NULL, *act_delta;
6302   struct cost_pair *cp;
6303
6304   iv_ca_add_use (data, ivs, use);
6305   best_cost = iv_ca_cost (ivs);
6306   cp = iv_ca_cand_for_use (ivs, use);
6307   if (cp)
6308     {
6309       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6310       iv_ca_set_no_cp (data, ivs, use);
6311     }
6312
6313   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6314      first try important candidates not based on any memory object.  Only if
6315      this fails, try the specific ones.  Rationale -- in loops with many
6316      variables the best choice often is to use just one generic biv.  If we
6317      added here many ivs specific to the uses, the optimization algorithm later
6318      would be likely to get stuck in a local minimum, thus causing us to create
6319      too many ivs.  The approach from few ivs to more seems more likely to be
6320      successful -- starting from few ivs, replacing an expensive use by a
6321      specific iv should always be a win.  */
6322   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6323     {
6324       cand = iv_cand (data, i);
6325
6326       if (originalp && cand->pos !=IP_ORIGINAL)
6327         continue;
6328
6329       if (!originalp && cand->iv->base_object != NULL_TREE)
6330         continue;
6331
6332       if (iv_ca_cand_used_p (ivs, cand))
6333         continue;
6334
6335       cp = get_use_iv_cost (data, use, cand);
6336       if (!cp)
6337         continue;
6338
6339       iv_ca_set_cp (data, ivs, use, cp);
6340       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6341                                true);
6342       iv_ca_set_no_cp (data, ivs, use);
6343       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6344
6345       if (compare_costs (act_cost, best_cost) < 0)
6346         {
6347           best_cost = act_cost;
6348
6349           iv_ca_delta_free (&best_delta);
6350           best_delta = act_delta;
6351         }
6352       else
6353         iv_ca_delta_free (&act_delta);
6354     }
6355
6356   if (infinite_cost_p (best_cost))
6357     {
6358       for (i = 0; i < use->n_map_members; i++)
6359         {
6360           cp = use->cost_map + i;
6361           cand = cp->cand;
6362           if (!cand)
6363             continue;
6364
6365           /* Already tried this.  */
6366           if (cand->important)
6367             {
6368               if (originalp && cand->pos == IP_ORIGINAL)
6369                 continue;
6370               if (!originalp && cand->iv->base_object == NULL_TREE)
6371                 continue;
6372             }
6373
6374           if (iv_ca_cand_used_p (ivs, cand))
6375             continue;
6376
6377           act_delta = NULL;
6378           iv_ca_set_cp (data, ivs, use, cp);
6379           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6380           iv_ca_set_no_cp (data, ivs, use);
6381           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6382                                        cp, act_delta);
6383
6384           if (compare_costs (act_cost, best_cost) < 0)
6385             {
6386               best_cost = act_cost;
6387
6388               if (best_delta)
6389                 iv_ca_delta_free (&best_delta);
6390               best_delta = act_delta;
6391             }
6392           else
6393             iv_ca_delta_free (&act_delta);
6394         }
6395     }
6396
6397   iv_ca_delta_commit (data, ivs, best_delta, true);
6398   iv_ca_delta_free (&best_delta);
6399
6400   return !infinite_cost_p (best_cost);
6401 }
6402
6403 /* Finds an initial assignment of candidates to uses.  */
6404
6405 static struct iv_ca *
6406 get_initial_solution (struct ivopts_data *data, bool originalp)
6407 {
6408   struct iv_ca *ivs = iv_ca_new (data);
6409   unsigned i;
6410
6411   for (i = 0; i < n_iv_uses (data); i++)
6412     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6413       {
6414         iv_ca_free (&ivs);
6415         return NULL;
6416       }
6417
6418   return ivs;
6419 }
6420
6421 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6422    points to a bool variable, this function tries to break local
6423    optimal fixed-point by replacing candidates in IVS if it's true.  */
6424
6425 static bool
6426 try_improve_iv_set (struct ivopts_data *data,
6427                     struct iv_ca *ivs, bool *try_replace_p)
6428 {
6429   unsigned i, n_ivs;
6430   comp_cost acost, best_cost = iv_ca_cost (ivs);
6431   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6432   struct iv_cand *cand;
6433
6434   /* Try extending the set of induction variables by one.  */
6435   for (i = 0; i < n_iv_cands (data); i++)
6436     {
6437       cand = iv_cand (data, i);
6438
6439       if (iv_ca_cand_used_p (ivs, cand))
6440         continue;
6441
6442       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6443       if (!act_delta)
6444         continue;
6445
6446       /* If we successfully added the candidate and the set is small enough,
6447          try optimizing it by removing other candidates.  */
6448       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6449         {
6450           iv_ca_delta_commit (data, ivs, act_delta, true);
6451           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6452           iv_ca_delta_commit (data, ivs, act_delta, false);
6453           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6454         }
6455
6456       if (compare_costs (acost, best_cost) < 0)
6457         {
6458           best_cost = acost;
6459           iv_ca_delta_free (&best_delta);
6460           best_delta = act_delta;
6461         }
6462       else
6463         iv_ca_delta_free (&act_delta);
6464     }
6465
6466   if (!best_delta)
6467     {
6468       /* Try removing the candidates from the set instead.  */
6469       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6470
6471       if (!best_delta && *try_replace_p)
6472         {
6473           *try_replace_p = false;
6474           /* So far candidate selecting algorithm tends to choose fewer IVs
6475              so that it can handle cases in which loops have many variables
6476              but the best choice is often to use only one general biv.  One
6477              weakness is it can't handle opposite cases, in which different
6478              candidates should be chosen with respect to each use.  To solve
6479              the problem, we replace candidates in a manner described by the
6480              comments of iv_ca_replace, thus give general algorithm a chance
6481              to break local optimal fixed-point in these cases.  */
6482           best_cost = iv_ca_replace (data, ivs, &best_delta);
6483         }
6484
6485       if (!best_delta)
6486         return false;
6487     }
6488
6489   iv_ca_delta_commit (data, ivs, best_delta, true);
6490   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6491   iv_ca_delta_free (&best_delta);
6492   return true;
6493 }
6494
6495 /* Attempts to find the optimal set of induction variables.  We do simple
6496    greedy heuristic -- we try to replace at most one candidate in the selected
6497    solution and remove the unused ivs while this improves the cost.  */
6498
6499 static struct iv_ca *
6500 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6501 {
6502   struct iv_ca *set;
6503   bool try_replace_p = true;
6504
6505   /* Get the initial solution.  */
6506   set = get_initial_solution (data, originalp);
6507   if (!set)
6508     {
6509       if (dump_file && (dump_flags & TDF_DETAILS))
6510         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6511       return NULL;
6512     }
6513
6514   if (dump_file && (dump_flags & TDF_DETAILS))
6515     {
6516       fprintf (dump_file, "Initial set of candidates:\n");
6517       iv_ca_dump (data, dump_file, set);
6518     }
6519
6520   while (try_improve_iv_set (data, set, &try_replace_p))
6521     {
6522       if (dump_file && (dump_flags & TDF_DETAILS))
6523         {
6524           fprintf (dump_file, "Improved to:\n");
6525           iv_ca_dump (data, dump_file, set);
6526         }
6527     }
6528
6529   return set;
6530 }
6531
6532 static struct iv_ca *
6533 find_optimal_iv_set (struct ivopts_data *data)
6534 {
6535   unsigned i;
6536   struct iv_ca *set, *origset;
6537   struct iv_use *use;
6538   comp_cost cost, origcost;
6539
6540   /* Determine the cost based on a strategy that starts with original IVs,
6541      and try again using a strategy that prefers candidates not based
6542      on any IVs.  */
6543   origset = find_optimal_iv_set_1 (data, true);
6544   set = find_optimal_iv_set_1 (data, false);
6545
6546   if (!origset && !set)
6547     return NULL;
6548
6549   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6550   cost = set ? iv_ca_cost (set) : infinite_cost;
6551
6552   if (dump_file && (dump_flags & TDF_DETAILS))
6553     {
6554       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6555                origcost.cost, origcost.complexity);
6556       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6557                cost.cost, cost.complexity);
6558     }
6559
6560   /* Choose the one with the best cost.  */
6561   if (compare_costs (origcost, cost) <= 0)
6562     {
6563       if (set)
6564         iv_ca_free (&set);
6565       set = origset;
6566     }
6567   else if (origset)
6568     iv_ca_free (&origset);
6569
6570   for (i = 0; i < n_iv_uses (data); i++)
6571     {
6572       use = iv_use (data, i);
6573       use->selected = iv_ca_cand_for_use (set, use)->cand;
6574     }
6575
6576   return set;
6577 }
6578
6579 /* Creates a new induction variable corresponding to CAND.  */
6580
6581 static void
6582 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6583 {
6584   gimple_stmt_iterator incr_pos;
6585   tree base;
6586   bool after = false;
6587
6588   if (!cand->iv)
6589     return;
6590
6591   switch (cand->pos)
6592     {
6593     case IP_NORMAL:
6594       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6595       break;
6596
6597     case IP_END:
6598       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6599       after = true;
6600       break;
6601
6602     case IP_AFTER_USE:
6603       after = true;
6604       /* fall through */
6605     case IP_BEFORE_USE:
6606       incr_pos = gsi_for_stmt (cand->incremented_at);
6607       break;
6608
6609     case IP_ORIGINAL:
6610       /* Mark that the iv is preserved.  */
6611       name_info (data, cand->var_before)->preserve_biv = true;
6612       name_info (data, cand->var_after)->preserve_biv = true;
6613
6614       /* Rewrite the increment so that it uses var_before directly.  */
6615       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6616       return;
6617     }
6618
6619   gimple_add_tmp_var (cand->var_before);
6620
6621   base = unshare_expr (cand->iv->base);
6622
6623   create_iv (base, unshare_expr (cand->iv->step),
6624              cand->var_before, data->current_loop,
6625              &incr_pos, after, &cand->var_before, &cand->var_after);
6626 }
6627
6628 /* Creates new induction variables described in SET.  */
6629
6630 static void
6631 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6632 {
6633   unsigned i;
6634   struct iv_cand *cand;
6635   bitmap_iterator bi;
6636
6637   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6638     {
6639       cand = iv_cand (data, i);
6640       create_new_iv (data, cand);
6641     }
6642
6643   if (dump_file && (dump_flags & TDF_DETAILS))
6644     {
6645       fprintf (dump_file, "Selected IV set for loop %d",
6646                data->current_loop->num);
6647       if (data->loop_loc != UNKNOWN_LOCATION)
6648         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6649                  LOCATION_LINE (data->loop_loc));
6650       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6651       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6652         {
6653           cand = iv_cand (data, i);
6654           dump_cand (dump_file, cand);
6655         }
6656       fprintf (dump_file, "\n");
6657     }
6658 }
6659
6660 /* Rewrites USE (definition of iv used in a nonlinear expression)
6661    using candidate CAND.  */
6662
6663 static void
6664 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6665                             struct iv_use *use, struct iv_cand *cand)
6666 {
6667   tree comp;
6668   tree op, tgt;
6669   gassign *ass;
6670   gimple_stmt_iterator bsi;
6671
6672   /* An important special case -- if we are asked to express value of
6673      the original iv by itself, just exit; there is no need to
6674      introduce a new computation (that might also need casting the
6675      variable to unsigned and back).  */
6676   if (cand->pos == IP_ORIGINAL
6677       && cand->incremented_at == use->stmt)
6678     {
6679       enum tree_code stmt_code;
6680
6681       gcc_assert (is_gimple_assign (use->stmt));
6682       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6683
6684       /* Check whether we may leave the computation unchanged.
6685          This is the case only if it does not rely on other
6686          computations in the loop -- otherwise, the computation
6687          we rely upon may be removed in remove_unused_ivs,
6688          thus leading to ICE.  */
6689       stmt_code = gimple_assign_rhs_code (use->stmt);
6690       if (stmt_code == PLUS_EXPR
6691           || stmt_code == MINUS_EXPR
6692           || stmt_code == POINTER_PLUS_EXPR)
6693         {
6694           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6695             op = gimple_assign_rhs2 (use->stmt);
6696           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6697             op = gimple_assign_rhs1 (use->stmt);
6698           else
6699             op = NULL_TREE;
6700         }
6701       else
6702         op = NULL_TREE;
6703
6704       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6705         return;
6706     }
6707
6708   comp = get_computation (data->current_loop, use, cand);
6709   gcc_assert (comp != NULL_TREE);
6710
6711   switch (gimple_code (use->stmt))
6712     {
6713     case GIMPLE_PHI:
6714       tgt = PHI_RESULT (use->stmt);
6715
6716       /* If we should keep the biv, do not replace it.  */
6717       if (name_info (data, tgt)->preserve_biv)
6718         return;
6719
6720       bsi = gsi_after_labels (gimple_bb (use->stmt));
6721       break;
6722
6723     case GIMPLE_ASSIGN:
6724       tgt = gimple_assign_lhs (use->stmt);
6725       bsi = gsi_for_stmt (use->stmt);
6726       break;
6727
6728     default:
6729       gcc_unreachable ();
6730     }
6731
6732   if (!valid_gimple_rhs_p (comp)
6733       || (gimple_code (use->stmt) != GIMPLE_PHI
6734           /* We can't allow re-allocating the stmt as it might be pointed
6735              to still.  */
6736           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6737               >= gimple_num_ops (gsi_stmt (bsi)))))
6738     {
6739       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6740                                        true, GSI_SAME_STMT);
6741       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6742         {
6743           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6744           /* As this isn't a plain copy we have to reset alignment
6745              information.  */
6746           if (SSA_NAME_PTR_INFO (comp))
6747             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6748         }
6749     }
6750
6751   if (gimple_code (use->stmt) == GIMPLE_PHI)
6752     {
6753       ass = gimple_build_assign (tgt, comp);
6754       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6755
6756       bsi = gsi_for_stmt (use->stmt);
6757       remove_phi_node (&bsi, false);
6758     }
6759   else
6760     {
6761       gimple_assign_set_rhs_from_tree (&bsi, comp);
6762       use->stmt = gsi_stmt (bsi);
6763     }
6764 }
6765
6766 /* Performs a peephole optimization to reorder the iv update statement with
6767    a mem ref to enable instruction combining in later phases. The mem ref uses
6768    the iv value before the update, so the reordering transformation requires
6769    adjustment of the offset. CAND is the selected IV_CAND.
6770
6771    Example:
6772
6773    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6774    iv2 = iv1 + 1;
6775
6776    if (t < val)      (1)
6777      goto L;
6778    goto Head;
6779
6780
6781    directly propagating t over to (1) will introduce overlapping live range
6782    thus increase register pressure. This peephole transform it into:
6783
6784
6785    iv2 = iv1 + 1;
6786    t = MEM_REF (base, iv2, 8, 8);
6787    if (t < val)
6788      goto L;
6789    goto Head;
6790 */
6791
6792 static void
6793 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6794 {
6795   tree var_after;
6796   gimple iv_update, stmt;
6797   basic_block bb;
6798   gimple_stmt_iterator gsi, gsi_iv;
6799
6800   if (cand->pos != IP_NORMAL)
6801     return;
6802
6803   var_after = cand->var_after;
6804   iv_update = SSA_NAME_DEF_STMT (var_after);
6805
6806   bb = gimple_bb (iv_update);
6807   gsi = gsi_last_nondebug_bb (bb);
6808   stmt = gsi_stmt (gsi);
6809
6810   /* Only handle conditional statement for now.  */
6811   if (gimple_code (stmt) != GIMPLE_COND)
6812     return;
6813
6814   gsi_prev_nondebug (&gsi);
6815   stmt = gsi_stmt (gsi);
6816   if (stmt != iv_update)
6817     return;
6818
6819   gsi_prev_nondebug (&gsi);
6820   if (gsi_end_p (gsi))
6821     return;
6822
6823   stmt = gsi_stmt (gsi);
6824   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6825     return;
6826
6827   if (stmt != use->stmt)
6828     return;
6829
6830   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6831     return;
6832
6833   if (dump_file && (dump_flags & TDF_DETAILS))
6834     {
6835       fprintf (dump_file, "Reordering \n");
6836       print_gimple_stmt (dump_file, iv_update, 0, 0);
6837       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6838       fprintf (dump_file, "\n");
6839     }
6840
6841   gsi = gsi_for_stmt (use->stmt);
6842   gsi_iv = gsi_for_stmt (iv_update);
6843   gsi_move_before (&gsi_iv, &gsi);
6844
6845   cand->pos = IP_BEFORE_USE;
6846   cand->incremented_at = use->stmt;
6847 }
6848
6849 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6850
6851 static void
6852 rewrite_use_address_1 (struct ivopts_data *data,
6853                        struct iv_use *use, struct iv_cand *cand)
6854 {
6855   aff_tree aff;
6856   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6857   tree base_hint = NULL_TREE;
6858   tree ref, iv;
6859   bool ok;
6860
6861   adjust_iv_update_pos (cand, use);
6862   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6863   gcc_assert (ok);
6864   unshare_aff_combination (&aff);
6865
6866   /* To avoid undefined overflow problems, all IV candidates use unsigned
6867      integer types.  The drawback is that this makes it impossible for
6868      create_mem_ref to distinguish an IV that is based on a memory object
6869      from one that represents simply an offset.
6870
6871      To work around this problem, we pass a hint to create_mem_ref that
6872      indicates which variable (if any) in aff is an IV based on a memory
6873      object.  Note that we only consider the candidate.  If this is not
6874      based on an object, the base of the reference is in some subexpression
6875      of the use -- but these will use pointer types, so they are recognized
6876      by the create_mem_ref heuristics anyway.  */
6877   if (cand->iv->base_object)
6878     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6879
6880   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6881   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6882                         reference_alias_ptr_type (*use->op_p),
6883                         iv, base_hint, data->speed);
6884   copy_ref_info (ref, *use->op_p);
6885   *use->op_p = ref;
6886 }
6887
6888 /* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
6889    first use of a group, rewrites sub uses in the group too.  */
6890
6891 static void
6892 rewrite_use_address (struct ivopts_data *data,
6893                       struct iv_use *use, struct iv_cand *cand)
6894 {
6895   struct iv_use *next;
6896
6897   gcc_assert (use->sub_id == 0);
6898   rewrite_use_address_1 (data, use, cand);
6899   update_stmt (use->stmt);
6900
6901   for (next = use->next; next != NULL; next = next->next)
6902     {
6903       rewrite_use_address_1 (data, next, cand);
6904       update_stmt (next->stmt);
6905     }
6906
6907   return;
6908 }
6909
6910 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6911    candidate CAND.  */
6912
6913 static void
6914 rewrite_use_compare (struct ivopts_data *data,
6915                      struct iv_use *use, struct iv_cand *cand)
6916 {
6917   tree comp, *var_p, op, bound;
6918   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6919   enum tree_code compare;
6920   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6921   bool ok;
6922
6923   bound = cp->value;
6924   if (bound)
6925     {
6926       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6927       tree var_type = TREE_TYPE (var);
6928       gimple_seq stmts;
6929
6930       if (dump_file && (dump_flags & TDF_DETAILS))
6931         {
6932           fprintf (dump_file, "Replacing exit test: ");
6933           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6934         }
6935       compare = cp->comp;
6936       bound = unshare_expr (fold_convert (var_type, bound));
6937       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6938       if (stmts)
6939         gsi_insert_seq_on_edge_immediate (
6940                 loop_preheader_edge (data->current_loop),
6941                 stmts);
6942
6943       gcond *cond_stmt = as_a <gcond *> (use->stmt);
6944       gimple_cond_set_lhs (cond_stmt, var);
6945       gimple_cond_set_code (cond_stmt, compare);
6946       gimple_cond_set_rhs (cond_stmt, op);
6947       return;
6948     }
6949
6950   /* The induction variable elimination failed; just express the original
6951      giv.  */
6952   comp = get_computation (data->current_loop, use, cand);
6953   gcc_assert (comp != NULL_TREE);
6954
6955   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6956   gcc_assert (ok);
6957
6958   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6959                                      true, GSI_SAME_STMT);
6960 }
6961
6962 /* Rewrites USE using candidate CAND.  */
6963
6964 static void
6965 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6966 {
6967   switch (use->type)
6968     {
6969       case USE_NONLINEAR_EXPR:
6970         rewrite_use_nonlinear_expr (data, use, cand);
6971         break;
6972
6973       case USE_ADDRESS:
6974         rewrite_use_address (data, use, cand);
6975         break;
6976
6977       case USE_COMPARE:
6978         rewrite_use_compare (data, use, cand);
6979         break;
6980
6981       default:
6982         gcc_unreachable ();
6983     }
6984
6985   update_stmt (use->stmt);
6986 }
6987
6988 /* Rewrite the uses using the selected induction variables.  */
6989
6990 static void
6991 rewrite_uses (struct ivopts_data *data)
6992 {
6993   unsigned i;
6994   struct iv_cand *cand;
6995   struct iv_use *use;
6996
6997   for (i = 0; i < n_iv_uses (data); i++)
6998     {
6999       use = iv_use (data, i);
7000       cand = use->selected;
7001       gcc_assert (cand);
7002
7003       rewrite_use (data, use, cand);
7004     }
7005 }
7006
7007 /* Removes the ivs that are not used after rewriting.  */
7008
7009 static void
7010 remove_unused_ivs (struct ivopts_data *data)
7011 {
7012   unsigned j;
7013   bitmap_iterator bi;
7014   bitmap toremove = BITMAP_ALLOC (NULL);
7015
7016   /* Figure out an order in which to release SSA DEFs so that we don't
7017      release something that we'd have to propagate into a debug stmt
7018      afterwards.  */
7019   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7020     {
7021       struct version_info *info;
7022
7023       info = ver_info (data, j);
7024       if (info->iv
7025           && !integer_zerop (info->iv->step)
7026           && !info->inv_id
7027           && !info->iv->have_use_for
7028           && !info->preserve_biv)
7029         {
7030           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7031
7032           tree def = info->iv->ssa_name;
7033
7034           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7035             {
7036               imm_use_iterator imm_iter;
7037               use_operand_p use_p;
7038               gimple stmt;
7039               int count = 0;
7040
7041               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7042                 {
7043                   if (!gimple_debug_bind_p (stmt))
7044                     continue;
7045
7046                   /* We just want to determine whether to do nothing
7047                      (count == 0), to substitute the computed
7048                      expression into a single use of the SSA DEF by
7049                      itself (count == 1), or to use a debug temp
7050                      because the SSA DEF is used multiple times or as
7051                      part of a larger expression (count > 1). */
7052                   count++;
7053                   if (gimple_debug_bind_get_value (stmt) != def)
7054                     count++;
7055
7056                   if (count > 1)
7057                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7058                 }
7059
7060               if (!count)
7061                 continue;
7062
7063               struct iv_use dummy_use;
7064               struct iv_cand *best_cand = NULL, *cand;
7065               unsigned i, best_pref = 0, cand_pref;
7066
7067               memset (&dummy_use, 0, sizeof (dummy_use));
7068               dummy_use.iv = info->iv;
7069               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7070                 {
7071                   cand = iv_use (data, i)->selected;
7072                   if (cand == best_cand)
7073                     continue;
7074                   cand_pref = operand_equal_p (cand->iv->step,
7075                                                info->iv->step, 0)
7076                     ? 4 : 0;
7077                   cand_pref
7078                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7079                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7080                     ? 2 : 0;
7081                   cand_pref
7082                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7083                     ? 1 : 0;
7084                   if (best_cand == NULL || best_pref < cand_pref)
7085                     {
7086                       best_cand = cand;
7087                       best_pref = cand_pref;
7088                     }
7089                 }
7090
7091               if (!best_cand)
7092                 continue;
7093
7094               tree comp = get_computation_at (data->current_loop,
7095                                               &dummy_use, best_cand,
7096                                               SSA_NAME_DEF_STMT (def));
7097               if (!comp)
7098                 continue;
7099
7100               if (count > 1)
7101                 {
7102                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7103                   DECL_ARTIFICIAL (vexpr) = 1;
7104                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7105                   if (SSA_NAME_VAR (def))
7106                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7107                   else
7108                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7109                   gdebug *def_temp
7110                     = gimple_build_debug_bind (vexpr, comp, NULL);
7111                   gimple_stmt_iterator gsi;
7112
7113                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7114                     gsi = gsi_after_labels (gimple_bb
7115                                             (SSA_NAME_DEF_STMT (def)));
7116                   else
7117                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7118
7119                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7120                   comp = vexpr;
7121                 }
7122
7123               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7124                 {
7125                   if (!gimple_debug_bind_p (stmt))
7126                     continue;
7127
7128                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7129                     SET_USE (use_p, comp);
7130
7131                   update_stmt (stmt);
7132                 }
7133             }
7134         }
7135     }
7136
7137   release_defs_bitset (toremove);
7138
7139   BITMAP_FREE (toremove);
7140 }
7141
7142 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7143    for hash_map::traverse.  */
7144
7145 bool
7146 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7147 {
7148   free (value);
7149   return true;
7150 }
7151
7152 /* Frees data allocated by the optimization of a single loop.  */
7153
7154 static void
7155 free_loop_data (struct ivopts_data *data)
7156 {
7157   unsigned i, j;
7158   bitmap_iterator bi;
7159   tree obj;
7160
7161   if (data->niters)
7162     {
7163       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7164       delete data->niters;
7165       data->niters = NULL;
7166     }
7167
7168   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7169     {
7170       struct version_info *info;
7171
7172       info = ver_info (data, i);
7173       info->iv = NULL;
7174       info->has_nonlin_use = false;
7175       info->preserve_biv = false;
7176       info->inv_id = 0;
7177     }
7178   bitmap_clear (data->relevant);
7179   bitmap_clear (data->important_candidates);
7180
7181   for (i = 0; i < n_iv_uses (data); i++)
7182     {
7183       struct iv_use *use = iv_use (data, i);
7184       struct iv_use *pre = use, *sub = use->next;
7185
7186       while (sub)
7187         {
7188           gcc_assert (sub->related_cands == NULL);
7189           gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7190
7191           pre = sub;
7192           sub = sub->next;
7193           free (pre);
7194         }
7195
7196       BITMAP_FREE (use->related_cands);
7197       for (j = 0; j < use->n_map_members; j++)
7198         if (use->cost_map[j].depends_on)
7199           BITMAP_FREE (use->cost_map[j].depends_on);
7200       free (use->cost_map);
7201       free (use);
7202     }
7203   data->iv_uses.truncate (0);
7204
7205   for (i = 0; i < n_iv_cands (data); i++)
7206     {
7207       struct iv_cand *cand = iv_cand (data, i);
7208
7209       if (cand->depends_on)
7210         BITMAP_FREE (cand->depends_on);
7211       free (cand);
7212     }
7213   data->iv_candidates.truncate (0);
7214
7215   if (data->version_info_size < num_ssa_names)
7216     {
7217       data->version_info_size = 2 * num_ssa_names;
7218       free (data->version_info);
7219       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7220     }
7221
7222   data->max_inv_id = 0;
7223
7224   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7225     SET_DECL_RTL (obj, NULL_RTX);
7226
7227   decl_rtl_to_reset.truncate (0);
7228
7229   data->inv_expr_tab->empty ();
7230   data->inv_expr_id = 0;
7231 }
7232
7233 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7234    loop tree.  */
7235
7236 static void
7237 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7238 {
7239   free_loop_data (data);
7240   free (data->version_info);
7241   BITMAP_FREE (data->relevant);
7242   BITMAP_FREE (data->important_candidates);
7243
7244   decl_rtl_to_reset.release ();
7245   data->iv_uses.release ();
7246   data->iv_candidates.release ();
7247   delete data->inv_expr_tab;
7248   data->inv_expr_tab = NULL;
7249   free_affine_expand_cache (&data->name_expansion_cache);
7250   obstack_free (&data->iv_obstack, NULL);
7251 }
7252
7253 /* Returns true if the loop body BODY includes any function calls.  */
7254
7255 static bool
7256 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7257 {
7258   gimple_stmt_iterator gsi;
7259   unsigned i;
7260
7261   for (i = 0; i < num_nodes; i++)
7262     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7263       {
7264         gimple stmt = gsi_stmt (gsi);
7265         if (is_gimple_call (stmt)
7266             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7267           return true;
7268       }
7269   return false;
7270 }
7271
7272 /* Optimizes the LOOP.  Returns true if anything changed.  */
7273
7274 static bool
7275 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7276 {
7277   bool changed = false;
7278   struct iv_ca *iv_ca;
7279   edge exit = single_dom_exit (loop);
7280   basic_block *body;
7281
7282   gcc_assert (!data->niters);
7283   data->current_loop = loop;
7284   data->loop_loc = find_loop_location (loop);
7285   data->speed = optimize_loop_for_speed_p (loop);
7286
7287   if (dump_file && (dump_flags & TDF_DETAILS))
7288     {
7289       fprintf (dump_file, "Processing loop %d", loop->num);
7290       if (data->loop_loc != UNKNOWN_LOCATION)
7291         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7292                  LOCATION_LINE (data->loop_loc));
7293       fprintf (dump_file, "\n");
7294
7295       if (exit)
7296         {
7297           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7298                    exit->src->index, exit->dest->index);
7299           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7300           fprintf (dump_file, "\n");
7301         }
7302
7303       fprintf (dump_file, "\n");
7304     }
7305
7306   body = get_loop_body (loop);
7307   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7308   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7309   free (body);
7310
7311   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7312
7313   /* For each ssa name determines whether it behaves as an induction variable
7314      in some loop.  */
7315   if (!find_induction_variables (data))
7316     goto finish;
7317
7318   /* Finds interesting uses (item 1).  */
7319   find_interesting_uses (data);
7320   group_address_uses (data);
7321   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7322     goto finish;
7323
7324   /* Finds candidates for the induction variables (item 2).  */
7325   find_iv_candidates (data);
7326
7327   /* Calculates the costs (item 3, part 1).  */
7328   determine_iv_costs (data);
7329   determine_use_iv_costs (data);
7330   determine_set_costs (data);
7331
7332   /* Find the optimal set of induction variables (item 3, part 2).  */
7333   iv_ca = find_optimal_iv_set (data);
7334   if (!iv_ca)
7335     goto finish;
7336   changed = true;
7337
7338   /* Create the new induction variables (item 4, part 1).  */
7339   create_new_ivs (data, iv_ca);
7340   iv_ca_free (&iv_ca);
7341
7342   /* Rewrite the uses (item 4, part 2).  */
7343   rewrite_uses (data);
7344
7345   /* Remove the ivs that are unused after rewriting.  */
7346   remove_unused_ivs (data);
7347
7348   /* We have changed the structure of induction variables; it might happen
7349      that definitions in the scev database refer to some of them that were
7350      eliminated.  */
7351   scev_reset ();
7352
7353 finish:
7354   free_loop_data (data);
7355
7356   return changed;
7357 }
7358
7359 /* Main entry point.  Optimizes induction variables in loops.  */
7360
7361 void
7362 tree_ssa_iv_optimize (void)
7363 {
7364   struct loop *loop;
7365   struct ivopts_data data;
7366
7367   tree_ssa_iv_optimize_init (&data);
7368
7369   /* Optimize the loops starting with the innermost ones.  */
7370   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7371     {
7372       if (dump_file && (dump_flags & TDF_DETAILS))
7373         flow_loop_dump (loop, dump_file, NULL, 1);
7374
7375       tree_ssa_iv_optimize_loop (&data, loop);
7376     }
7377
7378   tree_ssa_iv_optimize_finalize (&data);
7379 }