gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "alias.h"
  69 #include "symtab.h"
  70 #include "tree.h"
  71 #include "fold-const.h"
  72 #include "stor-layout.h"
  73 #include "tm_p.h"
  74 #include "predict.h"
  75 #include "hard-reg-set.h"
  76 #include "function.h"
  77 #include "dominance.h"
  78 #include "cfg.h"
  79 #include "basic-block.h"
  80 #include "gimple-pretty-print.h"
  81 #include "tree-ssa-alias.h"
  82 #include "internal-fn.h"
  83 #include "tree-eh.h"
  84 #include "gimple-expr.h"
  85 #include "gimple.h"
  86 #include "gimplify.h"
  87 #include "gimple-iterator.h"
  88 #include "gimplify-me.h"
  89 #include "gimple-ssa.h"
  90 #include "cgraph.h"
  91 #include "tree-cfg.h"
  92 #include "tree-phinodes.h"
  93 #include "ssa-iterators.h"
  94 #include "stringpool.h"
  95 #include "tree-ssanames.h"
  96 #include "tree-ssa-loop-ivopts.h"
  97 #include "tree-ssa-loop-manip.h"
  98 #include "tree-ssa-loop-niter.h"
  99 #include "tree-ssa-loop.h"
 100 #include "rtl.h"
 101 #include "flags.h"
 102 #include "insn-config.h"
 103 #include "expmed.h"
 104 #include "dojump.h"
 105 #include "explow.h"
 106 #include "calls.h"
 107 #include "emit-rtl.h"
 108 #include "varasm.h"
 109 #include "stmt.h"
 110 #include "expr.h"
 111 #include "tree-dfa.h"
 112 #include "tree-ssa.h"
 113 #include "cfgloop.h"
 114 #include "tree-pass.h"
 115 #include "tree-chrec.h"
 116 #include "tree-scalar-evolution.h"
 117 #include "params.h"
 118 #include "langhooks.h"
 119 #include "tree-affine.h"
 120 #include "target.h"
 121 #include "tree-inline.h"
 122 #include "tree-ssa-propagate.h"
 123 #include "tree-ssa-address.h"
 124 #include "builtins.h"
 125 #include "tree-vectorizer.h"
 126
 127 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 128    cost of different addressing modes.  This should be moved to a TBD
 129    interface between the GIMPLE and RTL worlds.  */
 130 #include "recog.h"
 131
 132 /* The infinite cost.  */
 133 #define INFTY 10000000
 134
 135 #define AVG_LOOP_NITER(LOOP) 5
 136
 137 /* Returns the expected number of loop iterations for LOOP.
 138    The average trip count is computed from profile data if it
 139    exists. */
 140
 141 static inline HOST_WIDE_INT
 142 avg_loop_niter (struct loop *loop)
 143 {
 144   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 145   if (niter == -1)
 146     return AVG_LOOP_NITER (loop);
 147
 148   return niter;
 149 }
 150
 151 /* Representation of the induction variable.  */
 152 struct iv
 153 {
 154   tree base;            /* Initial value of the iv.  */
 155   tree base_object;     /* A memory object to that the induction variable points.  */
 156   tree step;            /* Step of the iv (constant only).  */
 157   tree ssa_name;        /* The ssa name with the value.  */
 158   unsigned use_id;      /* The identifier in the use if it is the case.  */
 159   bool biv_p;           /* Is it a biv?  */
 160   bool have_use_for;    /* Do we already have a use for it?  */
 161   bool no_overflow;     /* True if the iv doesn't overflow.  */
 162 };
 163
 164 /* Per-ssa version information (induction variable descriptions, etc.).  */
 165 struct version_info
 166 {
 167   tree name;            /* The ssa name.  */
 168   struct iv *iv;        /* Induction variable description.  */
 169   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 170                            an expression that is not an induction variable.  */
 171   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 172   unsigned inv_id;      /* Id of an invariant.  */
 173 };
 174
 175 /* Types of uses.  */
 176 enum use_type
 177 {
 178   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 179   USE_ADDRESS,          /* Use in an address.  */
 180   USE_COMPARE           /* Use is a compare.  */
 181 };
 182
 183 /* Cost of a computation.  */
 184 typedef struct
 185 {
 186   int cost;             /* The runtime cost.  */
 187   unsigned complexity;  /* The estimate of the complexity of the code for
 188                            the computation (in no concrete units --
 189                            complexity field should be larger for more
 190                            complex expressions and addressing modes).  */
 191 } comp_cost;
 192
 193 static const comp_cost no_cost = {0, 0};
 194 static const comp_cost infinite_cost = {INFTY, INFTY};
 195
 196 /* The candidate - cost pair.  */
 197 struct cost_pair
 198 {
 199   struct iv_cand *cand; /* The candidate.  */
 200   comp_cost cost;       /* The cost.  */
 201   bitmap depends_on;    /* The list of invariants that have to be
 202                            preserved.  */
 203   tree value;           /* For final value elimination, the expression for
 204                            the final value of the iv.  For iv elimination,
 205                            the new bound to compare with.  */
 206   enum tree_code comp;  /* For iv elimination, the comparison.  */
 207   int inv_expr_id;      /* Loop invariant expression id.  */
 208 };
 209
 210 /* Use.  */
 211 struct iv_use
 212 {
 213   unsigned id;          /* The id of the use.  */
 214   unsigned sub_id;      /* The id of the sub use.  */
 215   enum use_type type;   /* Type of the use.  */
 216   struct iv *iv;        /* The induction variable it is based on.  */
 217   gimple stmt;          /* Statement in that it occurs.  */
 218   tree *op_p;           /* The place where it occurs.  */
 219   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 220                            important ones.  */
 221
 222   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 223   struct cost_pair *cost_map;
 224                         /* The costs wrto the iv candidates.  */
 225
 226   struct iv_cand *selected;
 227                         /* The selected candidate.  */
 228
 229   struct iv_use *next;  /* The next sub use.  */
 230   tree addr_base;       /* Base address with const offset stripped.  */
 231   unsigned HOST_WIDE_INT addr_offset;
 232                         /* Const offset stripped from base address.  */
 233 };
 234
 235 /* The position where the iv is computed.  */
 236 enum iv_position
 237 {
 238   IP_NORMAL,            /* At the end, just before the exit condition.  */
 239   IP_END,               /* At the end of the latch block.  */
 240   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 241   IP_AFTER_USE,         /* Immediately after a specific use.  */
 242   IP_ORIGINAL           /* The original biv.  */
 243 };
 244
 245 /* The induction variable candidate.  */
 246 struct iv_cand
 247 {
 248   unsigned id;          /* The number of the candidate.  */
 249   bool important;       /* Whether this is an "important" candidate, i.e. such
 250                            that it should be considered by all uses.  */
 251   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 252   gimple incremented_at;/* For original biv, the statement where it is
 253                            incremented.  */
 254   tree var_before;      /* The variable used for it before increment.  */
 255   tree var_after;       /* The variable used for it after increment.  */
 256   struct iv *iv;        /* The value of the candidate.  NULL for
 257                            "pseudocandidate" used to indicate the possibility
 258                            to replace the final value of an iv by direct
 259                            computation of the value.  */
 260   unsigned cost;        /* Cost of the candidate.  */
 261   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 262   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 263                               where it is incremented.  */
 264   bitmap depends_on;    /* The list of invariants that are used in step of the
 265                            biv.  */
 266 };
 267
 268 /* Loop invariant expression hashtable entry.  */
 269 struct iv_inv_expr_ent
 270 {
 271   tree expr;
 272   int id;
 273   hashval_t hash;
 274 };
 275
 276 /* The data used by the induction variable optimizations.  */
 277
 278 typedef struct iv_use *iv_use_p;
 279
 280 typedef struct iv_cand *iv_cand_p;
 281
 282 /* Hashtable helpers.  */
 283
 284 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 285 {
 286   static inline hashval_t hash (const iv_inv_expr_ent *);
 287   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 288 };
 289
 290 /* Hash function for loop invariant expressions.  */
 291
 292 inline hashval_t
 293 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 294 {
 295   return expr->hash;
 296 }
 297
 298 /* Hash table equality function for expressions.  */
 299
 300 inline bool
 301 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 302                            const iv_inv_expr_ent *expr2)
 303 {
 304   return expr1->hash == expr2->hash
 305          && operand_equal_p (expr1->expr, expr2->expr, 0);
 306 }
 307
 308 struct ivopts_data
 309 {
 310   /* The currently optimized loop.  */
 311   struct loop *current_loop;
 312   source_location loop_loc;
 313
 314   /* Numbers of iterations for all exits of the current loop.  */
 315   hash_map<edge, tree_niter_desc *> *niters;
 316
 317   /* Number of registers used in it.  */
 318   unsigned regs_used;
 319
 320   /* The size of version_info array allocated.  */
 321   unsigned version_info_size;
 322
 323   /* The array of information for the ssa names.  */
 324   struct version_info *version_info;
 325
 326   /* The hashtable of loop invariant expressions created
 327      by ivopt.  */
 328   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 329
 330   /* Loop invariant expression id.  */
 331   int inv_expr_id;
 332
 333   /* The bitmap of indices in version_info whose value was changed.  */
 334   bitmap relevant;
 335
 336   /* The uses of induction variables.  */
 337   vec<iv_use_p> iv_uses;
 338
 339   /* The candidates.  */
 340   vec<iv_cand_p> iv_candidates;
 341
 342   /* A bitmap of important candidates.  */
 343   bitmap important_candidates;
 344
 345   /* Cache used by tree_to_aff_combination_expand.  */
 346   hash_map<tree, name_expansion *> *name_expansion_cache;
 347
 348   /* The maximum invariant id.  */
 349   unsigned max_inv_id;
 350
 351   /* Whether to consider just related and important candidates when replacing a
 352      use.  */
 353   bool consider_all_candidates;
 354
 355   /* Are we optimizing for speed?  */
 356   bool speed;
 357
 358   /* Whether the loop body includes any function calls.  */
 359   bool body_includes_call;
 360
 361   /* Whether the loop body can only be exited via single exit.  */
 362   bool loop_single_exit_p;
 363 };
 364
 365 /* An assignment of iv candidates to uses.  */
 366
 367 struct iv_ca
 368 {
 369   /* The number of uses covered by the assignment.  */
 370   unsigned upto;
 371
 372   /* Number of uses that cannot be expressed by the candidates in the set.  */
 373   unsigned bad_uses;
 374
 375   /* Candidate assigned to a use, together with the related costs.  */
 376   struct cost_pair **cand_for_use;
 377
 378   /* Number of times each candidate is used.  */
 379   unsigned *n_cand_uses;
 380
 381   /* The candidates used.  */
 382   bitmap cands;
 383
 384   /* The number of candidates in the set.  */
 385   unsigned n_cands;
 386
 387   /* Total number of registers needed.  */
 388   unsigned n_regs;
 389
 390   /* Total cost of expressing uses.  */
 391   comp_cost cand_use_cost;
 392
 393   /* Total cost of candidates.  */
 394   unsigned cand_cost;
 395
 396   /* Number of times each invariant is used.  */
 397   unsigned *n_invariant_uses;
 398
 399   /* The array holding the number of uses of each loop
 400      invariant expressions created by ivopt.  */
 401   unsigned *used_inv_expr;
 402
 403   /* The number of created loop invariants.  */
 404   unsigned num_used_inv_expr;
 405
 406   /* Total cost of the assignment.  */
 407   comp_cost cost;
 408 };
 409
 410 /* Difference of two iv candidate assignments.  */
 411
 412 struct iv_ca_delta
 413 {
 414   /* Changed use.  */
 415   struct iv_use *use;
 416
 417   /* An old assignment (for rollback purposes).  */
 418   struct cost_pair *old_cp;
 419
 420   /* A new assignment.  */
 421   struct cost_pair *new_cp;
 422
 423   /* Next change in the list.  */
 424   struct iv_ca_delta *next_change;
 425 };
 426
 427 /* Bound on number of candidates below that all candidates are considered.  */
 428
 429 #define CONSIDER_ALL_CANDIDATES_BOUND \
 430   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 431
 432 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 433    optimizing such a loop would help, and it would take ages).  */
 434
 435 #define MAX_CONSIDERED_USES \
 436   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 437
 438 /* If there are at most this number of ivs in the set, try removing unnecessary
 439    ivs from the set always.  */
 440
 441 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 442   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 443
 444 /* The list of trees for that the decl_rtl field must be reset is stored
 445    here.  */
 446
 447 static vec<tree> decl_rtl_to_reset;
 448
 449 static comp_cost force_expr_to_var_cost (tree, bool);
 450
 451 /* Number of uses recorded in DATA.  */
 452
 453 static inline unsigned
 454 n_iv_uses (struct ivopts_data *data)
 455 {
 456   return data->iv_uses.length ();
 457 }
 458
 459 /* Ith use recorded in DATA.  */
 460
 461 static inline struct iv_use *
 462 iv_use (struct ivopts_data *data, unsigned i)
 463 {
 464   return data->iv_uses[i];
 465 }
 466
 467 /* Number of candidates recorded in DATA.  */
 468
 469 static inline unsigned
 470 n_iv_cands (struct ivopts_data *data)
 471 {
 472   return data->iv_candidates.length ();
 473 }
 474
 475 /* Ith candidate recorded in DATA.  */
 476
 477 static inline struct iv_cand *
 478 iv_cand (struct ivopts_data *data, unsigned i)
 479 {
 480   return data->iv_candidates[i];
 481 }
 482
 483 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 484
 485 edge
 486 single_dom_exit (struct loop *loop)
 487 {
 488   edge exit = single_exit (loop);
 489
 490   if (!exit)
 491     return NULL;
 492
 493   if (!just_once_each_iteration_p (loop, exit->src))
 494     return NULL;
 495
 496   return exit;
 497 }
 498
 499 /* Dumps information about the induction variable IV to FILE.  */
 500
 501 void
 502 dump_iv (FILE *file, struct iv *iv, bool dump_name)
 503 {
 504   if (iv->ssa_name && dump_name)
 505     {
 506       fprintf (file, "ssa name ");
 507       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 508       fprintf (file, "\n");
 509     }
 510
 511   fprintf (file, "  type ");
 512   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 513   fprintf (file, "\n");
 514
 515   if (iv->step)
 516     {
 517       fprintf (file, "  base ");
 518       print_generic_expr (file, iv->base, TDF_SLIM);
 519       fprintf (file, "\n");
 520
 521       fprintf (file, "  step ");
 522       print_generic_expr (file, iv->step, TDF_SLIM);
 523       fprintf (file, "\n");
 524     }
 525   else
 526     {
 527       fprintf (file, "  invariant ");
 528       print_generic_expr (file, iv->base, TDF_SLIM);
 529       fprintf (file, "\n");
 530     }
 531
 532   if (iv->base_object)
 533     {
 534       fprintf (file, "  base object ");
 535       print_generic_expr (file, iv->base_object, TDF_SLIM);
 536       fprintf (file, "\n");
 537     }
 538
 539   if (iv->biv_p)
 540     fprintf (file, "  is a biv\n");
 541 }
 542
 543 /* Dumps information about the USE to FILE.  */
 544
 545 void
 546 dump_use (FILE *file, struct iv_use *use)
 547 {
 548   fprintf (file, "use %d", use->id);
 549   if (use->sub_id)
 550     fprintf (file, ".%d", use->sub_id);
 551
 552   fprintf (file, "\n");
 553
 554   switch (use->type)
 555     {
 556     case USE_NONLINEAR_EXPR:
 557       fprintf (file, "  generic\n");
 558       break;
 559
 560     case USE_ADDRESS:
 561       fprintf (file, "  address\n");
 562       break;
 563
 564     case USE_COMPARE:
 565       fprintf (file, "  compare\n");
 566       break;
 567
 568     default:
 569       gcc_unreachable ();
 570     }
 571
 572   fprintf (file, "  in statement ");
 573   print_gimple_stmt (file, use->stmt, 0, 0);
 574   fprintf (file, "\n");
 575
 576   fprintf (file, "  at position ");
 577   if (use->op_p)
 578     print_generic_expr (file, *use->op_p, TDF_SLIM);
 579   fprintf (file, "\n");
 580
 581   dump_iv (file, use->iv, false);
 582
 583   if (use->related_cands)
 584     {
 585       fprintf (file, "  related candidates ");
 586       dump_bitmap (file, use->related_cands);
 587     }
 588 }
 589
 590 /* Dumps information about the uses to FILE.  */
 591
 592 void
 593 dump_uses (FILE *file, struct ivopts_data *data)
 594 {
 595   unsigned i;
 596   struct iv_use *use;
 597
 598   for (i = 0; i < n_iv_uses (data); i++)
 599     {
 600       use = iv_use (data, i);
 601       do
 602         {
 603           dump_use (file, use);
 604           use = use->next;
 605         }
 606       while (use);
 607       fprintf (file, "\n");
 608     }
 609 }
 610
 611 /* Dumps information about induction variable candidate CAND to FILE.  */
 612
 613 void
 614 dump_cand (FILE *file, struct iv_cand *cand)
 615 {
 616   struct iv *iv = cand->iv;
 617
 618   fprintf (file, "candidate %d%s\n",
 619            cand->id, cand->important ? " (important)" : "");
 620
 621   if (cand->depends_on)
 622     {
 623       fprintf (file, "  depends on ");
 624       dump_bitmap (file, cand->depends_on);
 625     }
 626
 627   if (!iv)
 628     {
 629       fprintf (file, "  final value replacement\n");
 630       return;
 631     }
 632
 633   if (cand->var_before)
 634     {
 635       fprintf (file, "  var_before ");
 636       print_generic_expr (file, cand->var_before, TDF_SLIM);
 637       fprintf (file, "\n");
 638     }
 639   if (cand->var_after)
 640     {
 641       fprintf (file, "  var_after ");
 642       print_generic_expr (file, cand->var_after, TDF_SLIM);
 643       fprintf (file, "\n");
 644     }
 645
 646   switch (cand->pos)
 647     {
 648     case IP_NORMAL:
 649       fprintf (file, "  incremented before exit test\n");
 650       break;
 651
 652     case IP_BEFORE_USE:
 653       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 654       break;
 655
 656     case IP_AFTER_USE:
 657       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 658       break;
 659
 660     case IP_END:
 661       fprintf (file, "  incremented at end\n");
 662       break;
 663
 664     case IP_ORIGINAL:
 665       fprintf (file, "  original biv\n");
 666       break;
 667     }
 668
 669   dump_iv (file, iv, false);
 670 }
 671
 672 /* Returns the info for ssa version VER.  */
 673
 674 static inline struct version_info *
 675 ver_info (struct ivopts_data *data, unsigned ver)
 676 {
 677   return data->version_info + ver;
 678 }
 679
 680 /* Returns the info for ssa name NAME.  */
 681
 682 static inline struct version_info *
 683 name_info (struct ivopts_data *data, tree name)
 684 {
 685   return ver_info (data, SSA_NAME_VERSION (name));
 686 }
 687
 688 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 689    emitted in LOOP.  */
 690
 691 static bool
 692 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 693 {
 694   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 695
 696   gcc_assert (bb);
 697
 698   if (sbb == loop->latch)
 699     return true;
 700
 701   if (sbb != bb)
 702     return false;
 703
 704   return stmt == last_stmt (bb);
 705 }
 706
 707 /* Returns true if STMT if after the place where the original induction
 708    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 709    if the positions are identical.  */
 710
 711 static bool
 712 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 713 {
 714   basic_block cand_bb = gimple_bb (cand->incremented_at);
 715   basic_block stmt_bb = gimple_bb (stmt);
 716
 717   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 718     return false;
 719
 720   if (stmt_bb != cand_bb)
 721     return true;
 722
 723   if (true_if_equal
 724       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 725     return true;
 726   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 727 }
 728
 729 /* Returns true if STMT if after the place where the induction variable
 730    CAND is incremented in LOOP.  */
 731
 732 static bool
 733 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 734 {
 735   switch (cand->pos)
 736     {
 737     case IP_END:
 738       return false;
 739
 740     case IP_NORMAL:
 741       return stmt_after_ip_normal_pos (loop, stmt);
 742
 743     case IP_ORIGINAL:
 744     case IP_AFTER_USE:
 745       return stmt_after_inc_pos (cand, stmt, false);
 746
 747     case IP_BEFORE_USE:
 748       return stmt_after_inc_pos (cand, stmt, true);
 749
 750     default:
 751       gcc_unreachable ();
 752     }
 753 }
 754
 755 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 756
 757 static bool
 758 abnormal_ssa_name_p (tree exp)
 759 {
 760   if (!exp)
 761     return false;
 762
 763   if (TREE_CODE (exp) != SSA_NAME)
 764     return false;
 765
 766   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 767 }
 768
 769 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 770    abnormal phi node.  Callback for for_each_index.  */
 771
 772 static bool
 773 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 774                                   void *data ATTRIBUTE_UNUSED)
 775 {
 776   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 777     {
 778       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 779         return false;
 780       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 781         return false;
 782     }
 783
 784   return !abnormal_ssa_name_p (*index);
 785 }
 786
 787 /* Returns true if EXPR contains a ssa name that occurs in an
 788    abnormal phi node.  */
 789
 790 bool
 791 contains_abnormal_ssa_name_p (tree expr)
 792 {
 793   enum tree_code code;
 794   enum tree_code_class codeclass;
 795
 796   if (!expr)
 797     return false;
 798
 799   code = TREE_CODE (expr);
 800   codeclass = TREE_CODE_CLASS (code);
 801
 802   if (code == SSA_NAME)
 803     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 804
 805   if (code == INTEGER_CST
 806       || is_gimple_min_invariant (expr))
 807     return false;
 808
 809   if (code == ADDR_EXPR)
 810     return !for_each_index (&TREE_OPERAND (expr, 0),
 811                             idx_contains_abnormal_ssa_name_p,
 812                             NULL);
 813
 814   if (code == COND_EXPR)
 815     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 816       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 817       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 818
 819   switch (codeclass)
 820     {
 821     case tcc_binary:
 822     case tcc_comparison:
 823       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 824         return true;
 825
 826       /* Fallthru.  */
 827     case tcc_unary:
 828       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 829         return true;
 830
 831       break;
 832
 833     default:
 834       gcc_unreachable ();
 835     }
 836
 837   return false;
 838 }
 839
 840 /*  Returns the structure describing number of iterations determined from
 841     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 842
 843 static struct tree_niter_desc *
 844 niter_for_exit (struct ivopts_data *data, edge exit)
 845 {
 846   struct tree_niter_desc *desc;
 847   tree_niter_desc **slot;
 848
 849   if (!data->niters)
 850     {
 851       data->niters = new hash_map<edge, tree_niter_desc *>;
 852       slot = NULL;
 853     }
 854   else
 855     slot = data->niters->get (exit);
 856
 857   if (!slot)
 858     {
 859       /* Try to determine number of iterations.  We cannot safely work with ssa
 860          names that appear in phi nodes on abnormal edges, so that we do not
 861          create overlapping life ranges for them (PR 27283).  */
 862       desc = XNEW (struct tree_niter_desc);
 863       if (!number_of_iterations_exit (data->current_loop,
 864                                       exit, desc, true)
 865           || contains_abnormal_ssa_name_p (desc->niter))
 866         {
 867           XDELETE (desc);
 868           desc = NULL;
 869         }
 870       data->niters->put (exit, desc);
 871     }
 872   else
 873     desc = *slot;
 874
 875   return desc;
 876 }
 877
 878 /* Returns the structure describing number of iterations determined from
 879    single dominating exit of DATA->current_loop, or NULL if something
 880    goes wrong.  */
 881
 882 static struct tree_niter_desc *
 883 niter_for_single_dom_exit (struct ivopts_data *data)
 884 {
 885   edge exit = single_dom_exit (data->current_loop);
 886
 887   if (!exit)
 888     return NULL;
 889
 890   return niter_for_exit (data, exit);
 891 }
 892
 893 /* Initializes data structures used by the iv optimization pass, stored
 894    in DATA.  */
 895
 896 static void
 897 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 898 {
 899   data->version_info_size = 2 * num_ssa_names;
 900   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 901   data->relevant = BITMAP_ALLOC (NULL);
 902   data->important_candidates = BITMAP_ALLOC (NULL);
 903   data->max_inv_id = 0;
 904   data->niters = NULL;
 905   data->iv_uses.create (20);
 906   data->iv_candidates.create (20);
 907   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 908   data->inv_expr_id = 0;
 909   data->name_expansion_cache = NULL;
 910   decl_rtl_to_reset.create (20);
 911 }
 912
 913 /* Returns a memory object to that EXPR points.  In case we are able to
 914    determine that it does not point to any such object, NULL is returned.  */
 915
 916 static tree
 917 determine_base_object (tree expr)
 918 {
 919   enum tree_code code = TREE_CODE (expr);
 920   tree base, obj;
 921
 922   /* If this is a pointer casted to any type, we need to determine
 923      the base object for the pointer; so handle conversions before
 924      throwing away non-pointer expressions.  */
 925   if (CONVERT_EXPR_P (expr))
 926     return determine_base_object (TREE_OPERAND (expr, 0));
 927
 928   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 929     return NULL_TREE;
 930
 931   switch (code)
 932     {
 933     case INTEGER_CST:
 934       return NULL_TREE;
 935
 936     case ADDR_EXPR:
 937       obj = TREE_OPERAND (expr, 0);
 938       base = get_base_address (obj);
 939
 940       if (!base)
 941         return expr;
 942
 943       if (TREE_CODE (base) == MEM_REF)
 944         return determine_base_object (TREE_OPERAND (base, 0));
 945
 946       return fold_convert (ptr_type_node,
 947                            build_fold_addr_expr (base));
 948
 949     case POINTER_PLUS_EXPR:
 950       return determine_base_object (TREE_OPERAND (expr, 0));
 951
 952     case PLUS_EXPR:
 953     case MINUS_EXPR:
 954       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 955       gcc_unreachable ();
 956
 957     default:
 958       return fold_convert (ptr_type_node, expr);
 959     }
 960 }
 961
 962 /* Return true if address expression with non-DECL_P operand appears
 963    in EXPR.  */
 964
 965 static bool
 966 contain_complex_addr_expr (tree expr)
 967 {
 968   bool res = false;
 969
 970   STRIP_NOPS (expr);
 971   switch (TREE_CODE (expr))
 972     {
 973     case POINTER_PLUS_EXPR:
 974     case PLUS_EXPR:
 975     case MINUS_EXPR:
 976       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 977       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 978       break;
 979
 980     case ADDR_EXPR:
 981       return (!DECL_P (TREE_OPERAND (expr, 0)));
 982
 983     default:
 984       return false;
 985     }
 986
 987   return res;
 988 }
 989
 990 /* Allocates an induction variable with given initial value BASE and step STEP
 991    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
 992
 993 static struct iv *
 994 alloc_iv (tree base, tree step, bool no_overflow = false)
 995 {
 996   tree expr = base;
 997   struct iv *iv = XCNEW (struct iv);
 998   gcc_assert (step != NULL_TREE);
 999
1000   /* Lower address expression in base except ones with DECL_P as operand.
1001      By doing this:
1002        1) More accurate cost can be computed for address expressions;
1003        2) Duplicate candidates won't be created for bases in different
1004           forms, like &a[0] and &a.  */
1005   STRIP_NOPS (expr);
1006   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1007       || contain_complex_addr_expr (expr))
1008     {
1009       aff_tree comb;
1010       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1011       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1012     }
1013
1014   iv->base = base;
1015   iv->base_object = determine_base_object (base);
1016   iv->step = step;
1017   iv->biv_p = false;
1018   iv->have_use_for = false;
1019   iv->use_id = 0;
1020   iv->ssa_name = NULL_TREE;
1021   iv->no_overflow = no_overflow;
1022
1023   return iv;
1024 }
1025
1026 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1027    doesn't overflow.  */
1028
1029 static void
1030 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1031         bool no_overflow)
1032 {
1033   struct version_info *info = name_info (data, iv);
1034
1035   gcc_assert (!info->iv);
1036
1037   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1038   info->iv = alloc_iv (base, step, no_overflow);
1039   info->iv->ssa_name = iv;
1040 }
1041
1042 /* Finds induction variable declaration for VAR.  */
1043
1044 static struct iv *
1045 get_iv (struct ivopts_data *data, tree var)
1046 {
1047   basic_block bb;
1048   tree type = TREE_TYPE (var);
1049
1050   if (!POINTER_TYPE_P (type)
1051       && !INTEGRAL_TYPE_P (type))
1052     return NULL;
1053
1054   if (!name_info (data, var)->iv)
1055     {
1056       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1057
1058       if (!bb
1059           || !flow_bb_inside_loop_p (data->current_loop, bb))
1060         set_iv (data, var, var, build_int_cst (type, 0), true);
1061     }
1062
1063   return name_info (data, var)->iv;
1064 }
1065
1066 /* Return the first non-invariant ssa var found in EXPR.  */
1067
1068 static tree
1069 extract_single_var_from_expr (tree expr)
1070 {
1071   int i, n;
1072   tree tmp;
1073   enum tree_code code;
1074
1075   if (!expr || is_gimple_min_invariant (expr))
1076     return NULL;
1077
1078   code = TREE_CODE (expr);
1079   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1080     {
1081       n = TREE_OPERAND_LENGTH (expr);
1082       for (i = 0; i < n; i++)
1083         {
1084           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1085
1086           if (tmp)
1087             return tmp;
1088         }
1089     }
1090   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1091 }
1092
1093 /* Finds basic ivs.  */
1094
1095 static bool
1096 find_bivs (struct ivopts_data *data)
1097 {
1098   gphi *phi;
1099   affine_iv iv;
1100   tree step, type, base, stop;
1101   bool found = false;
1102   struct loop *loop = data->current_loop;
1103   gphi_iterator psi;
1104
1105   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1106     {
1107       phi = psi.phi ();
1108
1109       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1110         continue;
1111
1112       if (virtual_operand_p (PHI_RESULT (phi)))
1113         continue;
1114
1115       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1116         continue;
1117
1118       if (integer_zerop (iv.step))
1119         continue;
1120
1121       step = iv.step;
1122       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1123       /* Stop expanding iv base at the first ssa var referred by iv step.
1124          Ideally we should stop at any ssa var, because that's expensive
1125          and unusual to happen, we just do it on the first one.
1126
1127          See PR64705 for the rationale.  */
1128       stop = extract_single_var_from_expr (step);
1129       base = expand_simple_operations (base, stop);
1130       if (contains_abnormal_ssa_name_p (base)
1131           || contains_abnormal_ssa_name_p (step))
1132         continue;
1133
1134       type = TREE_TYPE (PHI_RESULT (phi));
1135       base = fold_convert (type, base);
1136       if (step)
1137         {
1138           if (POINTER_TYPE_P (type))
1139             step = convert_to_ptrofftype (step);
1140           else
1141             step = fold_convert (type, step);
1142         }
1143
1144       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1145       found = true;
1146     }
1147
1148   return found;
1149 }
1150
1151 /* Marks basic ivs.  */
1152
1153 static void
1154 mark_bivs (struct ivopts_data *data)
1155 {
1156   gphi *phi;
1157   gimple def;
1158   tree var;
1159   struct iv *iv, *incr_iv;
1160   struct loop *loop = data->current_loop;
1161   basic_block incr_bb;
1162   gphi_iterator psi;
1163
1164   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1165     {
1166       phi = psi.phi ();
1167
1168       iv = get_iv (data, PHI_RESULT (phi));
1169       if (!iv)
1170         continue;
1171
1172       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1173       def = SSA_NAME_DEF_STMT (var);
1174       /* Don't mark iv peeled from other one as biv.  */
1175       if (def
1176           && gimple_code (def) == GIMPLE_PHI
1177           && gimple_bb (def) == loop->header)
1178         continue;
1179
1180       incr_iv = get_iv (data, var);
1181       if (!incr_iv)
1182         continue;
1183
1184       /* If the increment is in the subloop, ignore it.  */
1185       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1186       if (incr_bb->loop_father != data->current_loop
1187           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1188         continue;
1189
1190       iv->biv_p = true;
1191       incr_iv->biv_p = true;
1192     }
1193 }
1194
1195 /* Checks whether STMT defines a linear induction variable and stores its
1196    parameters to IV.  */
1197
1198 static bool
1199 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1200 {
1201   tree lhs, stop;
1202   struct loop *loop = data->current_loop;
1203
1204   iv->base = NULL_TREE;
1205   iv->step = NULL_TREE;
1206
1207   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1208     return false;
1209
1210   lhs = gimple_assign_lhs (stmt);
1211   if (TREE_CODE (lhs) != SSA_NAME)
1212     return false;
1213
1214   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1215     return false;
1216
1217   /* Stop expanding iv base at the first ssa var referred by iv step.
1218      Ideally we should stop at any ssa var, because that's expensive
1219      and unusual to happen, we just do it on the first one.
1220
1221      See PR64705 for the rationale.  */
1222   stop = extract_single_var_from_expr (iv->step);
1223   iv->base = expand_simple_operations (iv->base, stop);
1224   if (contains_abnormal_ssa_name_p (iv->base)
1225       || contains_abnormal_ssa_name_p (iv->step))
1226     return false;
1227
1228   /* If STMT could throw, then do not consider STMT as defining a GIV.
1229      While this will suppress optimizations, we can not safely delete this
1230      GIV and associated statements, even if it appears it is not used.  */
1231   if (stmt_could_throw_p (stmt))
1232     return false;
1233
1234   return true;
1235 }
1236
1237 /* Finds general ivs in statement STMT.  */
1238
1239 static void
1240 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1241 {
1242   affine_iv iv;
1243
1244   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1245     return;
1246
1247   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1248 }
1249
1250 /* Finds general ivs in basic block BB.  */
1251
1252 static void
1253 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1254 {
1255   gimple_stmt_iterator bsi;
1256
1257   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1258     find_givs_in_stmt (data, gsi_stmt (bsi));
1259 }
1260
1261 /* Finds general ivs.  */
1262
1263 static void
1264 find_givs (struct ivopts_data *data)
1265 {
1266   struct loop *loop = data->current_loop;
1267   basic_block *body = get_loop_body_in_dom_order (loop);
1268   unsigned i;
1269
1270   for (i = 0; i < loop->num_nodes; i++)
1271     find_givs_in_bb (data, body[i]);
1272   free (body);
1273 }
1274
1275 /* For each ssa name defined in LOOP determines whether it is an induction
1276    variable and if so, its initial value and step.  */
1277
1278 static bool
1279 find_induction_variables (struct ivopts_data *data)
1280 {
1281   unsigned i;
1282   bitmap_iterator bi;
1283
1284   if (!find_bivs (data))
1285     return false;
1286
1287   find_givs (data);
1288   mark_bivs (data);
1289
1290   if (dump_file && (dump_flags & TDF_DETAILS))
1291     {
1292       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1293
1294       if (niter)
1295         {
1296           fprintf (dump_file, "  number of iterations ");
1297           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1298           if (!integer_zerop (niter->may_be_zero))
1299             {
1300               fprintf (dump_file, "; zero if ");
1301               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1302             }
1303           fprintf (dump_file, "\n\n");
1304         };
1305
1306       fprintf (dump_file, "Induction variables:\n\n");
1307
1308       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1309         {
1310           if (ver_info (data, i)->iv)
1311             dump_iv (dump_file, ver_info (data, i)->iv, true);
1312         }
1313     }
1314
1315   return true;
1316 }
1317
1318 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1319    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1320    is the const offset stripped from IV base.  For uses of other types,
1321    ADDR_BASE and ADDR_OFFSET are zero by default.  */
1322
1323 static struct iv_use *
1324 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1325             gimple stmt, enum use_type use_type, tree addr_base = NULL,
1326             unsigned HOST_WIDE_INT addr_offset = 0)
1327 {
1328   struct iv_use *use = XCNEW (struct iv_use);
1329
1330   use->id = n_iv_uses (data);
1331   use->sub_id = 0;
1332   use->type = use_type;
1333   use->iv = iv;
1334   use->stmt = stmt;
1335   use->op_p = use_p;
1336   use->related_cands = BITMAP_ALLOC (NULL);
1337   use->next = NULL;
1338   use->addr_base = addr_base;
1339   use->addr_offset = addr_offset;
1340
1341   data->iv_uses.safe_push (use);
1342
1343   return use;
1344 }
1345
1346 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1347    The sub use is recorded under the one whose use id is ID_GROUP.  */
1348
1349 static struct iv_use *
1350 record_sub_use (struct ivopts_data *data, tree *use_p,
1351                     struct iv *iv, gimple stmt, enum use_type use_type,
1352                     tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1353                     unsigned int id_group)
1354 {
1355   struct iv_use *use = XCNEW (struct iv_use);
1356   struct iv_use *group = iv_use (data, id_group);
1357
1358   use->id = group->id;
1359   use->sub_id = 0;
1360   use->type = use_type;
1361   use->iv = iv;
1362   use->stmt = stmt;
1363   use->op_p = use_p;
1364   use->related_cands = NULL;
1365   use->addr_base = addr_base;
1366   use->addr_offset = addr_offset;
1367
1368   /* Sub use list is maintained in offset ascending order.  */
1369   if (addr_offset <= group->addr_offset)
1370     {
1371       use->related_cands = group->related_cands;
1372       group->related_cands = NULL;
1373       use->next = group;
1374       data->iv_uses[id_group] = use;
1375     }
1376   else
1377     {
1378       struct iv_use *pre;
1379       do
1380         {
1381           pre = group;
1382           group = group->next;
1383         }
1384       while (group && addr_offset > group->addr_offset);
1385       use->next = pre->next;
1386       pre->next = use;
1387     }
1388
1389   /* To avoid showing ssa name in the dumps, if it was not reset by the
1390      caller.  */
1391   iv->ssa_name = NULL_TREE;
1392
1393   return use;
1394 }
1395
1396 /* Checks whether OP is a loop-level invariant and if so, records it.
1397    NONLINEAR_USE is true if the invariant is used in a way we do not
1398    handle specially.  */
1399
1400 static void
1401 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1402 {
1403   basic_block bb;
1404   struct version_info *info;
1405
1406   if (TREE_CODE (op) != SSA_NAME
1407       || virtual_operand_p (op))
1408     return;
1409
1410   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1411   if (bb
1412       && flow_bb_inside_loop_p (data->current_loop, bb))
1413     return;
1414
1415   info = name_info (data, op);
1416   info->name = op;
1417   info->has_nonlin_use |= nonlinear_use;
1418   if (!info->inv_id)
1419     info->inv_id = ++data->max_inv_id;
1420   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1421 }
1422
1423 /* Checks whether the use OP is interesting and if so, records it.  */
1424
1425 static struct iv_use *
1426 find_interesting_uses_op (struct ivopts_data *data, tree op)
1427 {
1428   struct iv *iv;
1429   struct iv *civ;
1430   gimple stmt;
1431   struct iv_use *use;
1432
1433   if (TREE_CODE (op) != SSA_NAME)
1434     return NULL;
1435
1436   iv = get_iv (data, op);
1437   if (!iv)
1438     return NULL;
1439
1440   if (iv->have_use_for)
1441     {
1442       use = iv_use (data, iv->use_id);
1443
1444       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1445       return use;
1446     }
1447
1448   if (integer_zerop (iv->step))
1449     {
1450       record_invariant (data, op, true);
1451       return NULL;
1452     }
1453   iv->have_use_for = true;
1454
1455   civ = XNEW (struct iv);
1456   *civ = *iv;
1457
1458   stmt = SSA_NAME_DEF_STMT (op);
1459   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1460               || is_gimple_assign (stmt));
1461
1462   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1463   iv->use_id = use->id;
1464
1465   return use;
1466 }
1467
1468 /* Given a condition in statement STMT, checks whether it is a compare
1469    of an induction variable and an invariant.  If this is the case,
1470    CONTROL_VAR is set to location of the iv, BOUND to the location of
1471    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1472    induction variable descriptions, and true is returned.  If this is not
1473    the case, CONTROL_VAR and BOUND are set to the arguments of the
1474    condition and false is returned.  */
1475
1476 static bool
1477 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1478                        tree **control_var, tree **bound,
1479                        struct iv **iv_var, struct iv **iv_bound)
1480 {
1481   /* The objects returned when COND has constant operands.  */
1482   static struct iv const_iv;
1483   static tree zero;
1484   tree *op0 = &zero, *op1 = &zero;
1485   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1486   bool ret = false;
1487
1488   if (gimple_code (stmt) == GIMPLE_COND)
1489     {
1490       gcond *cond_stmt = as_a <gcond *> (stmt);
1491       op0 = gimple_cond_lhs_ptr (cond_stmt);
1492       op1 = gimple_cond_rhs_ptr (cond_stmt);
1493     }
1494   else
1495     {
1496       op0 = gimple_assign_rhs1_ptr (stmt);
1497       op1 = gimple_assign_rhs2_ptr (stmt);
1498     }
1499
1500   zero = integer_zero_node;
1501   const_iv.step = integer_zero_node;
1502
1503   if (TREE_CODE (*op0) == SSA_NAME)
1504     iv0 = get_iv (data, *op0);
1505   if (TREE_CODE (*op1) == SSA_NAME)
1506     iv1 = get_iv (data, *op1);
1507
1508   /* Exactly one of the compared values must be an iv, and the other one must
1509      be an invariant.  */
1510   if (!iv0 || !iv1)
1511     goto end;
1512
1513   if (integer_zerop (iv0->step))
1514     {
1515       /* Control variable may be on the other side.  */
1516       std::swap (op0, op1);
1517       std::swap (iv0, iv1);
1518     }
1519   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1520
1521 end:
1522   if (control_var)
1523     *control_var = op0;
1524   if (iv_var)
1525     *iv_var = iv0;
1526   if (bound)
1527     *bound = op1;
1528   if (iv_bound)
1529     *iv_bound = iv1;
1530
1531   return ret;
1532 }
1533
1534 /* Checks whether the condition in STMT is interesting and if so,
1535    records it.  */
1536
1537 static void
1538 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1539 {
1540   tree *var_p, *bound_p;
1541   struct iv *var_iv, *civ;
1542
1543   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1544     {
1545       find_interesting_uses_op (data, *var_p);
1546       find_interesting_uses_op (data, *bound_p);
1547       return;
1548     }
1549
1550   civ = XNEW (struct iv);
1551   *civ = *var_iv;
1552   record_use (data, NULL, civ, stmt, USE_COMPARE);
1553 }
1554
1555 /* Returns the outermost loop EXPR is obviously invariant in
1556    relative to the loop LOOP, i.e. if all its operands are defined
1557    outside of the returned loop.  Returns NULL if EXPR is not
1558    even obviously invariant in LOOP.  */
1559
1560 struct loop *
1561 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1562 {
1563   basic_block def_bb;
1564   unsigned i, len;
1565
1566   if (is_gimple_min_invariant (expr))
1567     return current_loops->tree_root;
1568
1569   if (TREE_CODE (expr) == SSA_NAME)
1570     {
1571       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1572       if (def_bb)
1573         {
1574           if (flow_bb_inside_loop_p (loop, def_bb))
1575             return NULL;
1576           return superloop_at_depth (loop,
1577                                      loop_depth (def_bb->loop_father) + 1);
1578         }
1579
1580       return current_loops->tree_root;
1581     }
1582
1583   if (!EXPR_P (expr))
1584     return NULL;
1585
1586   unsigned maxdepth = 0;
1587   len = TREE_OPERAND_LENGTH (expr);
1588   for (i = 0; i < len; i++)
1589     {
1590       struct loop *ivloop;
1591       if (!TREE_OPERAND (expr, i))
1592         continue;
1593
1594       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1595       if (!ivloop)
1596         return NULL;
1597       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1598     }
1599
1600   return superloop_at_depth (loop, maxdepth);
1601 }
1602
1603 /* Returns true if expression EXPR is obviously invariant in LOOP,
1604    i.e. if all its operands are defined outside of the LOOP.  LOOP
1605    should not be the function body.  */
1606
1607 bool
1608 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1609 {
1610   basic_block def_bb;
1611   unsigned i, len;
1612
1613   gcc_assert (loop_depth (loop) > 0);
1614
1615   if (is_gimple_min_invariant (expr))
1616     return true;
1617
1618   if (TREE_CODE (expr) == SSA_NAME)
1619     {
1620       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1621       if (def_bb
1622           && flow_bb_inside_loop_p (loop, def_bb))
1623         return false;
1624
1625       return true;
1626     }
1627
1628   if (!EXPR_P (expr))
1629     return false;
1630
1631   len = TREE_OPERAND_LENGTH (expr);
1632   for (i = 0; i < len; i++)
1633     if (TREE_OPERAND (expr, i)
1634         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1635       return false;
1636
1637   return true;
1638 }
1639
1640 /* Cumulates the steps of indices into DATA and replaces their values with the
1641    initial ones.  Returns false when the value of the index cannot be determined.
1642    Callback for for_each_index.  */
1643
1644 struct ifs_ivopts_data
1645 {
1646   struct ivopts_data *ivopts_data;
1647   gimple stmt;
1648   tree step;
1649 };
1650
1651 static bool
1652 idx_find_step (tree base, tree *idx, void *data)
1653 {
1654   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1655   struct iv *iv;
1656   bool use_overflow_semantics = false;
1657   tree step, iv_base, iv_step, lbound, off;
1658   struct loop *loop = dta->ivopts_data->current_loop;
1659
1660   /* If base is a component ref, require that the offset of the reference
1661      be invariant.  */
1662   if (TREE_CODE (base) == COMPONENT_REF)
1663     {
1664       off = component_ref_field_offset (base);
1665       return expr_invariant_in_loop_p (loop, off);
1666     }
1667
1668   /* If base is array, first check whether we will be able to move the
1669      reference out of the loop (in order to take its address in strength
1670      reduction).  In order for this to work we need both lower bound
1671      and step to be loop invariants.  */
1672   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1673     {
1674       /* Moreover, for a range, the size needs to be invariant as well.  */
1675       if (TREE_CODE (base) == ARRAY_RANGE_REF
1676           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1677         return false;
1678
1679       step = array_ref_element_size (base);
1680       lbound = array_ref_low_bound (base);
1681
1682       if (!expr_invariant_in_loop_p (loop, step)
1683           || !expr_invariant_in_loop_p (loop, lbound))
1684         return false;
1685     }
1686
1687   if (TREE_CODE (*idx) != SSA_NAME)
1688     return true;
1689
1690   iv = get_iv (dta->ivopts_data, *idx);
1691   if (!iv)
1692     return false;
1693
1694   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1695           *&x[0], which is not folded and does not trigger the
1696           ARRAY_REF path below.  */
1697   *idx = iv->base;
1698
1699   if (integer_zerop (iv->step))
1700     return true;
1701
1702   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1703     {
1704       step = array_ref_element_size (base);
1705
1706       /* We only handle addresses whose step is an integer constant.  */
1707       if (TREE_CODE (step) != INTEGER_CST)
1708         return false;
1709     }
1710   else
1711     /* The step for pointer arithmetics already is 1 byte.  */
1712     step = size_one_node;
1713
1714   iv_base = iv->base;
1715   iv_step = iv->step;
1716   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1717     use_overflow_semantics = true;
1718
1719   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1720                             sizetype, &iv_base, &iv_step, dta->stmt,
1721                             use_overflow_semantics))
1722     {
1723       /* The index might wrap.  */
1724       return false;
1725     }
1726
1727   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1728   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1729
1730   return true;
1731 }
1732
1733 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1734    object is passed to it in DATA.  */
1735
1736 static bool
1737 idx_record_use (tree base, tree *idx,
1738                 void *vdata)
1739 {
1740   struct ivopts_data *data = (struct ivopts_data *) vdata;
1741   find_interesting_uses_op (data, *idx);
1742   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1743     {
1744       find_interesting_uses_op (data, array_ref_element_size (base));
1745       find_interesting_uses_op (data, array_ref_low_bound (base));
1746     }
1747   return true;
1748 }
1749
1750 /* If we can prove that TOP = cst * BOT for some constant cst,
1751    store cst to MUL and return true.  Otherwise return false.
1752    The returned value is always sign-extended, regardless of the
1753    signedness of TOP and BOT.  */
1754
1755 static bool
1756 constant_multiple_of (tree top, tree bot, widest_int *mul)
1757 {
1758   tree mby;
1759   enum tree_code code;
1760   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1761   widest_int res, p0, p1;
1762
1763   STRIP_NOPS (top);
1764   STRIP_NOPS (bot);
1765
1766   if (operand_equal_p (top, bot, 0))
1767     {
1768       *mul = 1;
1769       return true;
1770     }
1771
1772   code = TREE_CODE (top);
1773   switch (code)
1774     {
1775     case MULT_EXPR:
1776       mby = TREE_OPERAND (top, 1);
1777       if (TREE_CODE (mby) != INTEGER_CST)
1778         return false;
1779
1780       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1781         return false;
1782
1783       *mul = wi::sext (res * wi::to_widest (mby), precision);
1784       return true;
1785
1786     case PLUS_EXPR:
1787     case MINUS_EXPR:
1788       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1789           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1790         return false;
1791
1792       if (code == MINUS_EXPR)
1793         p1 = -p1;
1794       *mul = wi::sext (p0 + p1, precision);
1795       return true;
1796
1797     case INTEGER_CST:
1798       if (TREE_CODE (bot) != INTEGER_CST)
1799         return false;
1800
1801       p0 = widest_int::from (top, SIGNED);
1802       p1 = widest_int::from (bot, SIGNED);
1803       if (p1 == 0)
1804         return false;
1805       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1806       return res == 0;
1807
1808     default:
1809       return false;
1810     }
1811 }
1812
1813 /* Return true if memory reference REF with step STEP may be unaligned.  */
1814
1815 static bool
1816 may_be_unaligned_p (tree ref, tree step)
1817 {
1818   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1819      thus they are not misaligned.  */
1820   if (TREE_CODE (ref) == TARGET_MEM_REF)
1821     return false;
1822
1823   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1824   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1825     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1826
1827   unsigned HOST_WIDE_INT bitpos;
1828   unsigned int ref_align;
1829   get_object_alignment_1 (ref, &ref_align, &bitpos);
1830   if (ref_align < align
1831       || (bitpos % align) != 0
1832       || (bitpos % BITS_PER_UNIT) != 0)
1833     return true;
1834
1835   unsigned int trailing_zeros = tree_ctz (step);
1836   if (trailing_zeros < HOST_BITS_PER_INT
1837       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1838     return true;
1839
1840   return false;
1841 }
1842
1843 /* Return true if EXPR may be non-addressable.   */
1844
1845 bool
1846 may_be_nonaddressable_p (tree expr)
1847 {
1848   switch (TREE_CODE (expr))
1849     {
1850     case TARGET_MEM_REF:
1851       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1852          target, thus they are always addressable.  */
1853       return false;
1854
1855     case COMPONENT_REF:
1856       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1857              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1858
1859     case VIEW_CONVERT_EXPR:
1860       /* This kind of view-conversions may wrap non-addressable objects
1861          and make them look addressable.  After some processing the
1862          non-addressability may be uncovered again, causing ADDR_EXPRs
1863          of inappropriate objects to be built.  */
1864       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1865           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1866         return true;
1867
1868       /* ... fall through ... */
1869
1870     case ARRAY_REF:
1871     case ARRAY_RANGE_REF:
1872       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1873
1874     CASE_CONVERT:
1875       return true;
1876
1877     default:
1878       break;
1879     }
1880
1881   return false;
1882 }
1883
1884 static tree
1885 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1886
1887 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1888    If there is an existing use which has same stripped iv base and step,
1889    this function records this one as a sub use to that; otherwise records
1890    it as a normal one.  */
1891
1892 static struct iv_use *
1893 record_group_use (struct ivopts_data *data, tree *use_p,
1894                   struct iv *iv, gimple stmt, enum use_type use_type)
1895 {
1896   unsigned int i;
1897   struct iv_use *use;
1898   tree addr_base;
1899   unsigned HOST_WIDE_INT addr_offset;
1900
1901   /* Only support sub use for address type uses, that is, with base
1902      object.  */
1903   if (!iv->base_object)
1904     return record_use (data, use_p, iv, stmt, use_type);
1905
1906   addr_base = strip_offset (iv->base, &addr_offset);
1907   for (i = 0; i < n_iv_uses (data); i++)
1908     {
1909       use = iv_use (data, i);
1910       if (use->type != USE_ADDRESS || !use->iv->base_object)
1911         continue;
1912
1913       /* Check if it has the same stripped base and step.  */
1914       if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1915           && operand_equal_p (iv->step, use->iv->step, 0)
1916           && operand_equal_p (addr_base, use->addr_base, 0))
1917         break;
1918     }
1919
1920   if (i == n_iv_uses (data))
1921     return record_use (data, use_p, iv, stmt,
1922                        use_type, addr_base, addr_offset);
1923   else
1924     return record_sub_use (data, use_p, iv, stmt,
1925                            use_type, addr_base, addr_offset, i);
1926 }
1927
1928 /* Finds addresses in *OP_P inside STMT.  */
1929
1930 static void
1931 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1932 {
1933   tree base = *op_p, step = size_zero_node;
1934   struct iv *civ;
1935   struct ifs_ivopts_data ifs_ivopts_data;
1936
1937   /* Do not play with volatile memory references.  A bit too conservative,
1938      perhaps, but safe.  */
1939   if (gimple_has_volatile_ops (stmt))
1940     goto fail;
1941
1942   /* Ignore bitfields for now.  Not really something terribly complicated
1943      to handle.  TODO.  */
1944   if (TREE_CODE (base) == BIT_FIELD_REF)
1945     goto fail;
1946
1947   base = unshare_expr (base);
1948
1949   if (TREE_CODE (base) == TARGET_MEM_REF)
1950     {
1951       tree type = build_pointer_type (TREE_TYPE (base));
1952       tree astep;
1953
1954       if (TMR_BASE (base)
1955           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1956         {
1957           civ = get_iv (data, TMR_BASE (base));
1958           if (!civ)
1959             goto fail;
1960
1961           TMR_BASE (base) = civ->base;
1962           step = civ->step;
1963         }
1964       if (TMR_INDEX2 (base)
1965           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1966         {
1967           civ = get_iv (data, TMR_INDEX2 (base));
1968           if (!civ)
1969             goto fail;
1970
1971           TMR_INDEX2 (base) = civ->base;
1972           step = civ->step;
1973         }
1974       if (TMR_INDEX (base)
1975           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1976         {
1977           civ = get_iv (data, TMR_INDEX (base));
1978           if (!civ)
1979             goto fail;
1980
1981           TMR_INDEX (base) = civ->base;
1982           astep = civ->step;
1983
1984           if (astep)
1985             {
1986               if (TMR_STEP (base))
1987                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1988
1989               step = fold_build2 (PLUS_EXPR, type, step, astep);
1990             }
1991         }
1992
1993       if (integer_zerop (step))
1994         goto fail;
1995       base = tree_mem_ref_addr (type, base);
1996     }
1997   else
1998     {
1999       ifs_ivopts_data.ivopts_data = data;
2000       ifs_ivopts_data.stmt = stmt;
2001       ifs_ivopts_data.step = size_zero_node;
2002       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2003           || integer_zerop (ifs_ivopts_data.step))
2004         goto fail;
2005       step = ifs_ivopts_data.step;
2006
2007       /* Check that the base expression is addressable.  This needs
2008          to be done after substituting bases of IVs into it.  */
2009       if (may_be_nonaddressable_p (base))
2010         goto fail;
2011
2012       /* Moreover, on strict alignment platforms, check that it is
2013          sufficiently aligned.  */
2014       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2015         goto fail;
2016
2017       base = build_fold_addr_expr (base);
2018
2019       /* Substituting bases of IVs into the base expression might
2020          have caused folding opportunities.  */
2021       if (TREE_CODE (base) == ADDR_EXPR)
2022         {
2023           tree *ref = &TREE_OPERAND (base, 0);
2024           while (handled_component_p (*ref))
2025             ref = &TREE_OPERAND (*ref, 0);
2026           if (TREE_CODE (*ref) == MEM_REF)
2027             {
2028               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2029                                       TREE_OPERAND (*ref, 0),
2030                                       TREE_OPERAND (*ref, 1));
2031               if (tem)
2032                 *ref = tem;
2033             }
2034         }
2035     }
2036
2037   civ = alloc_iv (base, step);
2038   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2039   return;
2040
2041 fail:
2042   for_each_index (op_p, idx_record_use, data);
2043 }
2044
2045 /* Finds and records invariants used in STMT.  */
2046
2047 static void
2048 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2049 {
2050   ssa_op_iter iter;
2051   use_operand_p use_p;
2052   tree op;
2053
2054   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2055     {
2056       op = USE_FROM_PTR (use_p);
2057       record_invariant (data, op, false);
2058     }
2059 }
2060
2061 /* Finds interesting uses of induction variables in the statement STMT.  */
2062
2063 static void
2064 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2065 {
2066   struct iv *iv;
2067   tree op, *lhs, *rhs;
2068   ssa_op_iter iter;
2069   use_operand_p use_p;
2070   enum tree_code code;
2071
2072   find_invariants_stmt (data, stmt);
2073
2074   if (gimple_code (stmt) == GIMPLE_COND)
2075     {
2076       find_interesting_uses_cond (data, stmt);
2077       return;
2078     }
2079
2080   if (is_gimple_assign (stmt))
2081     {
2082       lhs = gimple_assign_lhs_ptr (stmt);
2083       rhs = gimple_assign_rhs1_ptr (stmt);
2084
2085       if (TREE_CODE (*lhs) == SSA_NAME)
2086         {
2087           /* If the statement defines an induction variable, the uses are not
2088              interesting by themselves.  */
2089
2090           iv = get_iv (data, *lhs);
2091
2092           if (iv && !integer_zerop (iv->step))
2093             return;
2094         }
2095
2096       code = gimple_assign_rhs_code (stmt);
2097       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2098           && (REFERENCE_CLASS_P (*rhs)
2099               || is_gimple_val (*rhs)))
2100         {
2101           if (REFERENCE_CLASS_P (*rhs))
2102             find_interesting_uses_address (data, stmt, rhs);
2103           else
2104             find_interesting_uses_op (data, *rhs);
2105
2106           if (REFERENCE_CLASS_P (*lhs))
2107             find_interesting_uses_address (data, stmt, lhs);
2108           return;
2109         }
2110       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2111         {
2112           find_interesting_uses_cond (data, stmt);
2113           return;
2114         }
2115
2116       /* TODO -- we should also handle address uses of type
2117
2118          memory = call (whatever);
2119
2120          and
2121
2122          call (memory).  */
2123     }
2124
2125   if (gimple_code (stmt) == GIMPLE_PHI
2126       && gimple_bb (stmt) == data->current_loop->header)
2127     {
2128       iv = get_iv (data, PHI_RESULT (stmt));
2129
2130       if (iv && !integer_zerop (iv->step))
2131         return;
2132     }
2133
2134   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2135     {
2136       op = USE_FROM_PTR (use_p);
2137
2138       if (TREE_CODE (op) != SSA_NAME)
2139         continue;
2140
2141       iv = get_iv (data, op);
2142       if (!iv)
2143         continue;
2144
2145       find_interesting_uses_op (data, op);
2146     }
2147 }
2148
2149 /* Finds interesting uses of induction variables outside of loops
2150    on loop exit edge EXIT.  */
2151
2152 static void
2153 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2154 {
2155   gphi *phi;
2156   gphi_iterator psi;
2157   tree def;
2158
2159   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2160     {
2161       phi = psi.phi ();
2162       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2163       if (!virtual_operand_p (def))
2164         find_interesting_uses_op (data, def);
2165     }
2166 }
2167
2168 /* Finds uses of the induction variables that are interesting.  */
2169
2170 static void
2171 find_interesting_uses (struct ivopts_data *data)
2172 {
2173   basic_block bb;
2174   gimple_stmt_iterator bsi;
2175   basic_block *body = get_loop_body (data->current_loop);
2176   unsigned i;
2177   struct version_info *info;
2178   edge e;
2179
2180   if (dump_file && (dump_flags & TDF_DETAILS))
2181     fprintf (dump_file, "Uses:\n\n");
2182
2183   for (i = 0; i < data->current_loop->num_nodes; i++)
2184     {
2185       edge_iterator ei;
2186       bb = body[i];
2187
2188       FOR_EACH_EDGE (e, ei, bb->succs)
2189         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2190             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2191           find_interesting_uses_outside (data, e);
2192
2193       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2194         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2195       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2196         if (!is_gimple_debug (gsi_stmt (bsi)))
2197           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2198     }
2199
2200   if (dump_file && (dump_flags & TDF_DETAILS))
2201     {
2202       bitmap_iterator bi;
2203
2204       fprintf (dump_file, "\n");
2205
2206       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2207         {
2208           info = ver_info (data, i);
2209           if (info->inv_id)
2210             {
2211               fprintf (dump_file, "  ");
2212               print_generic_expr (dump_file, info->name, TDF_SLIM);
2213               fprintf (dump_file, " is invariant (%d)%s\n",
2214                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2215             }
2216         }
2217
2218       fprintf (dump_file, "\n");
2219     }
2220
2221   free (body);
2222 }
2223
2224 /* Compute maximum offset of [base + offset] addressing mode
2225    for memory reference represented by USE.  */
2226
2227 static HOST_WIDE_INT
2228 compute_max_addr_offset (struct iv_use *use)
2229 {
2230   int width;
2231   rtx reg, addr;
2232   HOST_WIDE_INT i, off;
2233   unsigned list_index, num;
2234   addr_space_t as;
2235   machine_mode mem_mode, addr_mode;
2236   static vec<HOST_WIDE_INT> max_offset_list;
2237
2238   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2239   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2240
2241   num = max_offset_list.length ();
2242   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2243   if (list_index >= num)
2244     {
2245       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2246       for (; num < max_offset_list.length (); num++)
2247         max_offset_list[num] = -1;
2248     }
2249
2250   off = max_offset_list[list_index];
2251   if (off != -1)
2252     return off;
2253
2254   addr_mode = targetm.addr_space.address_mode (as);
2255   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2256   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2257
2258   width = GET_MODE_BITSIZE (addr_mode) - 1;
2259   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2260     width = HOST_BITS_PER_WIDE_INT - 1;
2261
2262   for (i = width; i > 0; i--)
2263     {
2264       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2265       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2266       if (memory_address_addr_space_p (mem_mode, addr, as))
2267         break;
2268
2269       /* For some strict-alignment targets, the offset must be naturally
2270          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2271       off = ((unsigned HOST_WIDE_INT) 1 << i);
2272       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2273         {
2274           off -= GET_MODE_SIZE (mem_mode);
2275           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2276           if (memory_address_addr_space_p (mem_mode, addr, as))
2277             break;
2278         }
2279     }
2280   if (i == 0)
2281     off = 0;
2282
2283   max_offset_list[list_index] = off;
2284   return off;
2285 }
2286
2287 /* Check if all small groups should be split.  Return true if and
2288    only if:
2289
2290      1) At least one groups contain two uses with different offsets.
2291      2) No group contains more than two uses with different offsets.
2292
2293    Return false otherwise.  We want to split such groups because:
2294
2295      1) Small groups don't have much benefit and may interfer with
2296         general candidate selection.
2297      2) Size for problem with only small groups is usually small and
2298         general algorithm can handle it well.
2299
2300    TODO -- Above claim may not hold when auto increment is supported.  */
2301
2302 static bool
2303 split_all_small_groups (struct ivopts_data *data)
2304 {
2305   bool split_p = false;
2306   unsigned int i, n, distinct;
2307   struct iv_use *pre, *use;
2308
2309   n = n_iv_uses (data);
2310   for (i = 0; i < n; i++)
2311     {
2312       use = iv_use (data, i);
2313       if (!use->next)
2314         continue;
2315
2316       distinct = 1;
2317       gcc_assert (use->type == USE_ADDRESS);
2318       for (pre = use, use = use->next; use; pre = use, use = use->next)
2319         {
2320           if (pre->addr_offset != use->addr_offset)
2321             distinct++;
2322
2323           if (distinct > 2)
2324             return false;
2325         }
2326       if (distinct == 2)
2327         split_p = true;
2328     }
2329
2330   return split_p;
2331 }
2332
2333 /* For each group of address type uses, this function further groups
2334    these uses according to the maximum offset supported by target's
2335    [base + offset] addressing mode.  */
2336
2337 static void
2338 group_address_uses (struct ivopts_data *data)
2339 {
2340   HOST_WIDE_INT max_offset = -1;
2341   unsigned int i, n, sub_id;
2342   struct iv_use *pre, *use;
2343   unsigned HOST_WIDE_INT addr_offset_first;
2344
2345   /* Reset max offset to split all small groups.  */
2346   if (split_all_small_groups (data))
2347     max_offset = 0;
2348
2349   n = n_iv_uses (data);
2350   for (i = 0; i < n; i++)
2351     {
2352       use = iv_use (data, i);
2353       if (!use->next)
2354         continue;
2355
2356       gcc_assert (use->type == USE_ADDRESS);
2357       if (max_offset != 0)
2358         max_offset = compute_max_addr_offset (use);
2359
2360       while (use)
2361         {
2362           sub_id = 0;
2363           addr_offset_first = use->addr_offset;
2364           /* Only uses with offset that can fit in offset part against
2365              the first use can be grouped together.  */
2366           for (pre = use, use = use->next;
2367                use && (use->addr_offset - addr_offset_first
2368                        <= (unsigned HOST_WIDE_INT) max_offset);
2369                pre = use, use = use->next)
2370             {
2371               use->id = pre->id;
2372               use->sub_id = ++sub_id;
2373             }
2374
2375           /* Break the list and create new group.  */
2376           if (use)
2377             {
2378               pre->next = NULL;
2379               use->id = n_iv_uses (data);
2380               use->related_cands = BITMAP_ALLOC (NULL);
2381               data->iv_uses.safe_push (use);
2382             }
2383         }
2384     }
2385
2386   if (dump_file && (dump_flags & TDF_DETAILS))
2387     dump_uses (dump_file, data);
2388 }
2389
2390 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2391    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2392    we are at the top-level of the processed address.  */
2393
2394 static tree
2395 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2396                 HOST_WIDE_INT *offset)
2397 {
2398   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2399   enum tree_code code;
2400   tree type, orig_type = TREE_TYPE (expr);
2401   HOST_WIDE_INT off0, off1, st;
2402   tree orig_expr = expr;
2403
2404   STRIP_NOPS (expr);
2405
2406   type = TREE_TYPE (expr);
2407   code = TREE_CODE (expr);
2408   *offset = 0;
2409
2410   switch (code)
2411     {
2412     case INTEGER_CST:
2413       if (!cst_and_fits_in_hwi (expr)
2414           || integer_zerop (expr))
2415         return orig_expr;
2416
2417       *offset = int_cst_value (expr);
2418       return build_int_cst (orig_type, 0);
2419
2420     case POINTER_PLUS_EXPR:
2421     case PLUS_EXPR:
2422     case MINUS_EXPR:
2423       op0 = TREE_OPERAND (expr, 0);
2424       op1 = TREE_OPERAND (expr, 1);
2425
2426       op0 = strip_offset_1 (op0, false, false, &off0);
2427       op1 = strip_offset_1 (op1, false, false, &off1);
2428
2429       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2430       if (op0 == TREE_OPERAND (expr, 0)
2431           && op1 == TREE_OPERAND (expr, 1))
2432         return orig_expr;
2433
2434       if (integer_zerop (op1))
2435         expr = op0;
2436       else if (integer_zerop (op0))
2437         {
2438           if (code == MINUS_EXPR)
2439             expr = fold_build1 (NEGATE_EXPR, type, op1);
2440           else
2441             expr = op1;
2442         }
2443       else
2444         expr = fold_build2 (code, type, op0, op1);
2445
2446       return fold_convert (orig_type, expr);
2447
2448     case MULT_EXPR:
2449       op1 = TREE_OPERAND (expr, 1);
2450       if (!cst_and_fits_in_hwi (op1))
2451         return orig_expr;
2452
2453       op0 = TREE_OPERAND (expr, 0);
2454       op0 = strip_offset_1 (op0, false, false, &off0);
2455       if (op0 == TREE_OPERAND (expr, 0))
2456         return orig_expr;
2457
2458       *offset = off0 * int_cst_value (op1);
2459       if (integer_zerop (op0))
2460         expr = op0;
2461       else
2462         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2463
2464       return fold_convert (orig_type, expr);
2465
2466     case ARRAY_REF:
2467     case ARRAY_RANGE_REF:
2468       if (!inside_addr)
2469         return orig_expr;
2470
2471       step = array_ref_element_size (expr);
2472       if (!cst_and_fits_in_hwi (step))
2473         break;
2474
2475       st = int_cst_value (step);
2476       op1 = TREE_OPERAND (expr, 1);
2477       op1 = strip_offset_1 (op1, false, false, &off1);
2478       *offset = off1 * st;
2479
2480       if (top_compref
2481           && integer_zerop (op1))
2482         {
2483           /* Strip the component reference completely.  */
2484           op0 = TREE_OPERAND (expr, 0);
2485           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2486           *offset += off0;
2487           return op0;
2488         }
2489       break;
2490
2491     case COMPONENT_REF:
2492       {
2493         tree field;
2494
2495         if (!inside_addr)
2496           return orig_expr;
2497
2498         tmp = component_ref_field_offset (expr);
2499         field = TREE_OPERAND (expr, 1);
2500         if (top_compref
2501             && cst_and_fits_in_hwi (tmp)
2502             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2503           {
2504             HOST_WIDE_INT boffset, abs_off;
2505
2506             /* Strip the component reference completely.  */
2507             op0 = TREE_OPERAND (expr, 0);
2508             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2509             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2510             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2511             if (boffset < 0)
2512               abs_off = -abs_off;
2513
2514             *offset = off0 + int_cst_value (tmp) + abs_off;
2515             return op0;
2516           }
2517       }
2518       break;
2519
2520     case ADDR_EXPR:
2521       op0 = TREE_OPERAND (expr, 0);
2522       op0 = strip_offset_1 (op0, true, true, &off0);
2523       *offset += off0;
2524
2525       if (op0 == TREE_OPERAND (expr, 0))
2526         return orig_expr;
2527
2528       expr = build_fold_addr_expr (op0);
2529       return fold_convert (orig_type, expr);
2530
2531     case MEM_REF:
2532       /* ???  Offset operand?  */
2533       inside_addr = false;
2534       break;
2535
2536     default:
2537       return orig_expr;
2538     }
2539
2540   /* Default handling of expressions for that we want to recurse into
2541      the first operand.  */
2542   op0 = TREE_OPERAND (expr, 0);
2543   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2544   *offset += off0;
2545
2546   if (op0 == TREE_OPERAND (expr, 0)
2547       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2548     return orig_expr;
2549
2550   expr = copy_node (expr);
2551   TREE_OPERAND (expr, 0) = op0;
2552   if (op1)
2553     TREE_OPERAND (expr, 1) = op1;
2554
2555   /* Inside address, we might strip the top level component references,
2556      thus changing type of the expression.  Handling of ADDR_EXPR
2557      will fix that.  */
2558   expr = fold_convert (orig_type, expr);
2559
2560   return expr;
2561 }
2562
2563 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2564
2565 static tree
2566 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2567 {
2568   HOST_WIDE_INT off;
2569   tree core = strip_offset_1 (expr, false, false, &off);
2570   *offset = off;
2571   return core;
2572 }
2573
2574 /* Returns variant of TYPE that can be used as base for different uses.
2575    We return unsigned type with the same precision, which avoids problems
2576    with overflows.  */
2577
2578 static tree
2579 generic_type_for (tree type)
2580 {
2581   if (POINTER_TYPE_P (type))
2582     return unsigned_type_for (type);
2583
2584   if (TYPE_UNSIGNED (type))
2585     return type;
2586
2587   return unsigned_type_for (type);
2588 }
2589
2590 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2591    the bitmap to that we should store it.  */
2592
2593 static struct ivopts_data *fd_ivopts_data;
2594 static tree
2595 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2596 {
2597   bitmap *depends_on = (bitmap *) data;
2598   struct version_info *info;
2599
2600   if (TREE_CODE (*expr_p) != SSA_NAME)
2601     return NULL_TREE;
2602   info = name_info (fd_ivopts_data, *expr_p);
2603
2604   if (!info->inv_id || info->has_nonlin_use)
2605     return NULL_TREE;
2606
2607   if (!*depends_on)
2608     *depends_on = BITMAP_ALLOC (NULL);
2609   bitmap_set_bit (*depends_on, info->inv_id);
2610
2611   return NULL_TREE;
2612 }
2613
2614 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2615    position to POS.  If USE is not NULL, the candidate is set as related to
2616    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2617    replacement of the final value of the iv by a direct computation.  */
2618
2619 static struct iv_cand *
2620 add_candidate_1 (struct ivopts_data *data,
2621                  tree base, tree step, bool important, enum iv_position pos,
2622                  struct iv_use *use, gimple incremented_at)
2623 {
2624   unsigned i;
2625   struct iv_cand *cand = NULL;
2626   tree type, orig_type;
2627
2628   /* For non-original variables, make sure their values are computed in a type
2629      that does not invoke undefined behavior on overflows (since in general,
2630      we cannot prove that these induction variables are non-wrapping).  */
2631   if (pos != IP_ORIGINAL)
2632     {
2633       orig_type = TREE_TYPE (base);
2634       type = generic_type_for (orig_type);
2635       if (type != orig_type)
2636         {
2637           base = fold_convert (type, base);
2638           step = fold_convert (type, step);
2639         }
2640     }
2641
2642   for (i = 0; i < n_iv_cands (data); i++)
2643     {
2644       cand = iv_cand (data, i);
2645
2646       if (cand->pos != pos)
2647         continue;
2648
2649       if (cand->incremented_at != incremented_at
2650           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2651               && cand->ainc_use != use))
2652         continue;
2653
2654       if (!cand->iv)
2655         {
2656           if (!base && !step)
2657             break;
2658
2659           continue;
2660         }
2661
2662       if (!base && !step)
2663         continue;
2664
2665       if (operand_equal_p (base, cand->iv->base, 0)
2666           && operand_equal_p (step, cand->iv->step, 0)
2667           && (TYPE_PRECISION (TREE_TYPE (base))
2668               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2669         break;
2670     }
2671
2672   if (i == n_iv_cands (data))
2673     {
2674       cand = XCNEW (struct iv_cand);
2675       cand->id = i;
2676
2677       if (!base && !step)
2678         cand->iv = NULL;
2679       else
2680         cand->iv = alloc_iv (base, step);
2681
2682       cand->pos = pos;
2683       if (pos != IP_ORIGINAL && cand->iv)
2684         {
2685           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2686           cand->var_after = cand->var_before;
2687         }
2688       cand->important = important;
2689       cand->incremented_at = incremented_at;
2690       data->iv_candidates.safe_push (cand);
2691
2692       if (step
2693           && TREE_CODE (step) != INTEGER_CST)
2694         {
2695           fd_ivopts_data = data;
2696           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2697         }
2698
2699       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2700         cand->ainc_use = use;
2701       else
2702         cand->ainc_use = NULL;
2703
2704       if (dump_file && (dump_flags & TDF_DETAILS))
2705         dump_cand (dump_file, cand);
2706     }
2707
2708   if (important && !cand->important)
2709     {
2710       cand->important = true;
2711       if (dump_file && (dump_flags & TDF_DETAILS))
2712         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2713     }
2714
2715   if (use)
2716     {
2717       bitmap_set_bit (use->related_cands, i);
2718       if (dump_file && (dump_flags & TDF_DETAILS))
2719         fprintf (dump_file, "Candidate %d is related to use %d\n",
2720                  cand->id, use->id);
2721     }
2722
2723   return cand;
2724 }
2725
2726 /* Returns true if incrementing the induction variable at the end of the LOOP
2727    is allowed.
2728
2729    The purpose is to avoid splitting latch edge with a biv increment, thus
2730    creating a jump, possibly confusing other optimization passes and leaving
2731    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2732    is not available (so we do not have a better alternative), or if the latch
2733    edge is already nonempty.  */
2734
2735 static bool
2736 allow_ip_end_pos_p (struct loop *loop)
2737 {
2738   if (!ip_normal_pos (loop))
2739     return true;
2740
2741   if (!empty_block_p (ip_end_pos (loop)))
2742     return true;
2743
2744   return false;
2745 }
2746
2747 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2748    Important field is set to IMPORTANT.  */
2749
2750 static void
2751 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2752                         bool important, struct iv_use *use)
2753 {
2754   basic_block use_bb = gimple_bb (use->stmt);
2755   machine_mode mem_mode;
2756   unsigned HOST_WIDE_INT cstepi;
2757
2758   /* If we insert the increment in any position other than the standard
2759      ones, we must ensure that it is incremented once per iteration.
2760      It must not be in an inner nested loop, or one side of an if
2761      statement.  */
2762   if (use_bb->loop_father != data->current_loop
2763       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2764       || stmt_could_throw_p (use->stmt)
2765       || !cst_and_fits_in_hwi (step))
2766     return;
2767
2768   cstepi = int_cst_value (step);
2769
2770   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2771   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2772         || USE_STORE_PRE_INCREMENT (mem_mode))
2773        && GET_MODE_SIZE (mem_mode) == cstepi)
2774       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2775            || USE_STORE_PRE_DECREMENT (mem_mode))
2776           && GET_MODE_SIZE (mem_mode) == -cstepi))
2777     {
2778       enum tree_code code = MINUS_EXPR;
2779       tree new_base;
2780       tree new_step = step;
2781
2782       if (POINTER_TYPE_P (TREE_TYPE (base)))
2783         {
2784           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2785           code = POINTER_PLUS_EXPR;
2786         }
2787       else
2788         new_step = fold_convert (TREE_TYPE (base), new_step);
2789       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2790       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2791                        use->stmt);
2792     }
2793   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2794         || USE_STORE_POST_INCREMENT (mem_mode))
2795        && GET_MODE_SIZE (mem_mode) == cstepi)
2796       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2797            || USE_STORE_POST_DECREMENT (mem_mode))
2798           && GET_MODE_SIZE (mem_mode) == -cstepi))
2799     {
2800       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2801                        use->stmt);
2802     }
2803 }
2804
2805 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2806    position to POS.  If USE is not NULL, the candidate is set as related to
2807    it.  The candidate computation is scheduled on all available positions.  */
2808
2809 static void
2810 add_candidate (struct ivopts_data *data,
2811                tree base, tree step, bool important, struct iv_use *use)
2812 {
2813   gcc_assert (use == NULL || use->sub_id == 0);
2814
2815   if (ip_normal_pos (data->current_loop))
2816     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2817   if (ip_end_pos (data->current_loop)
2818       && allow_ip_end_pos_p (data->current_loop))
2819     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2820
2821   if (use != NULL && use->type == USE_ADDRESS)
2822     add_autoinc_candidates (data, base, step, important, use);
2823 }
2824
2825 /* Adds standard iv candidates.  */
2826
2827 static void
2828 add_standard_iv_candidates (struct ivopts_data *data)
2829 {
2830   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2831
2832   /* The same for a double-integer type if it is still fast enough.  */
2833   if (TYPE_PRECISION
2834         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2835       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2836     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2837                    build_int_cst (long_integer_type_node, 1), true, NULL);
2838
2839   /* The same for a double-integer type if it is still fast enough.  */
2840   if (TYPE_PRECISION
2841         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2842       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2843     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2844                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2845 }
2846
2847
2848 /* Adds candidates bases on the old induction variable IV.  */
2849
2850 static void
2851 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2852 {
2853   gimple phi;
2854   tree def;
2855   struct iv_cand *cand;
2856
2857   add_candidate (data, iv->base, iv->step, true, NULL);
2858
2859   /* The same, but with initial value zero.  */
2860   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2861     add_candidate (data, size_int (0), iv->step, true, NULL);
2862   else
2863     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2864                    iv->step, true, NULL);
2865
2866   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2867   if (gimple_code (phi) == GIMPLE_PHI)
2868     {
2869       /* Additionally record the possibility of leaving the original iv
2870          untouched.  */
2871       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2872       /* Don't add candidate if it's from another PHI node because
2873          it's an affine iv appearing in the form of PEELED_CHREC.  */
2874       phi = SSA_NAME_DEF_STMT (def);
2875       if (gimple_code (phi) != GIMPLE_PHI)
2876         {
2877           cand = add_candidate_1 (data,
2878                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2879                                   SSA_NAME_DEF_STMT (def));
2880           cand->var_before = iv->ssa_name;
2881           cand->var_after = def;
2882         }
2883       else
2884         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2885     }
2886 }
2887
2888 /* Adds candidates based on the old induction variables.  */
2889
2890 static void
2891 add_old_ivs_candidates (struct ivopts_data *data)
2892 {
2893   unsigned i;
2894   struct iv *iv;
2895   bitmap_iterator bi;
2896
2897   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2898     {
2899       iv = ver_info (data, i)->iv;
2900       if (iv && iv->biv_p && !integer_zerop (iv->step))
2901         add_old_iv_candidates (data, iv);
2902     }
2903 }
2904
2905 /* Adds candidates based on the value of the induction variable IV and USE.  */
2906
2907 static void
2908 add_iv_value_candidates (struct ivopts_data *data,
2909                          struct iv *iv, struct iv_use *use)
2910 {
2911   unsigned HOST_WIDE_INT offset;
2912   tree base;
2913   tree basetype;
2914
2915   add_candidate (data, iv->base, iv->step, false, use);
2916
2917   /* The same, but with initial value zero.  Make such variable important,
2918      since it is generic enough so that possibly many uses may be based
2919      on it.  */
2920   basetype = TREE_TYPE (iv->base);
2921   if (POINTER_TYPE_P (basetype))
2922     basetype = sizetype;
2923   add_candidate (data, build_int_cst (basetype, 0),
2924                  iv->step, true, use);
2925
2926   /* Third, try removing the constant offset.  Make sure to even
2927      add a candidate for &a[0] vs. (T *)&a.  */
2928   base = strip_offset (iv->base, &offset);
2929   if (offset
2930       || base != iv->base)
2931     add_candidate (data, base, iv->step, false, use);
2932 }
2933
2934 /* Adds candidates based on the uses.  */
2935
2936 static void
2937 add_derived_ivs_candidates (struct ivopts_data *data)
2938 {
2939   unsigned i;
2940
2941   for (i = 0; i < n_iv_uses (data); i++)
2942     {
2943       struct iv_use *use = iv_use (data, i);
2944
2945       if (!use)
2946         continue;
2947
2948       switch (use->type)
2949         {
2950         case USE_NONLINEAR_EXPR:
2951         case USE_COMPARE:
2952         case USE_ADDRESS:
2953           /* Just add the ivs based on the value of the iv used here.  */
2954           add_iv_value_candidates (data, use->iv, use);
2955           break;
2956
2957         default:
2958           gcc_unreachable ();
2959         }
2960     }
2961 }
2962
2963 /* Record important candidates and add them to related_cands bitmaps
2964    if needed.  */
2965
2966 static void
2967 record_important_candidates (struct ivopts_data *data)
2968 {
2969   unsigned i;
2970   struct iv_use *use;
2971
2972   for (i = 0; i < n_iv_cands (data); i++)
2973     {
2974       struct iv_cand *cand = iv_cand (data, i);
2975
2976       if (cand->important)
2977         bitmap_set_bit (data->important_candidates, i);
2978     }
2979
2980   data->consider_all_candidates = (n_iv_cands (data)
2981                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2982
2983   if (data->consider_all_candidates)
2984     {
2985       /* We will not need "related_cands" bitmaps in this case,
2986          so release them to decrease peak memory consumption.  */
2987       for (i = 0; i < n_iv_uses (data); i++)
2988         {
2989           use = iv_use (data, i);
2990           BITMAP_FREE (use->related_cands);
2991         }
2992     }
2993   else
2994     {
2995       /* Add important candidates to the related_cands bitmaps.  */
2996       for (i = 0; i < n_iv_uses (data); i++)
2997         bitmap_ior_into (iv_use (data, i)->related_cands,
2998                          data->important_candidates);
2999     }
3000 }
3001
3002 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3003    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3004    we allocate a simple list to every use.  */
3005
3006 static void
3007 alloc_use_cost_map (struct ivopts_data *data)
3008 {
3009   unsigned i, size, s;
3010
3011   for (i = 0; i < n_iv_uses (data); i++)
3012     {
3013       struct iv_use *use = iv_use (data, i);
3014
3015       if (data->consider_all_candidates)
3016         size = n_iv_cands (data);
3017       else
3018         {
3019           s = bitmap_count_bits (use->related_cands);
3020
3021           /* Round up to the power of two, so that moduling by it is fast.  */
3022           size = s ? (1 << ceil_log2 (s)) : 1;
3023         }
3024
3025       use->n_map_members = size;
3026       use->cost_map = XCNEWVEC (struct cost_pair, size);
3027     }
3028 }
3029
3030 /* Returns description of computation cost of expression whose runtime
3031    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
3032
3033 static comp_cost
3034 new_cost (unsigned runtime, unsigned complexity)
3035 {
3036   comp_cost cost;
3037
3038   cost.cost = runtime;
3039   cost.complexity = complexity;
3040
3041   return cost;
3042 }
3043
3044 /* Returns true if COST is infinite.  */
3045
3046 static bool
3047 infinite_cost_p (comp_cost cost)
3048 {
3049   return cost.cost == INFTY;
3050 }
3051
3052 /* Adds costs COST1 and COST2.  */
3053
3054 static comp_cost
3055 add_costs (comp_cost cost1, comp_cost cost2)
3056 {
3057   if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3058     return infinite_cost;
3059
3060   cost1.cost += cost2.cost;
3061   cost1.complexity += cost2.complexity;
3062
3063   return cost1;
3064 }
3065 /* Subtracts costs COST1 and COST2.  */
3066
3067 static comp_cost
3068 sub_costs (comp_cost cost1, comp_cost cost2)
3069 {
3070   cost1.cost -= cost2.cost;
3071   cost1.complexity -= cost2.complexity;
3072
3073   return cost1;
3074 }
3075
3076 /* Returns a negative number if COST1 < COST2, a positive number if
3077    COST1 > COST2, and 0 if COST1 = COST2.  */
3078
3079 static int
3080 compare_costs (comp_cost cost1, comp_cost cost2)
3081 {
3082   if (cost1.cost == cost2.cost)
3083     return cost1.complexity - cost2.complexity;
3084
3085   return cost1.cost - cost2.cost;
3086 }
3087
3088 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3089    on invariants DEPENDS_ON and that the value used in expressing it
3090    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3091
3092 static void
3093 set_use_iv_cost (struct ivopts_data *data,
3094                  struct iv_use *use, struct iv_cand *cand,
3095                  comp_cost cost, bitmap depends_on, tree value,
3096                  enum tree_code comp, int inv_expr_id)
3097 {
3098   unsigned i, s;
3099
3100   if (infinite_cost_p (cost))
3101     {
3102       BITMAP_FREE (depends_on);
3103       return;
3104     }
3105
3106   if (data->consider_all_candidates)
3107     {
3108       use->cost_map[cand->id].cand = cand;
3109       use->cost_map[cand->id].cost = cost;
3110       use->cost_map[cand->id].depends_on = depends_on;
3111       use->cost_map[cand->id].value = value;
3112       use->cost_map[cand->id].comp = comp;
3113       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3114       return;
3115     }
3116
3117   /* n_map_members is a power of two, so this computes modulo.  */
3118   s = cand->id & (use->n_map_members - 1);
3119   for (i = s; i < use->n_map_members; i++)
3120     if (!use->cost_map[i].cand)
3121       goto found;
3122   for (i = 0; i < s; i++)
3123     if (!use->cost_map[i].cand)
3124       goto found;
3125
3126   gcc_unreachable ();
3127
3128 found:
3129   use->cost_map[i].cand = cand;
3130   use->cost_map[i].cost = cost;
3131   use->cost_map[i].depends_on = depends_on;
3132   use->cost_map[i].value = value;
3133   use->cost_map[i].comp = comp;
3134   use->cost_map[i].inv_expr_id = inv_expr_id;
3135 }
3136
3137 /* Gets cost of (USE, CANDIDATE) pair.  */
3138
3139 static struct cost_pair *
3140 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3141                  struct iv_cand *cand)
3142 {
3143   unsigned i, s;
3144   struct cost_pair *ret;
3145
3146   if (!cand)
3147     return NULL;
3148
3149   if (data->consider_all_candidates)
3150     {
3151       ret = use->cost_map + cand->id;
3152       if (!ret->cand)
3153         return NULL;
3154
3155       return ret;
3156     }
3157
3158   /* n_map_members is a power of two, so this computes modulo.  */
3159   s = cand->id & (use->n_map_members - 1);
3160   for (i = s; i < use->n_map_members; i++)
3161     if (use->cost_map[i].cand == cand)
3162       return use->cost_map + i;
3163     else if (use->cost_map[i].cand == NULL)
3164       return NULL;
3165   for (i = 0; i < s; i++)
3166     if (use->cost_map[i].cand == cand)
3167       return use->cost_map + i;
3168     else if (use->cost_map[i].cand == NULL)
3169       return NULL;
3170
3171   return NULL;
3172 }
3173
3174 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3175 static rtx
3176 produce_memory_decl_rtl (tree obj, int *regno)
3177 {
3178   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3179   machine_mode address_mode = targetm.addr_space.address_mode (as);
3180   rtx x;
3181
3182   gcc_assert (obj);
3183   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3184     {
3185       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3186       x = gen_rtx_SYMBOL_REF (address_mode, name);
3187       SET_SYMBOL_REF_DECL (x, obj);
3188       x = gen_rtx_MEM (DECL_MODE (obj), x);
3189       set_mem_addr_space (x, as);
3190       targetm.encode_section_info (obj, x, true);
3191     }
3192   else
3193     {
3194       x = gen_raw_REG (address_mode, (*regno)++);
3195       x = gen_rtx_MEM (DECL_MODE (obj), x);
3196       set_mem_addr_space (x, as);
3197     }
3198
3199   return x;
3200 }
3201
3202 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3203    walk_tree.  DATA contains the actual fake register number.  */
3204
3205 static tree
3206 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3207 {
3208   tree obj = NULL_TREE;
3209   rtx x = NULL_RTX;
3210   int *regno = (int *) data;
3211
3212   switch (TREE_CODE (*expr_p))
3213     {
3214     case ADDR_EXPR:
3215       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3216            handled_component_p (*expr_p);
3217            expr_p = &TREE_OPERAND (*expr_p, 0))
3218         continue;
3219       obj = *expr_p;
3220       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3221         x = produce_memory_decl_rtl (obj, regno);
3222       break;
3223
3224     case SSA_NAME:
3225       *ws = 0;
3226       obj = SSA_NAME_VAR (*expr_p);
3227       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3228       if (!obj)
3229         return NULL_TREE;
3230       if (!DECL_RTL_SET_P (obj))
3231         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3232       break;
3233
3234     case VAR_DECL:
3235     case PARM_DECL:
3236     case RESULT_DECL:
3237       *ws = 0;
3238       obj = *expr_p;
3239
3240       if (DECL_RTL_SET_P (obj))
3241         break;
3242
3243       if (DECL_MODE (obj) == BLKmode)
3244         x = produce_memory_decl_rtl (obj, regno);
3245       else
3246         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3247
3248       break;
3249
3250     default:
3251       break;
3252     }
3253
3254   if (x)
3255     {
3256       decl_rtl_to_reset.safe_push (obj);
3257       SET_DECL_RTL (obj, x);
3258     }
3259
3260   return NULL_TREE;
3261 }
3262
3263 /* Determines cost of the computation of EXPR.  */
3264
3265 static unsigned
3266 computation_cost (tree expr, bool speed)
3267 {
3268   rtx_insn *seq;
3269   rtx rslt;
3270   tree type = TREE_TYPE (expr);
3271   unsigned cost;
3272   /* Avoid using hard regs in ways which may be unsupported.  */
3273   int regno = LAST_VIRTUAL_REGISTER + 1;
3274   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3275   enum node_frequency real_frequency = node->frequency;
3276
3277   node->frequency = NODE_FREQUENCY_NORMAL;
3278   crtl->maybe_hot_insn_p = speed;
3279   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3280   start_sequence ();
3281   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3282   seq = get_insns ();
3283   end_sequence ();
3284   default_rtl_profile ();
3285   node->frequency = real_frequency;
3286
3287   cost = seq_cost (seq, speed);
3288   if (MEM_P (rslt))
3289     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3290                           TYPE_ADDR_SPACE (type), speed);
3291   else if (!REG_P (rslt))
3292     cost += set_src_cost (rslt, speed);
3293
3294   return cost;
3295 }
3296
3297 /* Returns variable containing the value of candidate CAND at statement AT.  */
3298
3299 static tree
3300 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3301 {
3302   if (stmt_after_increment (loop, cand, stmt))
3303     return cand->var_after;
3304   else
3305     return cand->var_before;
3306 }
3307
3308 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3309    same precision that is at least as wide as the precision of TYPE, stores
3310    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3311    type of A and B.  */
3312
3313 static tree
3314 determine_common_wider_type (tree *a, tree *b)
3315 {
3316   tree wider_type = NULL;
3317   tree suba, subb;
3318   tree atype = TREE_TYPE (*a);
3319
3320   if (CONVERT_EXPR_P (*a))
3321     {
3322       suba = TREE_OPERAND (*a, 0);
3323       wider_type = TREE_TYPE (suba);
3324       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3325         return atype;
3326     }
3327   else
3328     return atype;
3329
3330   if (CONVERT_EXPR_P (*b))
3331     {
3332       subb = TREE_OPERAND (*b, 0);
3333       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3334         return atype;
3335     }
3336   else
3337     return atype;
3338
3339   *a = suba;
3340   *b = subb;
3341   return wider_type;
3342 }
3343
3344 /* Determines the expression by that USE is expressed from induction variable
3345    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3346    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3347
3348 static bool
3349 get_computation_aff (struct loop *loop,
3350                      struct iv_use *use, struct iv_cand *cand, gimple at,
3351                      struct aff_tree *aff)
3352 {
3353   tree ubase = use->iv->base;
3354   tree ustep = use->iv->step;
3355   tree cbase = cand->iv->base;
3356   tree cstep = cand->iv->step, cstep_common;
3357   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3358   tree common_type, var;
3359   tree uutype;
3360   aff_tree cbase_aff, var_aff;
3361   widest_int rat;
3362
3363   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3364     {
3365       /* We do not have a precision to express the values of use.  */
3366       return false;
3367     }
3368
3369   var = var_at_stmt (loop, cand, at);
3370   uutype = unsigned_type_for (utype);
3371
3372   /* If the conversion is not noop, perform it.  */
3373   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3374     {
3375       cstep = fold_convert (uutype, cstep);
3376       cbase = fold_convert (uutype, cbase);
3377       var = fold_convert (uutype, var);
3378     }
3379
3380   if (!constant_multiple_of (ustep, cstep, &rat))
3381     return false;
3382
3383   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3384      type, we achieve better folding by computing their difference in this
3385      wider type, and cast the result to UUTYPE.  We do not need to worry about
3386      overflows, as all the arithmetics will in the end be performed in UUTYPE
3387      anyway.  */
3388   common_type = determine_common_wider_type (&ubase, &cbase);
3389
3390   /* use = ubase - ratio * cbase + ratio * var.  */
3391   tree_to_aff_combination (ubase, common_type, aff);
3392   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3393   tree_to_aff_combination (var, uutype, &var_aff);
3394
3395   /* We need to shift the value if we are after the increment.  */
3396   if (stmt_after_increment (loop, cand, at))
3397     {
3398       aff_tree cstep_aff;
3399
3400       if (common_type != uutype)
3401         cstep_common = fold_convert (common_type, cstep);
3402       else
3403         cstep_common = cstep;
3404
3405       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3406       aff_combination_add (&cbase_aff, &cstep_aff);
3407     }
3408
3409   aff_combination_scale (&cbase_aff, -rat);
3410   aff_combination_add (aff, &cbase_aff);
3411   if (common_type != uutype)
3412     aff_combination_convert (aff, uutype);
3413
3414   aff_combination_scale (&var_aff, rat);
3415   aff_combination_add (aff, &var_aff);
3416
3417   return true;
3418 }
3419
3420 /* Return the type of USE.  */
3421
3422 static tree
3423 get_use_type (struct iv_use *use)
3424 {
3425   tree base_type = TREE_TYPE (use->iv->base);
3426   tree type;
3427
3428   if (use->type == USE_ADDRESS)
3429     {
3430       /* The base_type may be a void pointer.  Create a pointer type based on
3431          the mem_ref instead.  */
3432       type = build_pointer_type (TREE_TYPE (*use->op_p));
3433       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3434                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3435     }
3436   else
3437     type = base_type;
3438
3439   return type;
3440 }
3441
3442 /* Determines the expression by that USE is expressed from induction variable
3443    CAND at statement AT in LOOP.  The computation is unshared.  */
3444
3445 static tree
3446 get_computation_at (struct loop *loop,
3447                     struct iv_use *use, struct iv_cand *cand, gimple at)
3448 {
3449   aff_tree aff;
3450   tree type = get_use_type (use);
3451
3452   if (!get_computation_aff (loop, use, cand, at, &aff))
3453     return NULL_TREE;
3454   unshare_aff_combination (&aff);
3455   return fold_convert (type, aff_combination_to_tree (&aff));
3456 }
3457
3458 /* Determines the expression by that USE is expressed from induction variable
3459    CAND in LOOP.  The computation is unshared.  */
3460
3461 static tree
3462 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3463 {
3464   return get_computation_at (loop, use, cand, use->stmt);
3465 }
3466
3467 /* Adjust the cost COST for being in loop setup rather than loop body.
3468    If we're optimizing for space, the loop setup overhead is constant;
3469    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3470 static unsigned
3471 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3472 {
3473   if (cost == INFTY)
3474     return cost;
3475   else if (optimize_loop_for_speed_p (data->current_loop))
3476     return cost / avg_loop_niter (data->current_loop);
3477   else
3478     return cost;
3479 }
3480
3481 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3482    validity for a memory reference accessing memory of mode MODE in
3483    address space AS.  */
3484
3485
3486 bool
3487 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3488                                  addr_space_t as)
3489 {
3490 #define MAX_RATIO 128
3491   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3492   static vec<sbitmap> valid_mult_list;
3493   sbitmap valid_mult;
3494
3495   if (data_index >= valid_mult_list.length ())
3496     valid_mult_list.safe_grow_cleared (data_index + 1);
3497
3498   valid_mult = valid_mult_list[data_index];
3499   if (!valid_mult)
3500     {
3501       machine_mode address_mode = targetm.addr_space.address_mode (as);
3502       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3503       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3504       rtx addr, scaled;
3505       HOST_WIDE_INT i;
3506
3507       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3508       bitmap_clear (valid_mult);
3509       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3510       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3511       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3512         {
3513           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3514           if (memory_address_addr_space_p (mode, addr, as)
3515               || memory_address_addr_space_p (mode, scaled, as))
3516             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3517         }
3518
3519       if (dump_file && (dump_flags & TDF_DETAILS))
3520         {
3521           fprintf (dump_file, "  allowed multipliers:");
3522           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3523             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3524               fprintf (dump_file, " %d", (int) i);
3525           fprintf (dump_file, "\n");
3526           fprintf (dump_file, "\n");
3527         }
3528
3529       valid_mult_list[data_index] = valid_mult;
3530     }
3531
3532   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3533     return false;
3534
3535   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3536 }
3537
3538 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3539    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3540    variable is omitted.  Compute the cost for a memory reference that accesses
3541    a memory location of mode MEM_MODE in address space AS.
3542
3543    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3544    size of MEM_MODE / RATIO) is available.  To make this determination, we
3545    look at the size of the increment to be made, which is given in CSTEP.
3546    CSTEP may be zero if the step is unknown.
3547    STMT_AFTER_INC is true iff the statement we're looking at is after the
3548    increment of the original biv.
3549
3550    TODO -- there must be some better way.  This all is quite crude.  */
3551
3552 enum ainc_type
3553 {
3554   AINC_PRE_INC,         /* Pre increment.  */
3555   AINC_PRE_DEC,         /* Pre decrement.  */
3556   AINC_POST_INC,        /* Post increment.  */
3557   AINC_POST_DEC,        /* Post decrement.  */
3558   AINC_NONE             /* Also the number of auto increment types.  */
3559 };
3560
3561 typedef struct address_cost_data_s
3562 {
3563   HOST_WIDE_INT min_offset, max_offset;
3564   unsigned costs[2][2][2][2];
3565   unsigned ainc_costs[AINC_NONE];
3566 } *address_cost_data;
3567
3568
3569 static comp_cost
3570 get_address_cost (bool symbol_present, bool var_present,
3571                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3572                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3573                   addr_space_t as, bool speed,
3574                   bool stmt_after_inc, bool *may_autoinc)
3575 {
3576   machine_mode address_mode = targetm.addr_space.address_mode (as);
3577   static vec<address_cost_data> address_cost_data_list;
3578   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3579   address_cost_data data;
3580   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3581   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3582   unsigned cost, acost, complexity;
3583   enum ainc_type autoinc_type;
3584   bool offset_p, ratio_p, autoinc;
3585   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3586   unsigned HOST_WIDE_INT mask;
3587   unsigned bits;
3588
3589   if (data_index >= address_cost_data_list.length ())
3590     address_cost_data_list.safe_grow_cleared (data_index + 1);
3591
3592   data = address_cost_data_list[data_index];
3593   if (!data)
3594     {
3595       HOST_WIDE_INT i;
3596       HOST_WIDE_INT rat, off = 0;
3597       int old_cse_not_expected, width;
3598       unsigned sym_p, var_p, off_p, rat_p, add_c;
3599       rtx_insn *seq;
3600       rtx addr, base;
3601       rtx reg0, reg1;
3602
3603       data = (address_cost_data) xcalloc (1, sizeof (*data));
3604
3605       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3606
3607       width = GET_MODE_BITSIZE (address_mode) - 1;
3608       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3609         width = HOST_BITS_PER_WIDE_INT - 1;
3610       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3611
3612       for (i = width; i >= 0; i--)
3613         {
3614           off = -((unsigned HOST_WIDE_INT) 1 << i);
3615           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3616           if (memory_address_addr_space_p (mem_mode, addr, as))
3617             break;
3618         }
3619       data->min_offset = (i == -1? 0 : off);
3620
3621       for (i = width; i >= 0; i--)
3622         {
3623           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3624           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3625           if (memory_address_addr_space_p (mem_mode, addr, as))
3626             break;
3627           /* For some strict-alignment targets, the offset must be naturally
3628              aligned.  Try an aligned offset if mem_mode is not QImode.  */
3629           off = mem_mode != QImode
3630                 ? ((unsigned HOST_WIDE_INT) 1 << i)
3631                     - GET_MODE_SIZE (mem_mode)
3632                 : 0;
3633           if (off > 0)
3634             {
3635               XEXP (addr, 1) = gen_int_mode (off, address_mode);
3636               if (memory_address_addr_space_p (mem_mode, addr, as))
3637                 break;
3638             }
3639         }
3640       if (i == -1)
3641         off = 0;
3642       data->max_offset = off;
3643
3644       if (dump_file && (dump_flags & TDF_DETAILS))
3645         {
3646           fprintf (dump_file, "get_address_cost:\n");
3647           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3648                    GET_MODE_NAME (mem_mode),
3649                    data->min_offset);
3650           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3651                    GET_MODE_NAME (mem_mode),
3652                    data->max_offset);
3653         }
3654
3655       rat = 1;
3656       for (i = 2; i <= MAX_RATIO; i++)
3657         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3658           {
3659             rat = i;
3660             break;
3661           }
3662
3663       /* Compute the cost of various addressing modes.  */
3664       acost = 0;
3665       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3666       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3667
3668       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3669           || USE_STORE_PRE_DECREMENT (mem_mode))
3670         {
3671           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3672           has_predec[mem_mode]
3673             = memory_address_addr_space_p (mem_mode, addr, as);
3674
3675           if (has_predec[mem_mode])
3676             data->ainc_costs[AINC_PRE_DEC]
3677               = address_cost (addr, mem_mode, as, speed);
3678         }
3679       if (USE_LOAD_POST_DECREMENT (mem_mode)
3680           || USE_STORE_POST_DECREMENT (mem_mode))
3681         {
3682           addr = gen_rtx_POST_DEC (address_mode, reg0);
3683           has_postdec[mem_mode]
3684             = memory_address_addr_space_p (mem_mode, addr, as);
3685
3686           if (has_postdec[mem_mode])
3687             data->ainc_costs[AINC_POST_DEC]
3688               = address_cost (addr, mem_mode, as, speed);
3689         }
3690       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3691           || USE_STORE_PRE_DECREMENT (mem_mode))
3692         {
3693           addr = gen_rtx_PRE_INC (address_mode, reg0);
3694           has_preinc[mem_mode]
3695             = memory_address_addr_space_p (mem_mode, addr, as);
3696
3697           if (has_preinc[mem_mode])
3698             data->ainc_costs[AINC_PRE_INC]
3699               = address_cost (addr, mem_mode, as, speed);
3700         }
3701       if (USE_LOAD_POST_INCREMENT (mem_mode)
3702           || USE_STORE_POST_INCREMENT (mem_mode))
3703         {
3704           addr = gen_rtx_POST_INC (address_mode, reg0);
3705           has_postinc[mem_mode]
3706             = memory_address_addr_space_p (mem_mode, addr, as);
3707
3708           if (has_postinc[mem_mode])
3709             data->ainc_costs[AINC_POST_INC]
3710               = address_cost (addr, mem_mode, as, speed);
3711         }
3712       for (i = 0; i < 16; i++)
3713         {
3714           sym_p = i & 1;
3715           var_p = (i >> 1) & 1;
3716           off_p = (i >> 2) & 1;
3717           rat_p = (i >> 3) & 1;
3718
3719           addr = reg0;
3720           if (rat_p)
3721             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3722                                    gen_int_mode (rat, address_mode));
3723
3724           if (var_p)
3725             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3726
3727           if (sym_p)
3728             {
3729               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3730               /* ??? We can run into trouble with some backends by presenting
3731                  it with symbols which haven't been properly passed through
3732                  targetm.encode_section_info.  By setting the local bit, we
3733                  enhance the probability of things working.  */
3734               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3735
3736               if (off_p)
3737                 base = gen_rtx_fmt_e (CONST, address_mode,
3738                                       gen_rtx_fmt_ee
3739                                         (PLUS, address_mode, base,
3740                                          gen_int_mode (off, address_mode)));
3741             }
3742           else if (off_p)
3743             base = gen_int_mode (off, address_mode);
3744           else
3745             base = NULL_RTX;
3746
3747           if (base)
3748             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3749
3750           start_sequence ();
3751           /* To avoid splitting addressing modes, pretend that no cse will
3752              follow.  */
3753           old_cse_not_expected = cse_not_expected;
3754           cse_not_expected = true;
3755           addr = memory_address_addr_space (mem_mode, addr, as);
3756           cse_not_expected = old_cse_not_expected;
3757           seq = get_insns ();
3758           end_sequence ();
3759
3760           acost = seq_cost (seq, speed);
3761           acost += address_cost (addr, mem_mode, as, speed);
3762
3763           if (!acost)
3764             acost = 1;
3765           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3766         }
3767
3768       /* On some targets, it is quite expensive to load symbol to a register,
3769          which makes addresses that contain symbols look much more expensive.
3770          However, the symbol will have to be loaded in any case before the
3771          loop (and quite likely we have it in register already), so it does not
3772          make much sense to penalize them too heavily.  So make some final
3773          tweaks for the SYMBOL_PRESENT modes:
3774
3775          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3776          var is cheaper, use this mode with small penalty.
3777          If VAR_PRESENT is true, try whether the mode with
3778          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3779          if this is the case, use it.  */
3780       add_c = add_cost (speed, address_mode);
3781       for (i = 0; i < 8; i++)
3782         {
3783           var_p = i & 1;
3784           off_p = (i >> 1) & 1;
3785           rat_p = (i >> 2) & 1;
3786
3787           acost = data->costs[0][1][off_p][rat_p] + 1;
3788           if (var_p)
3789             acost += add_c;
3790
3791           if (acost < data->costs[1][var_p][off_p][rat_p])
3792             data->costs[1][var_p][off_p][rat_p] = acost;
3793         }
3794
3795       if (dump_file && (dump_flags & TDF_DETAILS))
3796         {
3797           fprintf (dump_file, "Address costs:\n");
3798
3799           for (i = 0; i < 16; i++)
3800             {
3801               sym_p = i & 1;
3802               var_p = (i >> 1) & 1;
3803               off_p = (i >> 2) & 1;
3804               rat_p = (i >> 3) & 1;
3805
3806               fprintf (dump_file, "  ");
3807               if (sym_p)
3808                 fprintf (dump_file, "sym + ");
3809               if (var_p)
3810                 fprintf (dump_file, "var + ");
3811               if (off_p)
3812                 fprintf (dump_file, "cst + ");
3813               if (rat_p)
3814                 fprintf (dump_file, "rat * ");
3815
3816               acost = data->costs[sym_p][var_p][off_p][rat_p];
3817               fprintf (dump_file, "index costs %d\n", acost);
3818             }
3819           if (has_predec[mem_mode] || has_postdec[mem_mode]
3820               || has_preinc[mem_mode] || has_postinc[mem_mode])
3821             fprintf (dump_file, "  May include autoinc/dec\n");
3822           fprintf (dump_file, "\n");
3823         }
3824
3825       address_cost_data_list[data_index] = data;
3826     }
3827
3828   bits = GET_MODE_BITSIZE (address_mode);
3829   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3830   offset &= mask;
3831   if ((offset >> (bits - 1) & 1))
3832     offset |= ~mask;
3833   s_offset = offset;
3834
3835   autoinc = false;
3836   autoinc_type = AINC_NONE;
3837   msize = GET_MODE_SIZE (mem_mode);
3838   autoinc_offset = offset;
3839   if (stmt_after_inc)
3840     autoinc_offset += ratio * cstep;
3841   if (symbol_present || var_present || ratio != 1)
3842     autoinc = false;
3843   else
3844     {
3845       if (has_postinc[mem_mode] && autoinc_offset == 0
3846           && msize == cstep)
3847         autoinc_type = AINC_POST_INC;
3848       else if (has_postdec[mem_mode] && autoinc_offset == 0
3849                && msize == -cstep)
3850         autoinc_type = AINC_POST_DEC;
3851       else if (has_preinc[mem_mode] && autoinc_offset == msize
3852                && msize == cstep)
3853         autoinc_type = AINC_PRE_INC;
3854       else if (has_predec[mem_mode] && autoinc_offset == -msize
3855                && msize == -cstep)
3856         autoinc_type = AINC_PRE_DEC;
3857
3858       if (autoinc_type != AINC_NONE)
3859         autoinc = true;
3860     }
3861
3862   cost = 0;
3863   offset_p = (s_offset != 0
3864               && data->min_offset <= s_offset
3865               && s_offset <= data->max_offset);
3866   ratio_p = (ratio != 1
3867              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3868
3869   if (ratio != 1 && !ratio_p)
3870     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3871
3872   if (s_offset && !offset_p && !symbol_present)
3873     cost += add_cost (speed, address_mode);
3874
3875   if (may_autoinc)
3876     *may_autoinc = autoinc;
3877   if (autoinc)
3878     acost = data->ainc_costs[autoinc_type];
3879   else
3880     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3881   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3882   return new_cost (cost + acost, complexity);
3883 }
3884
3885  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3886     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3887     calculating the operands of EXPR.  Returns true if successful, and returns
3888     the cost in COST.  */
3889
3890 static bool
3891 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3892                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3893 {
3894   comp_cost res;
3895   tree op1 = TREE_OPERAND (expr, 1);
3896   tree cst = TREE_OPERAND (mult, 1);
3897   tree multop = TREE_OPERAND (mult, 0);
3898   int m = exact_log2 (int_cst_value (cst));
3899   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3900   int as_cost, sa_cost;
3901   bool mult_in_op1;
3902
3903   if (!(m >= 0 && m < maxm))
3904     return false;
3905
3906   mult_in_op1 = operand_equal_p (op1, mult, 0);
3907
3908   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3909
3910   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3911      use that in preference to a shift insn followed by an add insn.  */
3912   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3913              ? shiftadd_cost (speed, mode, m)
3914              : (mult_in_op1
3915                 ? shiftsub1_cost (speed, mode, m)
3916                 : shiftsub0_cost (speed, mode, m)));
3917
3918   res = new_cost (MIN (as_cost, sa_cost), 0);
3919   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3920
3921   STRIP_NOPS (multop);
3922   if (!is_gimple_val (multop))
3923     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3924
3925   *cost = res;
3926   return true;
3927 }
3928
3929 /* Estimates cost of forcing expression EXPR into a variable.  */
3930
3931 static comp_cost
3932 force_expr_to_var_cost (tree expr, bool speed)
3933 {
3934   static bool costs_initialized = false;
3935   static unsigned integer_cost [2];
3936   static unsigned symbol_cost [2];
3937   static unsigned address_cost [2];
3938   tree op0, op1;
3939   comp_cost cost0, cost1, cost;
3940   machine_mode mode;
3941
3942   if (!costs_initialized)
3943     {
3944       tree type = build_pointer_type (integer_type_node);
3945       tree var, addr;
3946       rtx x;
3947       int i;
3948
3949       var = create_tmp_var_raw (integer_type_node, "test_var");
3950       TREE_STATIC (var) = 1;
3951       x = produce_memory_decl_rtl (var, NULL);
3952       SET_DECL_RTL (var, x);
3953
3954       addr = build1 (ADDR_EXPR, type, var);
3955
3956
3957       for (i = 0; i < 2; i++)
3958         {
3959           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3960                                                              2000), i);
3961
3962           symbol_cost[i] = computation_cost (addr, i) + 1;
3963
3964           address_cost[i]
3965             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3966           if (dump_file && (dump_flags & TDF_DETAILS))
3967             {
3968               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3969               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3970               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3971               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3972               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3973               fprintf (dump_file, "\n");
3974             }
3975         }
3976
3977       costs_initialized = true;
3978     }
3979
3980   STRIP_NOPS (expr);
3981
3982   if (SSA_VAR_P (expr))
3983     return no_cost;
3984
3985   if (is_gimple_min_invariant (expr))
3986     {
3987       if (TREE_CODE (expr) == INTEGER_CST)
3988         return new_cost (integer_cost [speed], 0);
3989
3990       if (TREE_CODE (expr) == ADDR_EXPR)
3991         {
3992           tree obj = TREE_OPERAND (expr, 0);
3993
3994           if (TREE_CODE (obj) == VAR_DECL
3995               || TREE_CODE (obj) == PARM_DECL
3996               || TREE_CODE (obj) == RESULT_DECL)
3997             return new_cost (symbol_cost [speed], 0);
3998         }
3999
4000       return new_cost (address_cost [speed], 0);
4001     }
4002
4003   switch (TREE_CODE (expr))
4004     {
4005     case POINTER_PLUS_EXPR:
4006     case PLUS_EXPR:
4007     case MINUS_EXPR:
4008     case MULT_EXPR:
4009       op0 = TREE_OPERAND (expr, 0);
4010       op1 = TREE_OPERAND (expr, 1);
4011       STRIP_NOPS (op0);
4012       STRIP_NOPS (op1);
4013       break;
4014
4015     CASE_CONVERT:
4016     case NEGATE_EXPR:
4017       op0 = TREE_OPERAND (expr, 0);
4018       STRIP_NOPS (op0);
4019       op1 = NULL_TREE;
4020       break;
4021
4022     default:
4023       /* Just an arbitrary value, FIXME.  */
4024       return new_cost (target_spill_cost[speed], 0);
4025     }
4026
4027   if (op0 == NULL_TREE
4028       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4029     cost0 = no_cost;
4030   else
4031     cost0 = force_expr_to_var_cost (op0, speed);
4032
4033   if (op1 == NULL_TREE
4034       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4035     cost1 = no_cost;
4036   else
4037     cost1 = force_expr_to_var_cost (op1, speed);
4038
4039   mode = TYPE_MODE (TREE_TYPE (expr));
4040   switch (TREE_CODE (expr))
4041     {
4042     case POINTER_PLUS_EXPR:
4043     case PLUS_EXPR:
4044     case MINUS_EXPR:
4045     case NEGATE_EXPR:
4046       cost = new_cost (add_cost (speed, mode), 0);
4047       if (TREE_CODE (expr) != NEGATE_EXPR)
4048         {
4049           tree mult = NULL_TREE;
4050           comp_cost sa_cost;
4051           if (TREE_CODE (op1) == MULT_EXPR)
4052             mult = op1;
4053           else if (TREE_CODE (op0) == MULT_EXPR)
4054             mult = op0;
4055
4056           if (mult != NULL_TREE
4057               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4058               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4059                                     speed, &sa_cost))
4060             return sa_cost;
4061         }
4062       break;
4063
4064     CASE_CONVERT:
4065       {
4066         tree inner_mode, outer_mode;
4067         outer_mode = TREE_TYPE (expr);
4068         inner_mode = TREE_TYPE (op0);
4069         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4070                                        TYPE_MODE (inner_mode), speed), 0);
4071       }
4072       break;
4073
4074     case MULT_EXPR:
4075       if (cst_and_fits_in_hwi (op0))
4076         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4077                                              mode, speed), 0);
4078       else if (cst_and_fits_in_hwi (op1))
4079         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4080                                              mode, speed), 0);
4081       else
4082         return new_cost (target_spill_cost [speed], 0);
4083       break;
4084
4085     default:
4086       gcc_unreachable ();
4087     }
4088
4089   cost = add_costs (cost, cost0);
4090   cost = add_costs (cost, cost1);
4091
4092   /* Bound the cost by target_spill_cost.  The parts of complicated
4093      computations often are either loop invariant or at least can
4094      be shared between several iv uses, so letting this grow without
4095      limits would not give reasonable results.  */
4096   if (cost.cost > (int) target_spill_cost [speed])
4097     cost.cost = target_spill_cost [speed];
4098
4099   return cost;
4100 }
4101
4102 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4103    invariants the computation depends on.  */
4104
4105 static comp_cost
4106 force_var_cost (struct ivopts_data *data,
4107                 tree expr, bitmap *depends_on)
4108 {
4109   if (depends_on)
4110     {
4111       fd_ivopts_data = data;
4112       walk_tree (&expr, find_depends, depends_on, NULL);
4113     }
4114
4115   return force_expr_to_var_cost (expr, data->speed);
4116 }
4117
4118 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4119    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4120    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4121    invariants the computation depends on.  */
4122
4123 static comp_cost
4124 split_address_cost (struct ivopts_data *data,
4125                     tree addr, bool *symbol_present, bool *var_present,
4126                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4127 {
4128   tree core;
4129   HOST_WIDE_INT bitsize;
4130   HOST_WIDE_INT bitpos;
4131   tree toffset;
4132   machine_mode mode;
4133   int unsignedp, volatilep;
4134
4135   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4136                               &unsignedp, &volatilep, false);
4137
4138   if (toffset != 0
4139       || bitpos % BITS_PER_UNIT != 0
4140       || TREE_CODE (core) != VAR_DECL)
4141     {
4142       *symbol_present = false;
4143       *var_present = true;
4144       fd_ivopts_data = data;
4145       walk_tree (&addr, find_depends, depends_on, NULL);
4146       return new_cost (target_spill_cost[data->speed], 0);
4147     }
4148
4149   *offset += bitpos / BITS_PER_UNIT;
4150   if (TREE_STATIC (core)
4151       || DECL_EXTERNAL (core))
4152     {
4153       *symbol_present = true;
4154       *var_present = false;
4155       return no_cost;
4156     }
4157
4158   *symbol_present = false;
4159   *var_present = true;
4160   return no_cost;
4161 }
4162
4163 /* Estimates cost of expressing difference of addresses E1 - E2 as
4164    var + symbol + offset.  The value of offset is added to OFFSET,
4165    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4166    part is missing.  DEPENDS_ON is a set of the invariants the computation
4167    depends on.  */
4168
4169 static comp_cost
4170 ptr_difference_cost (struct ivopts_data *data,
4171                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4172                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4173 {
4174   HOST_WIDE_INT diff = 0;
4175   aff_tree aff_e1, aff_e2;
4176   tree type;
4177
4178   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4179
4180   if (ptr_difference_const (e1, e2, &diff))
4181     {
4182       *offset += diff;
4183       *symbol_present = false;
4184       *var_present = false;
4185       return no_cost;
4186     }
4187
4188   if (integer_zerop (e2))
4189     return split_address_cost (data, TREE_OPERAND (e1, 0),
4190                                symbol_present, var_present, offset, depends_on);
4191
4192   *symbol_present = false;
4193   *var_present = true;
4194
4195   type = signed_type_for (TREE_TYPE (e1));
4196   tree_to_aff_combination (e1, type, &aff_e1);
4197   tree_to_aff_combination (e2, type, &aff_e2);
4198   aff_combination_scale (&aff_e2, -1);
4199   aff_combination_add (&aff_e1, &aff_e2);
4200
4201   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4202 }
4203
4204 /* Estimates cost of expressing difference E1 - E2 as
4205    var + symbol + offset.  The value of offset is added to OFFSET,
4206    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4207    part is missing.  DEPENDS_ON is a set of the invariants the computation
4208    depends on.  */
4209
4210 static comp_cost
4211 difference_cost (struct ivopts_data *data,
4212                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4213                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4214 {
4215   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4216   unsigned HOST_WIDE_INT off1, off2;
4217   aff_tree aff_e1, aff_e2;
4218   tree type;
4219
4220   e1 = strip_offset (e1, &off1);
4221   e2 = strip_offset (e2, &off2);
4222   *offset += off1 - off2;
4223
4224   STRIP_NOPS (e1);
4225   STRIP_NOPS (e2);
4226
4227   if (TREE_CODE (e1) == ADDR_EXPR)
4228     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4229                                 offset, depends_on);
4230   *symbol_present = false;
4231
4232   if (operand_equal_p (e1, e2, 0))
4233     {
4234       *var_present = false;
4235       return no_cost;
4236     }
4237
4238   *var_present = true;
4239
4240   if (integer_zerop (e2))
4241     return force_var_cost (data, e1, depends_on);
4242
4243   if (integer_zerop (e1))
4244     {
4245       comp_cost cost = force_var_cost (data, e2, depends_on);
4246       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4247       return cost;
4248     }
4249
4250   type = signed_type_for (TREE_TYPE (e1));
4251   tree_to_aff_combination (e1, type, &aff_e1);
4252   tree_to_aff_combination (e2, type, &aff_e2);
4253   aff_combination_scale (&aff_e2, -1);
4254   aff_combination_add (&aff_e1, &aff_e2);
4255
4256   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4257 }
4258
4259 /* Returns true if AFF1 and AFF2 are identical.  */
4260
4261 static bool
4262 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4263 {
4264   unsigned i;
4265
4266   if (aff1->n != aff2->n)
4267     return false;
4268
4269   for (i = 0; i < aff1->n; i++)
4270     {
4271       if (aff1->elts[i].coef != aff2->elts[i].coef)
4272         return false;
4273
4274       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4275         return false;
4276     }
4277   return true;
4278 }
4279
4280 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4281
4282 static int
4283 get_expr_id (struct ivopts_data *data, tree expr)
4284 {
4285   struct iv_inv_expr_ent ent;
4286   struct iv_inv_expr_ent **slot;
4287
4288   ent.expr = expr;
4289   ent.hash = iterative_hash_expr (expr, 0);
4290   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4291   if (*slot)
4292     return (*slot)->id;
4293
4294   *slot = XNEW (struct iv_inv_expr_ent);
4295   (*slot)->expr = expr;
4296   (*slot)->hash = ent.hash;
4297   (*slot)->id = data->inv_expr_id++;
4298   return (*slot)->id;
4299 }
4300
4301 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4302    requires a new compiler generated temporary.  Returns -1 otherwise.
4303    ADDRESS_P is a flag indicating if the expression is for address
4304    computation.  */
4305
4306 static int
4307 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4308                             tree cbase, HOST_WIDE_INT ratio,
4309                             bool address_p)
4310 {
4311   aff_tree ubase_aff, cbase_aff;
4312   tree expr, ub, cb;
4313
4314   STRIP_NOPS (ubase);
4315   STRIP_NOPS (cbase);
4316   ub = ubase;
4317   cb = cbase;
4318
4319   if ((TREE_CODE (ubase) == INTEGER_CST)
4320       && (TREE_CODE (cbase) == INTEGER_CST))
4321     return -1;
4322
4323   /* Strips the constant part. */
4324   if (TREE_CODE (ubase) == PLUS_EXPR
4325       || TREE_CODE (ubase) == MINUS_EXPR
4326       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4327     {
4328       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4329         ubase = TREE_OPERAND (ubase, 0);
4330     }
4331
4332   /* Strips the constant part. */
4333   if (TREE_CODE (cbase) == PLUS_EXPR
4334       || TREE_CODE (cbase) == MINUS_EXPR
4335       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4336     {
4337       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4338         cbase = TREE_OPERAND (cbase, 0);
4339     }
4340
4341   if (address_p)
4342     {
4343       if (((TREE_CODE (ubase) == SSA_NAME)
4344            || (TREE_CODE (ubase) == ADDR_EXPR
4345                && is_gimple_min_invariant (ubase)))
4346           && (TREE_CODE (cbase) == INTEGER_CST))
4347         return -1;
4348
4349       if (((TREE_CODE (cbase) == SSA_NAME)
4350            || (TREE_CODE (cbase) == ADDR_EXPR
4351                && is_gimple_min_invariant (cbase)))
4352           && (TREE_CODE (ubase) == INTEGER_CST))
4353         return -1;
4354     }
4355
4356   if (ratio == 1)
4357     {
4358       if (operand_equal_p (ubase, cbase, 0))
4359         return -1;
4360
4361       if (TREE_CODE (ubase) == ADDR_EXPR
4362           && TREE_CODE (cbase) == ADDR_EXPR)
4363         {
4364           tree usym, csym;
4365
4366           usym = TREE_OPERAND (ubase, 0);
4367           csym = TREE_OPERAND (cbase, 0);
4368           if (TREE_CODE (usym) == ARRAY_REF)
4369             {
4370               tree ind = TREE_OPERAND (usym, 1);
4371               if (TREE_CODE (ind) == INTEGER_CST
4372                   && tree_fits_shwi_p (ind)
4373                   && tree_to_shwi (ind) == 0)
4374                 usym = TREE_OPERAND (usym, 0);
4375             }
4376           if (TREE_CODE (csym) == ARRAY_REF)
4377             {
4378               tree ind = TREE_OPERAND (csym, 1);
4379               if (TREE_CODE (ind) == INTEGER_CST
4380                   && tree_fits_shwi_p (ind)
4381                   && tree_to_shwi (ind) == 0)
4382                 csym = TREE_OPERAND (csym, 0);
4383             }
4384           if (operand_equal_p (usym, csym, 0))
4385             return -1;
4386         }
4387       /* Now do more complex comparison  */
4388       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4389       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4390       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4391         return -1;
4392     }
4393
4394   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4395   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4396
4397   aff_combination_scale (&cbase_aff, -1 * ratio);
4398   aff_combination_add (&ubase_aff, &cbase_aff);
4399   expr = aff_combination_to_tree (&ubase_aff);
4400   return get_expr_id (data, expr);
4401 }
4402
4403
4404
4405 /* Determines the cost of the computation by that USE is expressed
4406    from induction variable CAND.  If ADDRESS_P is true, we just need
4407    to create an address from it, otherwise we want to get it into
4408    register.  A set of invariants we depend on is stored in
4409    DEPENDS_ON.  AT is the statement at that the value is computed.
4410    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4411    addressing is likely.  */
4412
4413 static comp_cost
4414 get_computation_cost_at (struct ivopts_data *data,
4415                          struct iv_use *use, struct iv_cand *cand,
4416                          bool address_p, bitmap *depends_on, gimple at,
4417                          bool *can_autoinc,
4418                          int *inv_expr_id)
4419 {
4420   tree ubase = use->iv->base, ustep = use->iv->step;
4421   tree cbase, cstep;
4422   tree utype = TREE_TYPE (ubase), ctype;
4423   unsigned HOST_WIDE_INT cstepi, offset = 0;
4424   HOST_WIDE_INT ratio, aratio;
4425   bool var_present, symbol_present, stmt_is_after_inc;
4426   comp_cost cost;
4427   widest_int rat;
4428   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4429   machine_mode mem_mode = (address_p
4430                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4431                                 : VOIDmode);
4432
4433   *depends_on = NULL;
4434
4435   /* Only consider real candidates.  */
4436   if (!cand->iv)
4437     return infinite_cost;
4438
4439   cbase = cand->iv->base;
4440   cstep = cand->iv->step;
4441   ctype = TREE_TYPE (cbase);
4442
4443   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4444     {
4445       /* We do not have a precision to express the values of use.  */
4446       return infinite_cost;
4447     }
4448
4449   if (address_p
4450       || (use->iv->base_object
4451           && cand->iv->base_object
4452           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4453           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4454     {
4455       /* Do not try to express address of an object with computation based
4456          on address of a different object.  This may cause problems in rtl
4457          level alias analysis (that does not expect this to be happening,
4458          as this is illegal in C), and would be unlikely to be useful
4459          anyway.  */
4460       if (use->iv->base_object
4461           && cand->iv->base_object
4462           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4463         return infinite_cost;
4464     }
4465
4466   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4467     {
4468       /* TODO -- add direct handling of this case.  */
4469       goto fallback;
4470     }
4471
4472   /* CSTEPI is removed from the offset in case statement is after the
4473      increment.  If the step is not constant, we use zero instead.
4474      This is a bit imprecise (there is the extra addition), but
4475      redundancy elimination is likely to transform the code so that
4476      it uses value of the variable before increment anyway,
4477      so it is not that much unrealistic.  */
4478   if (cst_and_fits_in_hwi (cstep))
4479     cstepi = int_cst_value (cstep);
4480   else
4481     cstepi = 0;
4482
4483   if (!constant_multiple_of (ustep, cstep, &rat))
4484     return infinite_cost;
4485
4486   if (wi::fits_shwi_p (rat))
4487     ratio = rat.to_shwi ();
4488   else
4489     return infinite_cost;
4490
4491   STRIP_NOPS (cbase);
4492   ctype = TREE_TYPE (cbase);
4493
4494   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4495
4496   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4497      or ratio == 1, it is better to handle this like
4498
4499      ubase - ratio * cbase + ratio * var
4500
4501      (also holds in the case ratio == -1, TODO.  */
4502
4503   if (cst_and_fits_in_hwi (cbase))
4504     {
4505       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4506       cost = difference_cost (data,
4507                               ubase, build_int_cst (utype, 0),
4508                               &symbol_present, &var_present, &offset,
4509                               depends_on);
4510       cost.cost /= avg_loop_niter (data->current_loop);
4511     }
4512   else if (ratio == 1)
4513     {
4514       tree real_cbase = cbase;
4515
4516       /* Check to see if any adjustment is needed.  */
4517       if (cstepi == 0 && stmt_is_after_inc)
4518         {
4519           aff_tree real_cbase_aff;
4520           aff_tree cstep_aff;
4521
4522           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4523                                    &real_cbase_aff);
4524           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4525
4526           aff_combination_add (&real_cbase_aff, &cstep_aff);
4527           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4528         }
4529
4530       cost = difference_cost (data,
4531                               ubase, real_cbase,
4532                               &symbol_present, &var_present, &offset,
4533                               depends_on);
4534       cost.cost /= avg_loop_niter (data->current_loop);
4535     }
4536   else if (address_p
4537            && !POINTER_TYPE_P (ctype)
4538            && multiplier_allowed_in_address_p
4539                 (ratio, mem_mode,
4540                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4541     {
4542       cbase
4543         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4544       cost = difference_cost (data,
4545                               ubase, cbase,
4546                               &symbol_present, &var_present, &offset,
4547                               depends_on);
4548       cost.cost /= avg_loop_niter (data->current_loop);
4549     }
4550   else
4551     {
4552       cost = force_var_cost (data, cbase, depends_on);
4553       cost = add_costs (cost,
4554                         difference_cost (data,
4555                                          ubase, build_int_cst (utype, 0),
4556                                          &symbol_present, &var_present,
4557                                          &offset, depends_on));
4558       cost.cost /= avg_loop_niter (data->current_loop);
4559       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4560     }
4561
4562   /* Set of invariants depended on by sub use has already been computed
4563      for the first use in the group.  */
4564   if (use->sub_id)
4565     {
4566       cost.cost = 0;
4567       if (depends_on && *depends_on)
4568         bitmap_clear (*depends_on);
4569     }
4570   else if (inv_expr_id)
4571     {
4572       *inv_expr_id =
4573           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4574       /* Clear depends on.  */
4575       if (*inv_expr_id != -1 && depends_on && *depends_on)
4576         bitmap_clear (*depends_on);
4577     }
4578
4579   /* If we are after the increment, the value of the candidate is higher by
4580      one iteration.  */
4581   if (stmt_is_after_inc)
4582     offset -= ratio * cstepi;
4583
4584   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4585      (symbol/var1/const parts may be omitted).  If we are looking for an
4586      address, find the cost of addressing this.  */
4587   if (address_p)
4588     return add_costs (cost,
4589                       get_address_cost (symbol_present, var_present,
4590                                         offset, ratio, cstepi,
4591                                         mem_mode,
4592                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4593                                         speed, stmt_is_after_inc,
4594                                         can_autoinc));
4595
4596   /* Otherwise estimate the costs for computing the expression.  */
4597   if (!symbol_present && !var_present && !offset)
4598     {
4599       if (ratio != 1)
4600         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4601       return cost;
4602     }
4603
4604   /* Symbol + offset should be compile-time computable so consider that they
4605       are added once to the variable, if present.  */
4606   if (var_present && (symbol_present || offset))
4607     cost.cost += adjust_setup_cost (data,
4608                                     add_cost (speed, TYPE_MODE (ctype)));
4609
4610   /* Having offset does not affect runtime cost in case it is added to
4611      symbol, but it increases complexity.  */
4612   if (offset)
4613     cost.complexity++;
4614
4615   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4616
4617   aratio = ratio > 0 ? ratio : -ratio;
4618   if (aratio != 1)
4619     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4620   return cost;
4621
4622 fallback:
4623   if (can_autoinc)
4624     *can_autoinc = false;
4625
4626   {
4627     /* Just get the expression, expand it and measure the cost.  */
4628     tree comp = get_computation_at (data->current_loop, use, cand, at);
4629
4630     if (!comp)
4631       return infinite_cost;
4632
4633     if (address_p)
4634       comp = build_simple_mem_ref (comp);
4635
4636     return new_cost (computation_cost (comp, speed), 0);
4637   }
4638 }
4639
4640 /* Determines the cost of the computation by that USE is expressed
4641    from induction variable CAND.  If ADDRESS_P is true, we just need
4642    to create an address from it, otherwise we want to get it into
4643    register.  A set of invariants we depend on is stored in
4644    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4645    autoinc addressing is likely.  */
4646
4647 static comp_cost
4648 get_computation_cost (struct ivopts_data *data,
4649                       struct iv_use *use, struct iv_cand *cand,
4650                       bool address_p, bitmap *depends_on,
4651                       bool *can_autoinc, int *inv_expr_id)
4652 {
4653   return get_computation_cost_at (data,
4654                                   use, cand, address_p, depends_on, use->stmt,
4655                                   can_autoinc, inv_expr_id);
4656 }
4657
4658 /* Determines cost of basing replacement of USE on CAND in a generic
4659    expression.  */
4660
4661 static bool
4662 determine_use_iv_cost_generic (struct ivopts_data *data,
4663                                struct iv_use *use, struct iv_cand *cand)
4664 {
4665   bitmap depends_on;
4666   comp_cost cost;
4667   int inv_expr_id = -1;
4668
4669   /* The simple case first -- if we need to express value of the preserved
4670      original biv, the cost is 0.  This also prevents us from counting the
4671      cost of increment twice -- once at this use and once in the cost of
4672      the candidate.  */
4673   if (cand->pos == IP_ORIGINAL
4674       && cand->incremented_at == use->stmt)
4675     {
4676       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4677                        ERROR_MARK, -1);
4678       return true;
4679     }
4680
4681   cost = get_computation_cost (data, use, cand, false, &depends_on,
4682                                NULL, &inv_expr_id);
4683
4684   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4685                    inv_expr_id);
4686
4687   return !infinite_cost_p (cost);
4688 }
4689
4690 /* Determines cost of basing replacement of USE on CAND in an address.  */
4691
4692 static bool
4693 determine_use_iv_cost_address (struct ivopts_data *data,
4694                                struct iv_use *use, struct iv_cand *cand)
4695 {
4696   bitmap depends_on;
4697   bool can_autoinc;
4698   int inv_expr_id = -1;
4699   struct iv_use *sub_use;
4700   comp_cost sub_cost;
4701   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4702                                          &can_autoinc, &inv_expr_id);
4703
4704   if (cand->ainc_use == use)
4705     {
4706       if (can_autoinc)
4707         cost.cost -= cand->cost_step;
4708       /* If we generated the candidate solely for exploiting autoincrement
4709          opportunities, and it turns out it can't be used, set the cost to
4710          infinity to make sure we ignore it.  */
4711       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4712         cost = infinite_cost;
4713     }
4714   for (sub_use = use->next;
4715        sub_use && !infinite_cost_p (cost);
4716        sub_use = sub_use->next)
4717     {
4718        sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4719                                         &can_autoinc, &inv_expr_id);
4720        cost = add_costs (cost, sub_cost);
4721     }
4722
4723   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4724                    inv_expr_id);
4725
4726   return !infinite_cost_p (cost);
4727 }
4728
4729 /* Computes value of candidate CAND at position AT in iteration NITER, and
4730    stores it to VAL.  */
4731
4732 static void
4733 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4734                aff_tree *val)
4735 {
4736   aff_tree step, delta, nit;
4737   struct iv *iv = cand->iv;
4738   tree type = TREE_TYPE (iv->base);
4739   tree steptype = type;
4740   if (POINTER_TYPE_P (type))
4741     steptype = sizetype;
4742   steptype = unsigned_type_for (type);
4743
4744   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4745   aff_combination_convert (&step, steptype);
4746   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4747   aff_combination_convert (&nit, steptype);
4748   aff_combination_mult (&nit, &step, &delta);
4749   if (stmt_after_increment (loop, cand, at))
4750     aff_combination_add (&delta, &step);
4751
4752   tree_to_aff_combination (iv->base, type, val);
4753   if (!POINTER_TYPE_P (type))
4754     aff_combination_convert (val, steptype);
4755   aff_combination_add (val, &delta);
4756 }
4757
4758 /* Returns period of induction variable iv.  */
4759
4760 static tree
4761 iv_period (struct iv *iv)
4762 {
4763   tree step = iv->step, period, type;
4764   tree pow2div;
4765
4766   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4767
4768   type = unsigned_type_for (TREE_TYPE (step));
4769   /* Period of the iv is lcm (step, type_range)/step -1,
4770      i.e., N*type_range/step - 1. Since type range is power
4771      of two, N == (step >> num_of_ending_zeros_binary (step),
4772      so the final result is
4773
4774        (type_range >> num_of_ending_zeros_binary (step)) - 1
4775
4776   */
4777   pow2div = num_ending_zeros (step);
4778
4779   period = build_low_bits_mask (type,
4780                                 (TYPE_PRECISION (type)
4781                                  - tree_to_uhwi (pow2div)));
4782
4783   return period;
4784 }
4785
4786 /* Returns the comparison operator used when eliminating the iv USE.  */
4787
4788 static enum tree_code
4789 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4790 {
4791   struct loop *loop = data->current_loop;
4792   basic_block ex_bb;
4793   edge exit;
4794
4795   ex_bb = gimple_bb (use->stmt);
4796   exit = EDGE_SUCC (ex_bb, 0);
4797   if (flow_bb_inside_loop_p (loop, exit->dest))
4798     exit = EDGE_SUCC (ex_bb, 1);
4799
4800   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4801 }
4802
4803 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4804    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4805    calculation is performed in non-wrapping type.
4806
4807    TODO: More generally, we could test for the situation that
4808          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4809          This would require knowing the sign of OFFSET.  */
4810
4811 static bool
4812 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4813 {
4814   enum tree_code code;
4815   tree e1, e2;
4816   aff_tree aff_e1, aff_e2, aff_offset;
4817
4818   if (!nowrap_type_p (TREE_TYPE (base)))
4819     return false;
4820
4821   base = expand_simple_operations (base);
4822
4823   if (TREE_CODE (base) == SSA_NAME)
4824     {
4825       gimple stmt = SSA_NAME_DEF_STMT (base);
4826
4827       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4828         return false;
4829
4830       code = gimple_assign_rhs_code (stmt);
4831       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4832         return false;
4833
4834       e1 = gimple_assign_rhs1 (stmt);
4835       e2 = gimple_assign_rhs2 (stmt);
4836     }
4837   else
4838     {
4839       code = TREE_CODE (base);
4840       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4841         return false;
4842       e1 = TREE_OPERAND (base, 0);
4843       e2 = TREE_OPERAND (base, 1);
4844     }
4845
4846   /* Use affine expansion as deeper inspection to prove the equality.  */
4847   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4848                                   &aff_e2, &data->name_expansion_cache);
4849   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4850                                   &aff_offset, &data->name_expansion_cache);
4851   aff_combination_scale (&aff_offset, -1);
4852   switch (code)
4853     {
4854     case PLUS_EXPR:
4855       aff_combination_add (&aff_e2, &aff_offset);
4856       if (aff_combination_zero_p (&aff_e2))
4857         return true;
4858
4859       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4860                                       &aff_e1, &data->name_expansion_cache);
4861       aff_combination_add (&aff_e1, &aff_offset);
4862       return aff_combination_zero_p (&aff_e1);
4863
4864     case POINTER_PLUS_EXPR:
4865       aff_combination_add (&aff_e2, &aff_offset);
4866       return aff_combination_zero_p (&aff_e2);
4867
4868     default:
4869       return false;
4870     }
4871 }
4872
4873 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4874    comparison with CAND.  NITER describes the number of iterations of
4875    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4876
4877    We aim to handle the following situation:
4878
4879    sometype *base, *p;
4880    int a, b, i;
4881
4882    i = a;
4883    p = p_0 = base + a;
4884
4885    do
4886      {
4887        bla (*p);
4888        p++;
4889        i++;
4890      }
4891    while (i < b);
4892
4893    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4894    We aim to optimize this to
4895
4896    p = p_0 = base + a;
4897    do
4898      {
4899        bla (*p);
4900        p++;
4901      }
4902    while (p < p_0 - a + b);
4903
4904    This preserves the correctness, since the pointer arithmetics does not
4905    overflow.  More precisely:
4906
4907    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4908       overflow in computing it or the values of p.
4909    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4910       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4911
4912 static bool
4913 iv_elimination_compare_lt (struct ivopts_data *data,
4914                            struct iv_cand *cand, enum tree_code *comp_p,
4915                            struct tree_niter_desc *niter)
4916 {
4917   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4918   struct aff_tree nit, tmpa, tmpb;
4919   enum tree_code comp;
4920   HOST_WIDE_INT step;
4921
4922   /* We need to know that the candidate induction variable does not overflow.
4923      While more complex analysis may be used to prove this, for now just
4924      check that the variable appears in the original program and that it
4925      is computed in a type that guarantees no overflows.  */
4926   cand_type = TREE_TYPE (cand->iv->base);
4927   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4928     return false;
4929
4930   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4931      the calculation of the BOUND could overflow, making the comparison
4932      invalid.  */
4933   if (!data->loop_single_exit_p)
4934     return false;
4935
4936   /* We need to be able to decide whether candidate is increasing or decreasing
4937      in order to choose the right comparison operator.  */
4938   if (!cst_and_fits_in_hwi (cand->iv->step))
4939     return false;
4940   step = int_cst_value (cand->iv->step);
4941
4942   /* Check that the number of iterations matches the expected pattern:
4943      a + 1 > b ? 0 : b - a - 1.  */
4944   mbz = niter->may_be_zero;
4945   if (TREE_CODE (mbz) == GT_EXPR)
4946     {
4947       /* Handle a + 1 > b.  */
4948       tree op0 = TREE_OPERAND (mbz, 0);
4949       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4950         {
4951           a = TREE_OPERAND (op0, 0);
4952           b = TREE_OPERAND (mbz, 1);
4953         }
4954       else
4955         return false;
4956     }
4957   else if (TREE_CODE (mbz) == LT_EXPR)
4958     {
4959       tree op1 = TREE_OPERAND (mbz, 1);
4960
4961       /* Handle b < a + 1.  */
4962       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4963         {
4964           a = TREE_OPERAND (op1, 0);
4965           b = TREE_OPERAND (mbz, 0);
4966         }
4967       else
4968         return false;
4969     }
4970   else
4971     return false;
4972
4973   /* Expected number of iterations is B - A - 1.  Check that it matches
4974      the actual number, i.e., that B - A - NITER = 1.  */
4975   tree_to_aff_combination (niter->niter, nit_type, &nit);
4976   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4977   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4978   aff_combination_scale (&nit, -1);
4979   aff_combination_scale (&tmpa, -1);
4980   aff_combination_add (&tmpb, &tmpa);
4981   aff_combination_add (&tmpb, &nit);
4982   if (tmpb.n != 0 || tmpb.offset != 1)
4983     return false;
4984
4985   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4986      overflow.  */
4987   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4988                         cand->iv->step,
4989                         fold_convert (TREE_TYPE (cand->iv->step), a));
4990   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4991     return false;
4992
4993   /* Determine the new comparison operator.  */
4994   comp = step < 0 ? GT_EXPR : LT_EXPR;
4995   if (*comp_p == NE_EXPR)
4996     *comp_p = comp;
4997   else if (*comp_p == EQ_EXPR)
4998     *comp_p = invert_tree_comparison (comp, false);
4999   else
5000     gcc_unreachable ();
5001
5002   return true;
5003 }
5004
5005 /* Check whether it is possible to express the condition in USE by comparison
5006    of candidate CAND.  If so, store the value compared with to BOUND, and the
5007    comparison operator to COMP.  */
5008
5009 static bool
5010 may_eliminate_iv (struct ivopts_data *data,
5011                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5012                   enum tree_code *comp)
5013 {
5014   basic_block ex_bb;
5015   edge exit;
5016   tree period;
5017   struct loop *loop = data->current_loop;
5018   aff_tree bnd;
5019   struct tree_niter_desc *desc = NULL;
5020
5021   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5022     return false;
5023
5024   /* For now works only for exits that dominate the loop latch.
5025      TODO: extend to other conditions inside loop body.  */
5026   ex_bb = gimple_bb (use->stmt);
5027   if (use->stmt != last_stmt (ex_bb)
5028       || gimple_code (use->stmt) != GIMPLE_COND
5029       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5030     return false;
5031
5032   exit = EDGE_SUCC (ex_bb, 0);
5033   if (flow_bb_inside_loop_p (loop, exit->dest))
5034     exit = EDGE_SUCC (ex_bb, 1);
5035   if (flow_bb_inside_loop_p (loop, exit->dest))
5036     return false;
5037
5038   desc = niter_for_exit (data, exit);
5039   if (!desc)
5040     return false;
5041
5042   /* Determine whether we can use the variable to test the exit condition.
5043      This is the case iff the period of the induction variable is greater
5044      than the number of iterations for which the exit condition is true.  */
5045   period = iv_period (cand->iv);
5046
5047   /* If the number of iterations is constant, compare against it directly.  */
5048   if (TREE_CODE (desc->niter) == INTEGER_CST)
5049     {
5050       /* See cand_value_at.  */
5051       if (stmt_after_increment (loop, cand, use->stmt))
5052         {
5053           if (!tree_int_cst_lt (desc->niter, period))
5054             return false;
5055         }
5056       else
5057         {
5058           if (tree_int_cst_lt (period, desc->niter))
5059             return false;
5060         }
5061     }
5062
5063   /* If not, and if this is the only possible exit of the loop, see whether
5064      we can get a conservative estimate on the number of iterations of the
5065      entire loop and compare against that instead.  */
5066   else
5067     {
5068       widest_int period_value, max_niter;
5069
5070       max_niter = desc->max;
5071       if (stmt_after_increment (loop, cand, use->stmt))
5072         max_niter += 1;
5073       period_value = wi::to_widest (period);
5074       if (wi::gtu_p (max_niter, period_value))
5075         {
5076           /* See if we can take advantage of inferred loop bound information.  */
5077           if (data->loop_single_exit_p)
5078             {
5079               if (!max_loop_iterations (loop, &max_niter))
5080                 return false;
5081               /* The loop bound is already adjusted by adding 1.  */
5082               if (wi::gtu_p (max_niter, period_value))
5083                 return false;
5084             }
5085           else
5086             return false;
5087         }
5088     }
5089
5090   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5091
5092   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5093                          aff_combination_to_tree (&bnd));
5094   *comp = iv_elimination_compare (data, use);
5095
5096   /* It is unlikely that computing the number of iterations using division
5097      would be more profitable than keeping the original induction variable.  */
5098   if (expression_expensive_p (*bound))
5099     return false;
5100
5101   /* Sometimes, it is possible to handle the situation that the number of
5102      iterations may be zero unless additional assumtions by using <
5103      instead of != in the exit condition.
5104
5105      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5106            base the exit condition on it.  However, that is often too
5107            expensive.  */
5108   if (!integer_zerop (desc->may_be_zero))
5109     return iv_elimination_compare_lt (data, cand, comp, desc);
5110
5111   return true;
5112 }
5113
5114  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5115     be copied, if is is used in the loop body and DATA->body_includes_call.  */
5116
5117 static int
5118 parm_decl_cost (struct ivopts_data *data, tree bound)
5119 {
5120   tree sbound = bound;
5121   STRIP_NOPS (sbound);
5122
5123   if (TREE_CODE (sbound) == SSA_NAME
5124       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5125       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5126       && data->body_includes_call)
5127     return COSTS_N_INSNS (1);
5128
5129   return 0;
5130 }
5131
5132 /* Determines cost of basing replacement of USE on CAND in a condition.  */
5133
5134 static bool
5135 determine_use_iv_cost_condition (struct ivopts_data *data,
5136                                  struct iv_use *use, struct iv_cand *cand)
5137 {
5138   tree bound = NULL_TREE;
5139   struct iv *cmp_iv;
5140   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5141   comp_cost elim_cost, express_cost, cost, bound_cost;
5142   bool ok;
5143   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5144   tree *control_var, *bound_cst;
5145   enum tree_code comp = ERROR_MARK;
5146
5147   /* Only consider real candidates.  */
5148   if (!cand->iv)
5149     {
5150       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5151                        ERROR_MARK, -1);
5152       return false;
5153     }
5154
5155   /* Try iv elimination.  */
5156   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5157     {
5158       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5159       if (elim_cost.cost == 0)
5160         elim_cost.cost = parm_decl_cost (data, bound);
5161       else if (TREE_CODE (bound) == INTEGER_CST)
5162         elim_cost.cost = 0;
5163       /* If we replace a loop condition 'i < n' with 'p < base + n',
5164          depends_on_elim will have 'base' and 'n' set, which implies
5165          that both 'base' and 'n' will be live during the loop.  More likely,
5166          'base + n' will be loop invariant, resulting in only one live value
5167          during the loop.  So in that case we clear depends_on_elim and set
5168         elim_inv_expr_id instead.  */
5169       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5170         {
5171           elim_inv_expr_id = get_expr_id (data, bound);
5172           bitmap_clear (depends_on_elim);
5173         }
5174       /* The bound is a loop invariant, so it will be only computed
5175          once.  */
5176       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5177     }
5178   else
5179     elim_cost = infinite_cost;
5180
5181   /* Try expressing the original giv.  If it is compared with an invariant,
5182      note that we cannot get rid of it.  */
5183   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5184                               NULL, &cmp_iv);
5185   gcc_assert (ok);
5186
5187   /* When the condition is a comparison of the candidate IV against
5188      zero, prefer this IV.
5189
5190      TODO: The constant that we're subtracting from the cost should
5191      be target-dependent.  This information should be added to the
5192      target costs for each backend.  */
5193   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5194       && integer_zerop (*bound_cst)
5195       && (operand_equal_p (*control_var, cand->var_after, 0)
5196           || operand_equal_p (*control_var, cand->var_before, 0)))
5197     elim_cost.cost -= 1;
5198
5199   express_cost = get_computation_cost (data, use, cand, false,
5200                                        &depends_on_express, NULL,
5201                                        &express_inv_expr_id);
5202   fd_ivopts_data = data;
5203   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5204
5205   /* Count the cost of the original bound as well.  */
5206   bound_cost = force_var_cost (data, *bound_cst, NULL);
5207   if (bound_cost.cost == 0)
5208     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5209   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5210     bound_cost.cost = 0;
5211   express_cost.cost += bound_cost.cost;
5212
5213   /* Choose the better approach, preferring the eliminated IV. */
5214   if (compare_costs (elim_cost, express_cost) <= 0)
5215     {
5216       cost = elim_cost;
5217       depends_on = depends_on_elim;
5218       depends_on_elim = NULL;
5219       inv_expr_id = elim_inv_expr_id;
5220     }
5221   else
5222     {
5223       cost = express_cost;
5224       depends_on = depends_on_express;
5225       depends_on_express = NULL;
5226       bound = NULL_TREE;
5227       comp = ERROR_MARK;
5228       inv_expr_id = express_inv_expr_id;
5229     }
5230
5231   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5232
5233   if (depends_on_elim)
5234     BITMAP_FREE (depends_on_elim);
5235   if (depends_on_express)
5236     BITMAP_FREE (depends_on_express);
5237
5238   return !infinite_cost_p (cost);
5239 }
5240
5241 /* Determines cost of basing replacement of USE on CAND.  Returns false
5242    if USE cannot be based on CAND.  */
5243
5244 static bool
5245 determine_use_iv_cost (struct ivopts_data *data,
5246                        struct iv_use *use, struct iv_cand *cand)
5247 {
5248   switch (use->type)
5249     {
5250     case USE_NONLINEAR_EXPR:
5251       return determine_use_iv_cost_generic (data, use, cand);
5252
5253     case USE_ADDRESS:
5254       return determine_use_iv_cost_address (data, use, cand);
5255
5256     case USE_COMPARE:
5257       return determine_use_iv_cost_condition (data, use, cand);
5258
5259     default:
5260       gcc_unreachable ();
5261     }
5262 }
5263
5264 /* Return true if get_computation_cost indicates that autoincrement is
5265    a possibility for the pair of USE and CAND, false otherwise.  */
5266
5267 static bool
5268 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5269                            struct iv_cand *cand)
5270 {
5271   bitmap depends_on;
5272   bool can_autoinc;
5273   comp_cost cost;
5274
5275   if (use->type != USE_ADDRESS)
5276     return false;
5277
5278   cost = get_computation_cost (data, use, cand, true, &depends_on,
5279                                &can_autoinc, NULL);
5280
5281   BITMAP_FREE (depends_on);
5282
5283   return !infinite_cost_p (cost) && can_autoinc;
5284 }
5285
5286 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5287    use that allows autoincrement, and set their AINC_USE if possible.  */
5288
5289 static void
5290 set_autoinc_for_original_candidates (struct ivopts_data *data)
5291 {
5292   unsigned i, j;
5293
5294   for (i = 0; i < n_iv_cands (data); i++)
5295     {
5296       struct iv_cand *cand = iv_cand (data, i);
5297       struct iv_use *closest_before = NULL;
5298       struct iv_use *closest_after = NULL;
5299       if (cand->pos != IP_ORIGINAL)
5300         continue;
5301
5302       for (j = 0; j < n_iv_uses (data); j++)
5303         {
5304           struct iv_use *use = iv_use (data, j);
5305           unsigned uid = gimple_uid (use->stmt);
5306
5307           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5308             continue;
5309
5310           if (uid < gimple_uid (cand->incremented_at)
5311               && (closest_before == NULL
5312                   || uid > gimple_uid (closest_before->stmt)))
5313             closest_before = use;
5314
5315           if (uid > gimple_uid (cand->incremented_at)
5316               && (closest_after == NULL
5317                   || uid < gimple_uid (closest_after->stmt)))
5318             closest_after = use;
5319         }
5320
5321       if (closest_before != NULL
5322           && autoinc_possible_for_pair (data, closest_before, cand))
5323         cand->ainc_use = closest_before;
5324       else if (closest_after != NULL
5325                && autoinc_possible_for_pair (data, closest_after, cand))
5326         cand->ainc_use = closest_after;
5327     }
5328 }
5329
5330 /* Finds the candidates for the induction variables.  */
5331
5332 static void
5333 find_iv_candidates (struct ivopts_data *data)
5334 {
5335   /* Add commonly used ivs.  */
5336   add_standard_iv_candidates (data);
5337
5338   /* Add old induction variables.  */
5339   add_old_ivs_candidates (data);
5340
5341   /* Add induction variables derived from uses.  */
5342   add_derived_ivs_candidates (data);
5343
5344   set_autoinc_for_original_candidates (data);
5345
5346   /* Record the important candidates.  */
5347   record_important_candidates (data);
5348 }
5349
5350 /* Determines costs of basing the use of the iv on an iv candidate.  */
5351
5352 static void
5353 determine_use_iv_costs (struct ivopts_data *data)
5354 {
5355   unsigned i, j;
5356   struct iv_use *use;
5357   struct iv_cand *cand;
5358   bitmap to_clear = BITMAP_ALLOC (NULL);
5359
5360   alloc_use_cost_map (data);
5361
5362   for (i = 0; i < n_iv_uses (data); i++)
5363     {
5364       use = iv_use (data, i);
5365
5366       if (data->consider_all_candidates)
5367         {
5368           for (j = 0; j < n_iv_cands (data); j++)
5369             {
5370               cand = iv_cand (data, j);
5371               determine_use_iv_cost (data, use, cand);
5372             }
5373         }
5374       else
5375         {
5376           bitmap_iterator bi;
5377
5378           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5379             {
5380               cand = iv_cand (data, j);
5381               if (!determine_use_iv_cost (data, use, cand))
5382                 bitmap_set_bit (to_clear, j);
5383             }
5384
5385           /* Remove the candidates for that the cost is infinite from
5386              the list of related candidates.  */
5387           bitmap_and_compl_into (use->related_cands, to_clear);
5388           bitmap_clear (to_clear);
5389         }
5390     }
5391
5392   BITMAP_FREE (to_clear);
5393
5394   if (dump_file && (dump_flags & TDF_DETAILS))
5395     {
5396       fprintf (dump_file, "Use-candidate costs:\n");
5397
5398       for (i = 0; i < n_iv_uses (data); i++)
5399         {
5400           use = iv_use (data, i);
5401
5402           fprintf (dump_file, "Use %d:\n", i);
5403           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5404           for (j = 0; j < use->n_map_members; j++)
5405             {
5406               if (!use->cost_map[j].cand
5407                   || infinite_cost_p (use->cost_map[j].cost))
5408                 continue;
5409
5410               fprintf (dump_file, "  %d\t%d\t%d\t",
5411                        use->cost_map[j].cand->id,
5412                        use->cost_map[j].cost.cost,
5413                        use->cost_map[j].cost.complexity);
5414               if (use->cost_map[j].depends_on)
5415                 bitmap_print (dump_file,
5416                               use->cost_map[j].depends_on, "","");
5417               if (use->cost_map[j].inv_expr_id != -1)
5418                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5419               fprintf (dump_file, "\n");
5420             }
5421
5422           fprintf (dump_file, "\n");
5423         }
5424       fprintf (dump_file, "\n");
5425     }
5426 }
5427
5428 /* Determines cost of the candidate CAND.  */
5429
5430 static void
5431 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5432 {
5433   comp_cost cost_base;
5434   unsigned cost, cost_step;
5435   tree base;
5436
5437   if (!cand->iv)
5438     {
5439       cand->cost = 0;
5440       return;
5441     }
5442
5443   /* There are two costs associated with the candidate -- its increment
5444      and its initialization.  The second is almost negligible for any loop
5445      that rolls enough, so we take it just very little into account.  */
5446
5447   base = cand->iv->base;
5448   cost_base = force_var_cost (data, base, NULL);
5449   /* It will be exceptional that the iv register happens to be initialized with
5450      the proper value at no cost.  In general, there will at least be a regcopy
5451      or a const set.  */
5452   if (cost_base.cost == 0)
5453     cost_base.cost = COSTS_N_INSNS (1);
5454   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5455
5456   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5457
5458   /* Prefer the original ivs unless we may gain something by replacing it.
5459      The reason is to make debugging simpler; so this is not relevant for
5460      artificial ivs created by other optimization passes.  */
5461   if (cand->pos != IP_ORIGINAL
5462       || !SSA_NAME_VAR (cand->var_before)
5463       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5464     cost++;
5465
5466   /* Prefer not to insert statements into latch unless there are some
5467      already (so that we do not create unnecessary jumps).  */
5468   if (cand->pos == IP_END
5469       && empty_block_p (ip_end_pos (data->current_loop)))
5470     cost++;
5471
5472   cand->cost = cost;
5473   cand->cost_step = cost_step;
5474 }
5475
5476 /* Determines costs of computation of the candidates.  */
5477
5478 static void
5479 determine_iv_costs (struct ivopts_data *data)
5480 {
5481   unsigned i;
5482
5483   if (dump_file && (dump_flags & TDF_DETAILS))
5484     {
5485       fprintf (dump_file, "Candidate costs:\n");
5486       fprintf (dump_file, "  cand\tcost\n");
5487     }
5488
5489   for (i = 0; i < n_iv_cands (data); i++)
5490     {
5491       struct iv_cand *cand = iv_cand (data, i);
5492
5493       determine_iv_cost (data, cand);
5494
5495       if (dump_file && (dump_flags & TDF_DETAILS))
5496         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5497     }
5498
5499   if (dump_file && (dump_flags & TDF_DETAILS))
5500     fprintf (dump_file, "\n");
5501 }
5502
5503 /* Calculates cost for having SIZE induction variables.  */
5504
5505 static unsigned
5506 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5507 {
5508   /* We add size to the cost, so that we prefer eliminating ivs
5509      if possible.  */
5510   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5511                                             data->body_includes_call);
5512 }
5513
5514 /* For each size of the induction variable set determine the penalty.  */
5515
5516 static void
5517 determine_set_costs (struct ivopts_data *data)
5518 {
5519   unsigned j, n;
5520   gphi *phi;
5521   gphi_iterator psi;
5522   tree op;
5523   struct loop *loop = data->current_loop;
5524   bitmap_iterator bi;
5525
5526   if (dump_file && (dump_flags & TDF_DETAILS))
5527     {
5528       fprintf (dump_file, "Global costs:\n");
5529       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5530       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5531       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5532       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5533     }
5534
5535   n = 0;
5536   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5537     {
5538       phi = psi.phi ();
5539       op = PHI_RESULT (phi);
5540
5541       if (virtual_operand_p (op))
5542         continue;
5543
5544       if (get_iv (data, op))
5545         continue;
5546
5547       n++;
5548     }
5549
5550   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5551     {
5552       struct version_info *info = ver_info (data, j);
5553
5554       if (info->inv_id && info->has_nonlin_use)
5555         n++;
5556     }
5557
5558   data->regs_used = n;
5559   if (dump_file && (dump_flags & TDF_DETAILS))
5560     fprintf (dump_file, "  regs_used %d\n", n);
5561
5562   if (dump_file && (dump_flags & TDF_DETAILS))
5563     {
5564       fprintf (dump_file, "  cost for size:\n");
5565       fprintf (dump_file, "  ivs\tcost\n");
5566       for (j = 0; j <= 2 * target_avail_regs; j++)
5567         fprintf (dump_file, "  %d\t%d\n", j,
5568                  ivopts_global_cost_for_size (data, j));
5569       fprintf (dump_file, "\n");
5570     }
5571 }
5572
5573 /* Returns true if A is a cheaper cost pair than B.  */
5574
5575 static bool
5576 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5577 {
5578   int cmp;
5579
5580   if (!a)
5581     return false;
5582
5583   if (!b)
5584     return true;
5585
5586   cmp = compare_costs (a->cost, b->cost);
5587   if (cmp < 0)
5588     return true;
5589
5590   if (cmp > 0)
5591     return false;
5592
5593   /* In case the costs are the same, prefer the cheaper candidate.  */
5594   if (a->cand->cost < b->cand->cost)
5595     return true;
5596
5597   return false;
5598 }
5599
5600
5601 /* Returns candidate by that USE is expressed in IVS.  */
5602
5603 static struct cost_pair *
5604 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5605 {
5606   return ivs->cand_for_use[use->id];
5607 }
5608
5609 /* Computes the cost field of IVS structure.  */
5610
5611 static void
5612 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5613 {
5614   comp_cost cost = ivs->cand_use_cost;
5615
5616   cost.cost += ivs->cand_cost;
5617
5618   cost.cost += ivopts_global_cost_for_size (data,
5619                                             ivs->n_regs + ivs->num_used_inv_expr);
5620
5621   ivs->cost = cost;
5622 }
5623
5624 /* Remove invariants in set INVS to set IVS.  */
5625
5626 static void
5627 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5628 {
5629   bitmap_iterator bi;
5630   unsigned iid;
5631
5632   if (!invs)
5633     return;
5634
5635   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5636     {
5637       ivs->n_invariant_uses[iid]--;
5638       if (ivs->n_invariant_uses[iid] == 0)
5639         ivs->n_regs--;
5640     }
5641 }
5642
5643 /* Set USE not to be expressed by any candidate in IVS.  */
5644
5645 static void
5646 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5647                  struct iv_use *use)
5648 {
5649   unsigned uid = use->id, cid;
5650   struct cost_pair *cp;
5651
5652   cp = ivs->cand_for_use[uid];
5653   if (!cp)
5654     return;
5655   cid = cp->cand->id;
5656
5657   ivs->bad_uses++;
5658   ivs->cand_for_use[uid] = NULL;
5659   ivs->n_cand_uses[cid]--;
5660
5661   if (ivs->n_cand_uses[cid] == 0)
5662     {
5663       bitmap_clear_bit (ivs->cands, cid);
5664       /* Do not count the pseudocandidates.  */
5665       if (cp->cand->iv)
5666         ivs->n_regs--;
5667       ivs->n_cands--;
5668       ivs->cand_cost -= cp->cand->cost;
5669
5670       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5671     }
5672
5673   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5674
5675   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5676
5677   if (cp->inv_expr_id != -1)
5678     {
5679       ivs->used_inv_expr[cp->inv_expr_id]--;
5680       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5681         ivs->num_used_inv_expr--;
5682     }
5683   iv_ca_recount_cost (data, ivs);
5684 }
5685
5686 /* Add invariants in set INVS to set IVS.  */
5687
5688 static void
5689 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5690 {
5691   bitmap_iterator bi;
5692   unsigned iid;
5693
5694   if (!invs)
5695     return;
5696
5697   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5698     {
5699       ivs->n_invariant_uses[iid]++;
5700       if (ivs->n_invariant_uses[iid] == 1)
5701         ivs->n_regs++;
5702     }
5703 }
5704
5705 /* Set cost pair for USE in set IVS to CP.  */
5706
5707 static void
5708 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5709               struct iv_use *use, struct cost_pair *cp)
5710 {
5711   unsigned uid = use->id, cid;
5712
5713   if (ivs->cand_for_use[uid] == cp)
5714     return;
5715
5716   if (ivs->cand_for_use[uid])
5717     iv_ca_set_no_cp (data, ivs, use);
5718
5719   if (cp)
5720     {
5721       cid = cp->cand->id;
5722
5723       ivs->bad_uses--;
5724       ivs->cand_for_use[uid] = cp;
5725       ivs->n_cand_uses[cid]++;
5726       if (ivs->n_cand_uses[cid] == 1)
5727         {
5728           bitmap_set_bit (ivs->cands, cid);
5729           /* Do not count the pseudocandidates.  */
5730           if (cp->cand->iv)
5731             ivs->n_regs++;
5732           ivs->n_cands++;
5733           ivs->cand_cost += cp->cand->cost;
5734
5735           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5736         }
5737
5738       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5739       iv_ca_set_add_invariants (ivs, cp->depends_on);
5740
5741       if (cp->inv_expr_id != -1)
5742         {
5743           ivs->used_inv_expr[cp->inv_expr_id]++;
5744           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5745             ivs->num_used_inv_expr++;
5746         }
5747       iv_ca_recount_cost (data, ivs);
5748     }
5749 }
5750
5751 /* Extend set IVS by expressing USE by some of the candidates in it
5752    if possible.  Consider all important candidates if candidates in
5753    set IVS don't give any result.  */
5754
5755 static void
5756 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5757                struct iv_use *use)
5758 {
5759   struct cost_pair *best_cp = NULL, *cp;
5760   bitmap_iterator bi;
5761   unsigned i;
5762   struct iv_cand *cand;
5763
5764   gcc_assert (ivs->upto >= use->id);
5765   ivs->upto++;
5766   ivs->bad_uses++;
5767
5768   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5769     {
5770       cand = iv_cand (data, i);
5771       cp = get_use_iv_cost (data, use, cand);
5772       if (cheaper_cost_pair (cp, best_cp))
5773         best_cp = cp;
5774     }
5775
5776   if (best_cp == NULL)
5777     {
5778       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5779         {
5780           cand = iv_cand (data, i);
5781           cp = get_use_iv_cost (data, use, cand);
5782           if (cheaper_cost_pair (cp, best_cp))
5783             best_cp = cp;
5784         }
5785     }
5786
5787   iv_ca_set_cp (data, ivs, use, best_cp);
5788 }
5789
5790 /* Get cost for assignment IVS.  */
5791
5792 static comp_cost
5793 iv_ca_cost (struct iv_ca *ivs)
5794 {
5795   /* This was a conditional expression but it triggered a bug in
5796      Sun C 5.5.  */
5797   if (ivs->bad_uses)
5798     return infinite_cost;
5799   else
5800     return ivs->cost;
5801 }
5802
5803 /* Returns true if all dependences of CP are among invariants in IVS.  */
5804
5805 static bool
5806 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5807 {
5808   unsigned i;
5809   bitmap_iterator bi;
5810
5811   if (!cp->depends_on)
5812     return true;
5813
5814   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5815     {
5816       if (ivs->n_invariant_uses[i] == 0)
5817         return false;
5818     }
5819
5820   return true;
5821 }
5822
5823 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5824    it before NEXT_CHANGE.  */
5825
5826 static struct iv_ca_delta *
5827 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5828                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5829 {
5830   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5831
5832   change->use = use;
5833   change->old_cp = old_cp;
5834   change->new_cp = new_cp;
5835   change->next_change = next_change;
5836
5837   return change;
5838 }
5839
5840 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5841    are rewritten.  */
5842
5843 static struct iv_ca_delta *
5844 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5845 {
5846   struct iv_ca_delta *last;
5847
5848   if (!l2)
5849     return l1;
5850
5851   if (!l1)
5852     return l2;
5853
5854   for (last = l1; last->next_change; last = last->next_change)
5855     continue;
5856   last->next_change = l2;
5857
5858   return l1;
5859 }
5860
5861 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5862
5863 static struct iv_ca_delta *
5864 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5865 {
5866   struct iv_ca_delta *act, *next, *prev = NULL;
5867
5868   for (act = delta; act; act = next)
5869     {
5870       next = act->next_change;
5871       act->next_change = prev;
5872       prev = act;
5873
5874       std::swap (act->old_cp, act->new_cp);
5875     }
5876
5877   return prev;
5878 }
5879
5880 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5881    reverted instead.  */
5882
5883 static void
5884 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5885                     struct iv_ca_delta *delta, bool forward)
5886 {
5887   struct cost_pair *from, *to;
5888   struct iv_ca_delta *act;
5889
5890   if (!forward)
5891     delta = iv_ca_delta_reverse (delta);
5892
5893   for (act = delta; act; act = act->next_change)
5894     {
5895       from = act->old_cp;
5896       to = act->new_cp;
5897       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5898       iv_ca_set_cp (data, ivs, act->use, to);
5899     }
5900
5901   if (!forward)
5902     iv_ca_delta_reverse (delta);
5903 }
5904
5905 /* Returns true if CAND is used in IVS.  */
5906
5907 static bool
5908 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5909 {
5910   return ivs->n_cand_uses[cand->id] > 0;
5911 }
5912
5913 /* Returns number of induction variable candidates in the set IVS.  */
5914
5915 static unsigned
5916 iv_ca_n_cands (struct iv_ca *ivs)
5917 {
5918   return ivs->n_cands;
5919 }
5920
5921 /* Free the list of changes DELTA.  */
5922
5923 static void
5924 iv_ca_delta_free (struct iv_ca_delta **delta)
5925 {
5926   struct iv_ca_delta *act, *next;
5927
5928   for (act = *delta; act; act = next)
5929     {
5930       next = act->next_change;
5931       free (act);
5932     }
5933
5934   *delta = NULL;
5935 }
5936
5937 /* Allocates new iv candidates assignment.  */
5938
5939 static struct iv_ca *
5940 iv_ca_new (struct ivopts_data *data)
5941 {
5942   struct iv_ca *nw = XNEW (struct iv_ca);
5943
5944   nw->upto = 0;
5945   nw->bad_uses = 0;
5946   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5947   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5948   nw->cands = BITMAP_ALLOC (NULL);
5949   nw->n_cands = 0;
5950   nw->n_regs = 0;
5951   nw->cand_use_cost = no_cost;
5952   nw->cand_cost = 0;
5953   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5954   nw->cost = no_cost;
5955   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5956   nw->num_used_inv_expr = 0;
5957
5958   return nw;
5959 }
5960
5961 /* Free memory occupied by the set IVS.  */
5962
5963 static void
5964 iv_ca_free (struct iv_ca **ivs)
5965 {
5966   free ((*ivs)->cand_for_use);
5967   free ((*ivs)->n_cand_uses);
5968   BITMAP_FREE ((*ivs)->cands);
5969   free ((*ivs)->n_invariant_uses);
5970   free ((*ivs)->used_inv_expr);
5971   free (*ivs);
5972   *ivs = NULL;
5973 }
5974
5975 /* Dumps IVS to FILE.  */
5976
5977 static void
5978 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5979 {
5980   const char *pref = "  invariants ";
5981   unsigned i;
5982   comp_cost cost = iv_ca_cost (ivs);
5983
5984   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5985   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5986            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5987   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5988
5989    for (i = 0; i < ivs->upto; i++)
5990     {
5991       struct iv_use *use = iv_use (data, i);
5992       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5993       if (cp)
5994         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5995                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5996       else
5997         fprintf (file, "   use:%d --> ??\n", use->id);
5998     }
5999
6000   for (i = 1; i <= data->max_inv_id; i++)
6001     if (ivs->n_invariant_uses[i])
6002       {
6003         fprintf (file, "%s%d", pref, i);
6004         pref = ", ";
6005       }
6006   fprintf (file, "\n\n");
6007 }
6008
6009 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6010    new set, and store differences in DELTA.  Number of induction variables
6011    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6012    the function will try to find a solution with mimimal iv candidates.  */
6013
6014 static comp_cost
6015 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6016               struct iv_cand *cand, struct iv_ca_delta **delta,
6017               unsigned *n_ivs, bool min_ncand)
6018 {
6019   unsigned i;
6020   comp_cost cost;
6021   struct iv_use *use;
6022   struct cost_pair *old_cp, *new_cp;
6023
6024   *delta = NULL;
6025   for (i = 0; i < ivs->upto; i++)
6026     {
6027       use = iv_use (data, i);
6028       old_cp = iv_ca_cand_for_use (ivs, use);
6029
6030       if (old_cp
6031           && old_cp->cand == cand)
6032         continue;
6033
6034       new_cp = get_use_iv_cost (data, use, cand);
6035       if (!new_cp)
6036         continue;
6037
6038       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6039         continue;
6040
6041       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6042         continue;
6043
6044       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6045     }
6046
6047   iv_ca_delta_commit (data, ivs, *delta, true);
6048   cost = iv_ca_cost (ivs);
6049   if (n_ivs)
6050     *n_ivs = iv_ca_n_cands (ivs);
6051   iv_ca_delta_commit (data, ivs, *delta, false);
6052
6053   return cost;
6054 }
6055
6056 /* Try narrowing set IVS by removing CAND.  Return the cost of
6057    the new set and store the differences in DELTA.  START is
6058    the candidate with which we start narrowing.  */
6059
6060 static comp_cost
6061 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6062               struct iv_cand *cand, struct iv_cand *start,
6063               struct iv_ca_delta **delta)
6064 {
6065   unsigned i, ci;
6066   struct iv_use *use;
6067   struct cost_pair *old_cp, *new_cp, *cp;
6068   bitmap_iterator bi;
6069   struct iv_cand *cnd;
6070   comp_cost cost, best_cost, acost;
6071
6072   *delta = NULL;
6073   for (i = 0; i < n_iv_uses (data); i++)
6074     {
6075       use = iv_use (data, i);
6076
6077       old_cp = iv_ca_cand_for_use (ivs, use);
6078       if (old_cp->cand != cand)
6079         continue;
6080
6081       best_cost = iv_ca_cost (ivs);
6082       /* Start narrowing with START.  */
6083       new_cp = get_use_iv_cost (data, use, start);
6084
6085       if (data->consider_all_candidates)
6086         {
6087           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6088             {
6089               if (ci == cand->id || (start && ci == start->id))
6090                 continue;
6091
6092               cnd = iv_cand (data, ci);
6093
6094               cp = get_use_iv_cost (data, use, cnd);
6095               if (!cp)
6096                 continue;
6097
6098               iv_ca_set_cp (data, ivs, use, cp);
6099               acost = iv_ca_cost (ivs);
6100
6101               if (compare_costs (acost, best_cost) < 0)
6102                 {
6103                   best_cost = acost;
6104                   new_cp = cp;
6105                 }
6106             }
6107         }
6108       else
6109         {
6110           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6111             {
6112               if (ci == cand->id || (start && ci == start->id))
6113                 continue;
6114
6115               cnd = iv_cand (data, ci);
6116
6117               cp = get_use_iv_cost (data, use, cnd);
6118               if (!cp)
6119                 continue;
6120
6121               iv_ca_set_cp (data, ivs, use, cp);
6122               acost = iv_ca_cost (ivs);
6123
6124               if (compare_costs (acost, best_cost) < 0)
6125                 {
6126                   best_cost = acost;
6127                   new_cp = cp;
6128                 }
6129             }
6130         }
6131       /* Restore to old cp for use.  */
6132       iv_ca_set_cp (data, ivs, use, old_cp);
6133
6134       if (!new_cp)
6135         {
6136           iv_ca_delta_free (delta);
6137           return infinite_cost;
6138         }
6139
6140       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6141     }
6142
6143   iv_ca_delta_commit (data, ivs, *delta, true);
6144   cost = iv_ca_cost (ivs);
6145   iv_ca_delta_commit (data, ivs, *delta, false);
6146
6147   return cost;
6148 }
6149
6150 /* Try optimizing the set of candidates IVS by removing candidates different
6151    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6152    differences in DELTA.  */
6153
6154 static comp_cost
6155 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6156              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6157 {
6158   bitmap_iterator bi;
6159   struct iv_ca_delta *act_delta, *best_delta;
6160   unsigned i;
6161   comp_cost best_cost, acost;
6162   struct iv_cand *cand;
6163
6164   best_delta = NULL;
6165   best_cost = iv_ca_cost (ivs);
6166
6167   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6168     {
6169       cand = iv_cand (data, i);
6170
6171       if (cand == except_cand)
6172         continue;
6173
6174       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6175
6176       if (compare_costs (acost, best_cost) < 0)
6177         {
6178           best_cost = acost;
6179           iv_ca_delta_free (&best_delta);
6180           best_delta = act_delta;
6181         }
6182       else
6183         iv_ca_delta_free (&act_delta);
6184     }
6185
6186   if (!best_delta)
6187     {
6188       *delta = NULL;
6189       return best_cost;
6190     }
6191
6192   /* Recurse to possibly remove other unnecessary ivs.  */
6193   iv_ca_delta_commit (data, ivs, best_delta, true);
6194   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6195   iv_ca_delta_commit (data, ivs, best_delta, false);
6196   *delta = iv_ca_delta_join (best_delta, *delta);
6197   return best_cost;
6198 }
6199
6200 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6201    cheaper local cost for USE than BEST_CP.  Return pointer to
6202    the corresponding cost_pair, otherwise just return BEST_CP.  */
6203
6204 static struct cost_pair*
6205 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6206                         unsigned int cand_idx, struct iv_cand *old_cand,
6207                         struct cost_pair *best_cp)
6208 {
6209   struct iv_cand *cand;
6210   struct cost_pair *cp;
6211
6212   gcc_assert (old_cand != NULL && best_cp != NULL);
6213   if (cand_idx == old_cand->id)
6214     return best_cp;
6215
6216   cand = iv_cand (data, cand_idx);
6217   cp = get_use_iv_cost (data, use, cand);
6218   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6219     return cp;
6220
6221   return best_cp;
6222 }
6223
6224 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6225    which are used by more than one iv uses.  For each of those candidates,
6226    this function tries to represent iv uses under that candidate using
6227    other ones with lower local cost, then tries to prune the new set.
6228    If the new set has lower cost, It returns the new cost after recording
6229    candidate replacement in list DELTA.  */
6230
6231 static comp_cost
6232 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6233                struct iv_ca_delta **delta)
6234 {
6235   bitmap_iterator bi, bj;
6236   unsigned int i, j, k;
6237   struct iv_use *use;
6238   struct iv_cand *cand;
6239   comp_cost orig_cost, acost;
6240   struct iv_ca_delta *act_delta, *tmp_delta;
6241   struct cost_pair *old_cp, *best_cp = NULL;
6242
6243   *delta = NULL;
6244   orig_cost = iv_ca_cost (ivs);
6245
6246   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6247     {
6248       if (ivs->n_cand_uses[i] == 1
6249           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6250         continue;
6251
6252       cand = iv_cand (data, i);
6253
6254       act_delta = NULL;
6255       /*  Represent uses under current candidate using other ones with
6256           lower local cost.  */
6257       for (j = 0; j < ivs->upto; j++)
6258         {
6259           use = iv_use (data, j);
6260           old_cp = iv_ca_cand_for_use (ivs, use);
6261
6262           if (old_cp->cand != cand)
6263             continue;
6264
6265           best_cp = old_cp;
6266           if (data->consider_all_candidates)
6267             for (k = 0; k < n_iv_cands (data); k++)
6268               best_cp = cheaper_cost_with_cand (data, use, k,
6269                                                 old_cp->cand, best_cp);
6270           else
6271             EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6272               best_cp = cheaper_cost_with_cand (data, use, k,
6273                                                 old_cp->cand, best_cp);
6274
6275           if (best_cp == old_cp)
6276             continue;
6277
6278           act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6279         }
6280       /* No need for further prune.  */
6281       if (!act_delta)
6282         continue;
6283
6284       /* Prune the new candidate set.  */
6285       iv_ca_delta_commit (data, ivs, act_delta, true);
6286       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6287       iv_ca_delta_commit (data, ivs, act_delta, false);
6288       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6289
6290       if (compare_costs (acost, orig_cost) < 0)
6291         {
6292           *delta = act_delta;
6293           return acost;
6294         }
6295       else
6296         iv_ca_delta_free (&act_delta);
6297     }
6298
6299   return orig_cost;
6300 }
6301
6302 /* Tries to extend the sets IVS in the best possible way in order
6303    to express the USE.  If ORIGINALP is true, prefer candidates from
6304    the original set of IVs, otherwise favor important candidates not
6305    based on any memory object.  */
6306
6307 static bool
6308 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6309                   struct iv_use *use, bool originalp)
6310 {
6311   comp_cost best_cost, act_cost;
6312   unsigned i;
6313   bitmap_iterator bi;
6314   struct iv_cand *cand;
6315   struct iv_ca_delta *best_delta = NULL, *act_delta;
6316   struct cost_pair *cp;
6317
6318   iv_ca_add_use (data, ivs, use);
6319   best_cost = iv_ca_cost (ivs);
6320   cp = iv_ca_cand_for_use (ivs, use);
6321   if (cp)
6322     {
6323       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6324       iv_ca_set_no_cp (data, ivs, use);
6325     }
6326
6327   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6328      first try important candidates not based on any memory object.  Only if
6329      this fails, try the specific ones.  Rationale -- in loops with many
6330      variables the best choice often is to use just one generic biv.  If we
6331      added here many ivs specific to the uses, the optimization algorithm later
6332      would be likely to get stuck in a local minimum, thus causing us to create
6333      too many ivs.  The approach from few ivs to more seems more likely to be
6334      successful -- starting from few ivs, replacing an expensive use by a
6335      specific iv should always be a win.  */
6336   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6337     {
6338       cand = iv_cand (data, i);
6339
6340       if (originalp && cand->pos !=IP_ORIGINAL)
6341         continue;
6342
6343       if (!originalp && cand->iv->base_object != NULL_TREE)
6344         continue;
6345
6346       if (iv_ca_cand_used_p (ivs, cand))
6347         continue;
6348
6349       cp = get_use_iv_cost (data, use, cand);
6350       if (!cp)
6351         continue;
6352
6353       iv_ca_set_cp (data, ivs, use, cp);
6354       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6355                                true);
6356       iv_ca_set_no_cp (data, ivs, use);
6357       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6358
6359       if (compare_costs (act_cost, best_cost) < 0)
6360         {
6361           best_cost = act_cost;
6362
6363           iv_ca_delta_free (&best_delta);
6364           best_delta = act_delta;
6365         }
6366       else
6367         iv_ca_delta_free (&act_delta);
6368     }
6369
6370   if (infinite_cost_p (best_cost))
6371     {
6372       for (i = 0; i < use->n_map_members; i++)
6373         {
6374           cp = use->cost_map + i;
6375           cand = cp->cand;
6376           if (!cand)
6377             continue;
6378
6379           /* Already tried this.  */
6380           if (cand->important)
6381             {
6382               if (originalp && cand->pos == IP_ORIGINAL)
6383                 continue;
6384               if (!originalp && cand->iv->base_object == NULL_TREE)
6385                 continue;
6386             }
6387
6388           if (iv_ca_cand_used_p (ivs, cand))
6389             continue;
6390
6391           act_delta = NULL;
6392           iv_ca_set_cp (data, ivs, use, cp);
6393           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6394           iv_ca_set_no_cp (data, ivs, use);
6395           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6396                                        cp, act_delta);
6397
6398           if (compare_costs (act_cost, best_cost) < 0)
6399             {
6400               best_cost = act_cost;
6401
6402               if (best_delta)
6403                 iv_ca_delta_free (&best_delta);
6404               best_delta = act_delta;
6405             }
6406           else
6407             iv_ca_delta_free (&act_delta);
6408         }
6409     }
6410
6411   iv_ca_delta_commit (data, ivs, best_delta, true);
6412   iv_ca_delta_free (&best_delta);
6413
6414   return !infinite_cost_p (best_cost);
6415 }
6416
6417 /* Finds an initial assignment of candidates to uses.  */
6418
6419 static struct iv_ca *
6420 get_initial_solution (struct ivopts_data *data, bool originalp)
6421 {
6422   struct iv_ca *ivs = iv_ca_new (data);
6423   unsigned i;
6424
6425   for (i = 0; i < n_iv_uses (data); i++)
6426     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6427       {
6428         iv_ca_free (&ivs);
6429         return NULL;
6430       }
6431
6432   return ivs;
6433 }
6434
6435 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6436    points to a bool variable, this function tries to break local
6437    optimal fixed-point by replacing candidates in IVS if it's true.  */
6438
6439 static bool
6440 try_improve_iv_set (struct ivopts_data *data,
6441                     struct iv_ca *ivs, bool *try_replace_p)
6442 {
6443   unsigned i, n_ivs;
6444   comp_cost acost, best_cost = iv_ca_cost (ivs);
6445   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6446   struct iv_cand *cand;
6447
6448   /* Try extending the set of induction variables by one.  */
6449   for (i = 0; i < n_iv_cands (data); i++)
6450     {
6451       cand = iv_cand (data, i);
6452
6453       if (iv_ca_cand_used_p (ivs, cand))
6454         continue;
6455
6456       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6457       if (!act_delta)
6458         continue;
6459
6460       /* If we successfully added the candidate and the set is small enough,
6461          try optimizing it by removing other candidates.  */
6462       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6463         {
6464           iv_ca_delta_commit (data, ivs, act_delta, true);
6465           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6466           iv_ca_delta_commit (data, ivs, act_delta, false);
6467           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6468         }
6469
6470       if (compare_costs (acost, best_cost) < 0)
6471         {
6472           best_cost = acost;
6473           iv_ca_delta_free (&best_delta);
6474           best_delta = act_delta;
6475         }
6476       else
6477         iv_ca_delta_free (&act_delta);
6478     }
6479
6480   if (!best_delta)
6481     {
6482       /* Try removing the candidates from the set instead.  */
6483       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6484
6485       if (!best_delta && *try_replace_p)
6486         {
6487           *try_replace_p = false;
6488           /* So far candidate selecting algorithm tends to choose fewer IVs
6489              so that it can handle cases in which loops have many variables
6490              but the best choice is often to use only one general biv.  One
6491              weakness is it can't handle opposite cases, in which different
6492              candidates should be chosen with respect to each use.  To solve
6493              the problem, we replace candidates in a manner described by the
6494              comments of iv_ca_replace, thus give general algorithm a chance
6495              to break local optimal fixed-point in these cases.  */
6496           best_cost = iv_ca_replace (data, ivs, &best_delta);
6497         }
6498
6499       if (!best_delta)
6500         return false;
6501     }
6502
6503   iv_ca_delta_commit (data, ivs, best_delta, true);
6504   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6505   iv_ca_delta_free (&best_delta);
6506   return true;
6507 }
6508
6509 /* Attempts to find the optimal set of induction variables.  We do simple
6510    greedy heuristic -- we try to replace at most one candidate in the selected
6511    solution and remove the unused ivs while this improves the cost.  */
6512
6513 static struct iv_ca *
6514 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6515 {
6516   struct iv_ca *set;
6517   bool try_replace_p = true;
6518
6519   /* Get the initial solution.  */
6520   set = get_initial_solution (data, originalp);
6521   if (!set)
6522     {
6523       if (dump_file && (dump_flags & TDF_DETAILS))
6524         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6525       return NULL;
6526     }
6527
6528   if (dump_file && (dump_flags & TDF_DETAILS))
6529     {
6530       fprintf (dump_file, "Initial set of candidates:\n");
6531       iv_ca_dump (data, dump_file, set);
6532     }
6533
6534   while (try_improve_iv_set (data, set, &try_replace_p))
6535     {
6536       if (dump_file && (dump_flags & TDF_DETAILS))
6537         {
6538           fprintf (dump_file, "Improved to:\n");
6539           iv_ca_dump (data, dump_file, set);
6540         }
6541     }
6542
6543   return set;
6544 }
6545
6546 static struct iv_ca *
6547 find_optimal_iv_set (struct ivopts_data *data)
6548 {
6549   unsigned i;
6550   struct iv_ca *set, *origset;
6551   struct iv_use *use;
6552   comp_cost cost, origcost;
6553
6554   /* Determine the cost based on a strategy that starts with original IVs,
6555      and try again using a strategy that prefers candidates not based
6556      on any IVs.  */
6557   origset = find_optimal_iv_set_1 (data, true);
6558   set = find_optimal_iv_set_1 (data, false);
6559
6560   if (!origset && !set)
6561     return NULL;
6562
6563   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6564   cost = set ? iv_ca_cost (set) : infinite_cost;
6565
6566   if (dump_file && (dump_flags & TDF_DETAILS))
6567     {
6568       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6569                origcost.cost, origcost.complexity);
6570       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6571                cost.cost, cost.complexity);
6572     }
6573
6574   /* Choose the one with the best cost.  */
6575   if (compare_costs (origcost, cost) <= 0)
6576     {
6577       if (set)
6578         iv_ca_free (&set);
6579       set = origset;
6580     }
6581   else if (origset)
6582     iv_ca_free (&origset);
6583
6584   for (i = 0; i < n_iv_uses (data); i++)
6585     {
6586       use = iv_use (data, i);
6587       use->selected = iv_ca_cand_for_use (set, use)->cand;
6588     }
6589
6590   return set;
6591 }
6592
6593 /* Creates a new induction variable corresponding to CAND.  */
6594
6595 static void
6596 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6597 {
6598   gimple_stmt_iterator incr_pos;
6599   tree base;
6600   bool after = false;
6601
6602   if (!cand->iv)
6603     return;
6604
6605   switch (cand->pos)
6606     {
6607     case IP_NORMAL:
6608       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6609       break;
6610
6611     case IP_END:
6612       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6613       after = true;
6614       break;
6615
6616     case IP_AFTER_USE:
6617       after = true;
6618       /* fall through */
6619     case IP_BEFORE_USE:
6620       incr_pos = gsi_for_stmt (cand->incremented_at);
6621       break;
6622
6623     case IP_ORIGINAL:
6624       /* Mark that the iv is preserved.  */
6625       name_info (data, cand->var_before)->preserve_biv = true;
6626       name_info (data, cand->var_after)->preserve_biv = true;
6627
6628       /* Rewrite the increment so that it uses var_before directly.  */
6629       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6630       return;
6631     }
6632
6633   gimple_add_tmp_var (cand->var_before);
6634
6635   base = unshare_expr (cand->iv->base);
6636
6637   create_iv (base, unshare_expr (cand->iv->step),
6638              cand->var_before, data->current_loop,
6639              &incr_pos, after, &cand->var_before, &cand->var_after);
6640 }
6641
6642 /* Creates new induction variables described in SET.  */
6643
6644 static void
6645 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6646 {
6647   unsigned i;
6648   struct iv_cand *cand;
6649   bitmap_iterator bi;
6650
6651   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6652     {
6653       cand = iv_cand (data, i);
6654       create_new_iv (data, cand);
6655     }
6656
6657   if (dump_file && (dump_flags & TDF_DETAILS))
6658     {
6659       fprintf (dump_file, "Selected IV set for loop %d",
6660                data->current_loop->num);
6661       if (data->loop_loc != UNKNOWN_LOCATION)
6662         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6663                  LOCATION_LINE (data->loop_loc));
6664       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6665       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6666         {
6667           cand = iv_cand (data, i);
6668           dump_cand (dump_file, cand);
6669         }
6670       fprintf (dump_file, "\n");
6671     }
6672 }
6673
6674 /* Rewrites USE (definition of iv used in a nonlinear expression)
6675    using candidate CAND.  */
6676
6677 static void
6678 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6679                             struct iv_use *use, struct iv_cand *cand)
6680 {
6681   tree comp;
6682   tree op, tgt;
6683   gassign *ass;
6684   gimple_stmt_iterator bsi;
6685
6686   /* An important special case -- if we are asked to express value of
6687      the original iv by itself, just exit; there is no need to
6688      introduce a new computation (that might also need casting the
6689      variable to unsigned and back).  */
6690   if (cand->pos == IP_ORIGINAL
6691       && cand->incremented_at == use->stmt)
6692     {
6693       enum tree_code stmt_code;
6694
6695       gcc_assert (is_gimple_assign (use->stmt));
6696       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6697
6698       /* Check whether we may leave the computation unchanged.
6699          This is the case only if it does not rely on other
6700          computations in the loop -- otherwise, the computation
6701          we rely upon may be removed in remove_unused_ivs,
6702          thus leading to ICE.  */
6703       stmt_code = gimple_assign_rhs_code (use->stmt);
6704       if (stmt_code == PLUS_EXPR
6705           || stmt_code == MINUS_EXPR
6706           || stmt_code == POINTER_PLUS_EXPR)
6707         {
6708           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6709             op = gimple_assign_rhs2 (use->stmt);
6710           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6711             op = gimple_assign_rhs1 (use->stmt);
6712           else
6713             op = NULL_TREE;
6714         }
6715       else
6716         op = NULL_TREE;
6717
6718       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6719         return;
6720     }
6721
6722   comp = get_computation (data->current_loop, use, cand);
6723   gcc_assert (comp != NULL_TREE);
6724
6725   switch (gimple_code (use->stmt))
6726     {
6727     case GIMPLE_PHI:
6728       tgt = PHI_RESULT (use->stmt);
6729
6730       /* If we should keep the biv, do not replace it.  */
6731       if (name_info (data, tgt)->preserve_biv)
6732         return;
6733
6734       bsi = gsi_after_labels (gimple_bb (use->stmt));
6735       break;
6736
6737     case GIMPLE_ASSIGN:
6738       tgt = gimple_assign_lhs (use->stmt);
6739       bsi = gsi_for_stmt (use->stmt);
6740       break;
6741
6742     default:
6743       gcc_unreachable ();
6744     }
6745
6746   if (!valid_gimple_rhs_p (comp)
6747       || (gimple_code (use->stmt) != GIMPLE_PHI
6748           /* We can't allow re-allocating the stmt as it might be pointed
6749              to still.  */
6750           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6751               >= gimple_num_ops (gsi_stmt (bsi)))))
6752     {
6753       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6754                                        true, GSI_SAME_STMT);
6755       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6756         {
6757           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6758           /* As this isn't a plain copy we have to reset alignment
6759              information.  */
6760           if (SSA_NAME_PTR_INFO (comp))
6761             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6762         }
6763     }
6764
6765   if (gimple_code (use->stmt) == GIMPLE_PHI)
6766     {
6767       ass = gimple_build_assign (tgt, comp);
6768       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6769
6770       bsi = gsi_for_stmt (use->stmt);
6771       remove_phi_node (&bsi, false);
6772     }
6773   else
6774     {
6775       gimple_assign_set_rhs_from_tree (&bsi, comp);
6776       use->stmt = gsi_stmt (bsi);
6777     }
6778 }
6779
6780 /* Performs a peephole optimization to reorder the iv update statement with
6781    a mem ref to enable instruction combining in later phases. The mem ref uses
6782    the iv value before the update, so the reordering transformation requires
6783    adjustment of the offset. CAND is the selected IV_CAND.
6784
6785    Example:
6786
6787    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6788    iv2 = iv1 + 1;
6789
6790    if (t < val)      (1)
6791      goto L;
6792    goto Head;
6793
6794
6795    directly propagating t over to (1) will introduce overlapping live range
6796    thus increase register pressure. This peephole transform it into:
6797
6798
6799    iv2 = iv1 + 1;
6800    t = MEM_REF (base, iv2, 8, 8);
6801    if (t < val)
6802      goto L;
6803    goto Head;
6804 */
6805
6806 static void
6807 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6808 {
6809   tree var_after;
6810   gimple iv_update, stmt;
6811   basic_block bb;
6812   gimple_stmt_iterator gsi, gsi_iv;
6813
6814   if (cand->pos != IP_NORMAL)
6815     return;
6816
6817   var_after = cand->var_after;
6818   iv_update = SSA_NAME_DEF_STMT (var_after);
6819
6820   bb = gimple_bb (iv_update);
6821   gsi = gsi_last_nondebug_bb (bb);
6822   stmt = gsi_stmt (gsi);
6823
6824   /* Only handle conditional statement for now.  */
6825   if (gimple_code (stmt) != GIMPLE_COND)
6826     return;
6827
6828   gsi_prev_nondebug (&gsi);
6829   stmt = gsi_stmt (gsi);
6830   if (stmt != iv_update)
6831     return;
6832
6833   gsi_prev_nondebug (&gsi);
6834   if (gsi_end_p (gsi))
6835     return;
6836
6837   stmt = gsi_stmt (gsi);
6838   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6839     return;
6840
6841   if (stmt != use->stmt)
6842     return;
6843
6844   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6845     return;
6846
6847   if (dump_file && (dump_flags & TDF_DETAILS))
6848     {
6849       fprintf (dump_file, "Reordering \n");
6850       print_gimple_stmt (dump_file, iv_update, 0, 0);
6851       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6852       fprintf (dump_file, "\n");
6853     }
6854
6855   gsi = gsi_for_stmt (use->stmt);
6856   gsi_iv = gsi_for_stmt (iv_update);
6857   gsi_move_before (&gsi_iv, &gsi);
6858
6859   cand->pos = IP_BEFORE_USE;
6860   cand->incremented_at = use->stmt;
6861 }
6862
6863 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6864
6865 static void
6866 rewrite_use_address_1 (struct ivopts_data *data,
6867                        struct iv_use *use, struct iv_cand *cand)
6868 {
6869   aff_tree aff;
6870   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6871   tree base_hint = NULL_TREE;
6872   tree ref, iv;
6873   bool ok;
6874
6875   adjust_iv_update_pos (cand, use);
6876   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6877   gcc_assert (ok);
6878   unshare_aff_combination (&aff);
6879
6880   /* To avoid undefined overflow problems, all IV candidates use unsigned
6881      integer types.  The drawback is that this makes it impossible for
6882      create_mem_ref to distinguish an IV that is based on a memory object
6883      from one that represents simply an offset.
6884
6885      To work around this problem, we pass a hint to create_mem_ref that
6886      indicates which variable (if any) in aff is an IV based on a memory
6887      object.  Note that we only consider the candidate.  If this is not
6888      based on an object, the base of the reference is in some subexpression
6889      of the use -- but these will use pointer types, so they are recognized
6890      by the create_mem_ref heuristics anyway.  */
6891   if (cand->iv->base_object)
6892     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6893
6894   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6895   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6896                         reference_alias_ptr_type (*use->op_p),
6897                         iv, base_hint, data->speed);
6898   copy_ref_info (ref, *use->op_p);
6899   *use->op_p = ref;
6900 }
6901
6902 /* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
6903    first use of a group, rewrites sub uses in the group too.  */
6904
6905 static void
6906 rewrite_use_address (struct ivopts_data *data,
6907                       struct iv_use *use, struct iv_cand *cand)
6908 {
6909   struct iv_use *next;
6910
6911   gcc_assert (use->sub_id == 0);
6912   rewrite_use_address_1 (data, use, cand);
6913   update_stmt (use->stmt);
6914
6915   for (next = use->next; next != NULL; next = next->next)
6916     {
6917       rewrite_use_address_1 (data, next, cand);
6918       update_stmt (next->stmt);
6919     }
6920
6921   return;
6922 }
6923
6924 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6925    candidate CAND.  */
6926
6927 static void
6928 rewrite_use_compare (struct ivopts_data *data,
6929                      struct iv_use *use, struct iv_cand *cand)
6930 {
6931   tree comp, *var_p, op, bound;
6932   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6933   enum tree_code compare;
6934   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6935   bool ok;
6936
6937   bound = cp->value;
6938   if (bound)
6939     {
6940       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6941       tree var_type = TREE_TYPE (var);
6942       gimple_seq stmts;
6943
6944       if (dump_file && (dump_flags & TDF_DETAILS))
6945         {
6946           fprintf (dump_file, "Replacing exit test: ");
6947           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6948         }
6949       compare = cp->comp;
6950       bound = unshare_expr (fold_convert (var_type, bound));
6951       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6952       if (stmts)
6953         gsi_insert_seq_on_edge_immediate (
6954                 loop_preheader_edge (data->current_loop),
6955                 stmts);
6956
6957       gcond *cond_stmt = as_a <gcond *> (use->stmt);
6958       gimple_cond_set_lhs (cond_stmt, var);
6959       gimple_cond_set_code (cond_stmt, compare);
6960       gimple_cond_set_rhs (cond_stmt, op);
6961       return;
6962     }
6963
6964   /* The induction variable elimination failed; just express the original
6965      giv.  */
6966   comp = get_computation (data->current_loop, use, cand);
6967   gcc_assert (comp != NULL_TREE);
6968
6969   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6970   gcc_assert (ok);
6971
6972   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6973                                      true, GSI_SAME_STMT);
6974 }
6975
6976 /* Rewrites USE using candidate CAND.  */
6977
6978 static void
6979 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6980 {
6981   switch (use->type)
6982     {
6983       case USE_NONLINEAR_EXPR:
6984         rewrite_use_nonlinear_expr (data, use, cand);
6985         break;
6986
6987       case USE_ADDRESS:
6988         rewrite_use_address (data, use, cand);
6989         break;
6990
6991       case USE_COMPARE:
6992         rewrite_use_compare (data, use, cand);
6993         break;
6994
6995       default:
6996         gcc_unreachable ();
6997     }
6998
6999   update_stmt (use->stmt);
7000 }
7001
7002 /* Rewrite the uses using the selected induction variables.  */
7003
7004 static void
7005 rewrite_uses (struct ivopts_data *data)
7006 {
7007   unsigned i;
7008   struct iv_cand *cand;
7009   struct iv_use *use;
7010
7011   for (i = 0; i < n_iv_uses (data); i++)
7012     {
7013       use = iv_use (data, i);
7014       cand = use->selected;
7015       gcc_assert (cand);
7016
7017       rewrite_use (data, use, cand);
7018     }
7019 }
7020
7021 /* Removes the ivs that are not used after rewriting.  */
7022
7023 static void
7024 remove_unused_ivs (struct ivopts_data *data)
7025 {
7026   unsigned j;
7027   bitmap_iterator bi;
7028   bitmap toremove = BITMAP_ALLOC (NULL);
7029
7030   /* Figure out an order in which to release SSA DEFs so that we don't
7031      release something that we'd have to propagate into a debug stmt
7032      afterwards.  */
7033   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7034     {
7035       struct version_info *info;
7036
7037       info = ver_info (data, j);
7038       if (info->iv
7039           && !integer_zerop (info->iv->step)
7040           && !info->inv_id
7041           && !info->iv->have_use_for
7042           && !info->preserve_biv)
7043         {
7044           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7045
7046           tree def = info->iv->ssa_name;
7047
7048           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7049             {
7050               imm_use_iterator imm_iter;
7051               use_operand_p use_p;
7052               gimple stmt;
7053               int count = 0;
7054
7055               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7056                 {
7057                   if (!gimple_debug_bind_p (stmt))
7058                     continue;
7059
7060                   /* We just want to determine whether to do nothing
7061                      (count == 0), to substitute the computed
7062                      expression into a single use of the SSA DEF by
7063                      itself (count == 1), or to use a debug temp
7064                      because the SSA DEF is used multiple times or as
7065                      part of a larger expression (count > 1). */
7066                   count++;
7067                   if (gimple_debug_bind_get_value (stmt) != def)
7068                     count++;
7069
7070                   if (count > 1)
7071                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7072                 }
7073
7074               if (!count)
7075                 continue;
7076
7077               struct iv_use dummy_use;
7078               struct iv_cand *best_cand = NULL, *cand;
7079               unsigned i, best_pref = 0, cand_pref;
7080
7081               memset (&dummy_use, 0, sizeof (dummy_use));
7082               dummy_use.iv = info->iv;
7083               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7084                 {
7085                   cand = iv_use (data, i)->selected;
7086                   if (cand == best_cand)
7087                     continue;
7088                   cand_pref = operand_equal_p (cand->iv->step,
7089                                                info->iv->step, 0)
7090                     ? 4 : 0;
7091                   cand_pref
7092                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7093                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7094                     ? 2 : 0;
7095                   cand_pref
7096                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7097                     ? 1 : 0;
7098                   if (best_cand == NULL || best_pref < cand_pref)
7099                     {
7100                       best_cand = cand;
7101                       best_pref = cand_pref;
7102                     }
7103                 }
7104
7105               if (!best_cand)
7106                 continue;
7107
7108               tree comp = get_computation_at (data->current_loop,
7109                                               &dummy_use, best_cand,
7110                                               SSA_NAME_DEF_STMT (def));
7111               if (!comp)
7112                 continue;
7113
7114               if (count > 1)
7115                 {
7116                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7117                   DECL_ARTIFICIAL (vexpr) = 1;
7118                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7119                   if (SSA_NAME_VAR (def))
7120                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7121                   else
7122                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7123                   gdebug *def_temp
7124                     = gimple_build_debug_bind (vexpr, comp, NULL);
7125                   gimple_stmt_iterator gsi;
7126
7127                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7128                     gsi = gsi_after_labels (gimple_bb
7129                                             (SSA_NAME_DEF_STMT (def)));
7130                   else
7131                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7132
7133                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7134                   comp = vexpr;
7135                 }
7136
7137               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7138                 {
7139                   if (!gimple_debug_bind_p (stmt))
7140                     continue;
7141
7142                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7143                     SET_USE (use_p, comp);
7144
7145                   update_stmt (stmt);
7146                 }
7147             }
7148         }
7149     }
7150
7151   release_defs_bitset (toremove);
7152
7153   BITMAP_FREE (toremove);
7154 }
7155
7156 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7157    for hash_map::traverse.  */
7158
7159 bool
7160 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7161 {
7162   free (value);
7163   return true;
7164 }
7165
7166 /* Frees data allocated by the optimization of a single loop.  */
7167
7168 static void
7169 free_loop_data (struct ivopts_data *data)
7170 {
7171   unsigned i, j;
7172   bitmap_iterator bi;
7173   tree obj;
7174
7175   if (data->niters)
7176     {
7177       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7178       delete data->niters;
7179       data->niters = NULL;
7180     }
7181
7182   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7183     {
7184       struct version_info *info;
7185
7186       info = ver_info (data, i);
7187       free (info->iv);
7188       info->iv = NULL;
7189       info->has_nonlin_use = false;
7190       info->preserve_biv = false;
7191       info->inv_id = 0;
7192     }
7193   bitmap_clear (data->relevant);
7194   bitmap_clear (data->important_candidates);
7195
7196   for (i = 0; i < n_iv_uses (data); i++)
7197     {
7198       struct iv_use *use = iv_use (data, i);
7199       struct iv_use *pre = use, *sub = use->next;
7200
7201       while (sub)
7202         {
7203           gcc_assert (sub->related_cands == NULL);
7204           gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7205
7206           free (sub->iv);
7207           pre = sub;
7208           sub = sub->next;
7209           free (pre);
7210         }
7211
7212       free (use->iv);
7213       BITMAP_FREE (use->related_cands);
7214       for (j = 0; j < use->n_map_members; j++)
7215         if (use->cost_map[j].depends_on)
7216           BITMAP_FREE (use->cost_map[j].depends_on);
7217       free (use->cost_map);
7218       free (use);
7219     }
7220   data->iv_uses.truncate (0);
7221
7222   for (i = 0; i < n_iv_cands (data); i++)
7223     {
7224       struct iv_cand *cand = iv_cand (data, i);
7225
7226       free (cand->iv);
7227       if (cand->depends_on)
7228         BITMAP_FREE (cand->depends_on);
7229       free (cand);
7230     }
7231   data->iv_candidates.truncate (0);
7232
7233   if (data->version_info_size < num_ssa_names)
7234     {
7235       data->version_info_size = 2 * num_ssa_names;
7236       free (data->version_info);
7237       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7238     }
7239
7240   data->max_inv_id = 0;
7241
7242   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7243     SET_DECL_RTL (obj, NULL_RTX);
7244
7245   decl_rtl_to_reset.truncate (0);
7246
7247   data->inv_expr_tab->empty ();
7248   data->inv_expr_id = 0;
7249 }
7250
7251 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7252    loop tree.  */
7253
7254 static void
7255 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7256 {
7257   free_loop_data (data);
7258   free (data->version_info);
7259   BITMAP_FREE (data->relevant);
7260   BITMAP_FREE (data->important_candidates);
7261
7262   decl_rtl_to_reset.release ();
7263   data->iv_uses.release ();
7264   data->iv_candidates.release ();
7265   delete data->inv_expr_tab;
7266   data->inv_expr_tab = NULL;
7267   free_affine_expand_cache (&data->name_expansion_cache);
7268 }
7269
7270 /* Returns true if the loop body BODY includes any function calls.  */
7271
7272 static bool
7273 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7274 {
7275   gimple_stmt_iterator gsi;
7276   unsigned i;
7277
7278   for (i = 0; i < num_nodes; i++)
7279     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7280       {
7281         gimple stmt = gsi_stmt (gsi);
7282         if (is_gimple_call (stmt)
7283             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7284           return true;
7285       }
7286   return false;
7287 }
7288
7289 /* Optimizes the LOOP.  Returns true if anything changed.  */
7290
7291 static bool
7292 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7293 {
7294   bool changed = false;
7295   struct iv_ca *iv_ca;
7296   edge exit = single_dom_exit (loop);
7297   basic_block *body;
7298
7299   gcc_assert (!data->niters);
7300   data->current_loop = loop;
7301   data->loop_loc = find_loop_location (loop);
7302   data->speed = optimize_loop_for_speed_p (loop);
7303
7304   if (dump_file && (dump_flags & TDF_DETAILS))
7305     {
7306       fprintf (dump_file, "Processing loop %d", loop->num);
7307       if (data->loop_loc != UNKNOWN_LOCATION)
7308         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7309                  LOCATION_LINE (data->loop_loc));
7310       fprintf (dump_file, "\n");
7311
7312       if (exit)
7313         {
7314           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7315                    exit->src->index, exit->dest->index);
7316           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7317           fprintf (dump_file, "\n");
7318         }
7319
7320       fprintf (dump_file, "\n");
7321     }
7322
7323   body = get_loop_body (loop);
7324   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7325   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7326   free (body);
7327
7328   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7329
7330   /* For each ssa name determines whether it behaves as an induction variable
7331      in some loop.  */
7332   if (!find_induction_variables (data))
7333     goto finish;
7334
7335   /* Finds interesting uses (item 1).  */
7336   find_interesting_uses (data);
7337   group_address_uses (data);
7338   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7339     goto finish;
7340
7341   /* Finds candidates for the induction variables (item 2).  */
7342   find_iv_candidates (data);
7343
7344   /* Calculates the costs (item 3, part 1).  */
7345   determine_iv_costs (data);
7346   determine_use_iv_costs (data);
7347   determine_set_costs (data);
7348
7349   /* Find the optimal set of induction variables (item 3, part 2).  */
7350   iv_ca = find_optimal_iv_set (data);
7351   if (!iv_ca)
7352     goto finish;
7353   changed = true;
7354
7355   /* Create the new induction variables (item 4, part 1).  */
7356   create_new_ivs (data, iv_ca);
7357   iv_ca_free (&iv_ca);
7358
7359   /* Rewrite the uses (item 4, part 2).  */
7360   rewrite_uses (data);
7361
7362   /* Remove the ivs that are unused after rewriting.  */
7363   remove_unused_ivs (data);
7364
7365   /* We have changed the structure of induction variables; it might happen
7366      that definitions in the scev database refer to some of them that were
7367      eliminated.  */
7368   scev_reset ();
7369
7370 finish:
7371   free_loop_data (data);
7372
7373   return changed;
7374 }
7375
7376 /* Main entry point.  Optimizes induction variables in loops.  */
7377
7378 void
7379 tree_ssa_iv_optimize (void)
7380 {
7381   struct loop *loop;
7382   struct ivopts_data data;
7383
7384   tree_ssa_iv_optimize_init (&data);
7385
7386   /* Optimize the loops starting with the innermost ones.  */
7387   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7388     {
7389       if (dump_file && (dump_flags & TDF_DETAILS))
7390         flow_loop_dump (loop, dump_file, NULL, 1);
7391
7392       tree_ssa_iv_optimize_loop (&data, loop);
7393     }
7394
7395   tree_ssa_iv_optimize_finalize (&data);
7396 }