/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
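
/* As an illustration, strength reduction may rewrite a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   so that the address is itself kept as an induction variable:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   trading the multiplication hidden in the address computation of a[i]
   for a simple pointer increment, provided the cost model below finds
   this profitable on the target.  */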
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
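
/* For instance, for the counter of a loop "for (i = 0; i < n; i += 4)",
   the iv describing i has base 0 and step 4; as i is incremented
   directly in the loop, it is also a biv.  */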

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
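
/* Example: for EXPR = &a[i] (an ADDR_EXPR), the base object is &a,
   whereas an integer constant cast to a pointer yields NULL_TREE,
   since it points to no tracked object.  */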

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
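
/* For instance, for a biv described by

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   determine_biv_step returns the step 1.  */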

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
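
/* E.g. with the biv i_1 above stepping by 1, an assignment such as

     j_2 = i_1 * 4 + 8;

   defines a giv with base 8 and step 4, which simple_iv recognizes
   through the scalar evolution machinery.  */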

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
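
/* E.g. for a condition "i_1 < n_2" where i_1 is an iv with nonzero step
   and n_2 is loop invariant, CONTROL_VAR is pointed at i_1 and BOUND at
   n_2, and true is returned; the operands are swapped first if the iv
   happens to be on the right-hand side of the comparison.  */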

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
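
/* E.g. for a reference a[i] with 4-byte elements where i is an iv with
   step 2, idx_find_step replaces the index by the initial value of i
   and accumulates 4 * 2 = 8 bytes into DTA->step.  */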

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
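
/* Examples: constant_multiple_of (x_1 * 6, x_1, &mul) stores 6 to MUL
   and returns true, as does constant_multiple_of (12, 4, &mul) with 3;
   constant_multiple_of (13, 4, &mul) fails because the division leaves
   a remainder.  */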

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_fits_shwi_p (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_fits_shwi_p (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_fits_shwi_p (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_fits_shwi_p (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
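
/* E.g. stripping p_1 + 20 yields p_1 with OFFSET set to 20, and
   stripping &a[4] (with 4-byte elements) yields &a with OFFSET set
   to 16.  */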

/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
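
/* E.g. int is mapped to unsigned int, and char * to an unsigned
   integer type of the same precision, so that overflow in the computed
   ivs has well-defined semantics.  */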

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
2216 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2217 position to POS. If USE is not NULL, the candidate is set as related to
2218 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2219 replacement of the final value of the iv by a direct computation. */
2221 static struct iv_cand *
2222 add_candidate_1 (struct ivopts_data *data,
2223 tree base, tree step, bool important, enum iv_position pos,
2224 struct iv_use *use, gimple incremented_at)
2226 unsigned i;
2227 struct iv_cand *cand = NULL;
2228 tree type, orig_type;
2230 /* For non-original variables, make sure their values are computed in a type
2231 that does not invoke undefined behavior on overflows (since in general,
2232 we cannot prove that these induction variables are non-wrapping). */
2233 if (pos != IP_ORIGINAL)
2235 orig_type = TREE_TYPE (base);
2236 type = generic_type_for (orig_type);
2237 if (type != orig_type)
2239 base = fold_convert (type, base);
2240 step = fold_convert (type, step);
2244 for (i = 0; i < n_iv_cands (data); i++)
2246 cand = iv_cand (data, i);
2248 if (cand->pos != pos)
2249 continue;
2251 if (cand->incremented_at != incremented_at
2252 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2253 && cand->ainc_use != use))
2254 continue;
2256 if (!cand->iv)
2258 if (!base && !step)
2259 break;
2261 continue;
2264 if (!base && !step)
2265 continue;
2267 if (operand_equal_p (base, cand->iv->base, 0)
2268 && operand_equal_p (step, cand->iv->step, 0)
2269 && (TYPE_PRECISION (TREE_TYPE (base))
2270 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2271 break;
2274 if (i == n_iv_cands (data))
2276 cand = XCNEW (struct iv_cand);
2277 cand->id = i;
2279 if (!base && !step)
2280 cand->iv = NULL;
2281 else
2282 cand->iv = alloc_iv (base, step);
2284 cand->pos = pos;
2285 if (pos != IP_ORIGINAL && cand->iv)
2287 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2288 cand->var_after = cand->var_before;
2290 cand->important = important;
2291 cand->incremented_at = incremented_at;
2292 data->iv_candidates.safe_push (cand);
2294 if (step
2295 && TREE_CODE (step) != INTEGER_CST)
2297 fd_ivopts_data = data;
2298 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2301 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2302 cand->ainc_use = use;
2303 else
2304 cand->ainc_use = NULL;
2306 if (dump_file && (dump_flags & TDF_DETAILS))
2307 dump_cand (dump_file, cand);
2310 if (important && !cand->important)
2312 cand->important = true;
2313 if (dump_file && (dump_flags & TDF_DETAILS))
2314 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2317 if (use)
2319 bitmap_set_bit (use->related_cands, i);
2320 if (dump_file && (dump_flags & TDF_DETAILS))
2321 fprintf (dump_file, "Candidate %d is related to use %d\n",
2322 cand->id, use->id);
2325 return cand;
2328 /* Returns true if incrementing the induction variable at the end of the LOOP
2329 is allowed.
2331 The purpose is to avoid splitting the latch edge with a biv increment, thus
2332 creating a jump, possibly confusing other optimization passes and leaving
2333 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2334 is not available (so we do not have a better alternative), or if the latch
2335 edge is already nonempty. */
2337 static bool
2338 allow_ip_end_pos_p (struct loop *loop)
2340 if (!ip_normal_pos (loop))
2341 return true;
2343 if (!empty_block_p (ip_end_pos (loop)))
2344 return true;
2346 return false;
2349 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2350 Important field is set to IMPORTANT. */
2352 static void
2353 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2354 bool important, struct iv_use *use)
2356 basic_block use_bb = gimple_bb (use->stmt);
2357 enum machine_mode mem_mode;
2358 unsigned HOST_WIDE_INT cstepi;
2360 /* If we insert the increment in any position other than the standard
2361 ones, we must ensure that it is incremented once per iteration.
2362 It must not be in an inner nested loop, or one side of an if
2363 statement. */
2364 if (use_bb->loop_father != data->current_loop
2365 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2366 || stmt_could_throw_p (use->stmt)
2367 || !cst_fits_shwi_p (step))
2368 return;
2370 cstepi = int_cst_value (step);
2372 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2373 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2374 || USE_STORE_PRE_INCREMENT (mem_mode))
2375 && GET_MODE_SIZE (mem_mode) == cstepi)
2376 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2377 || USE_STORE_PRE_DECREMENT (mem_mode))
2378 && GET_MODE_SIZE (mem_mode) == -cstepi))
2380 enum tree_code code = MINUS_EXPR;
2381 tree new_base;
2382 tree new_step = step;
2384 if (POINTER_TYPE_P (TREE_TYPE (base)))
2386 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2387 code = POINTER_PLUS_EXPR;
2389 else
2390 new_step = fold_convert (TREE_TYPE (base), new_step);
2391 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2392 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2393 use->stmt);
2395 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2396 || USE_STORE_POST_INCREMENT (mem_mode))
2397 && GET_MODE_SIZE (mem_mode) == cstepi)
2398 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2399 || USE_STORE_POST_DECREMENT (mem_mode))
2400 && GET_MODE_SIZE (mem_mode) == -cstepi))
2402 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2403 use->stmt);
2407 /* Adds a candidate BASE + STEP * i. The important field is set to
2408 IMPORTANT. If USE is not NULL, the candidate is set as related to
2409 it. The candidate computation is scheduled on all available positions. */
2411 static void
2412 add_candidate (struct ivopts_data *data,
2413 tree base, tree step, bool important, struct iv_use *use)
2415 if (ip_normal_pos (data->current_loop))
2416 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2417 if (ip_end_pos (data->current_loop)
2418 && allow_ip_end_pos_p (data->current_loop))
2419 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2421 if (use != NULL && use->type == USE_ADDRESS)
2422 add_autoinc_candidates (data, base, step, important, use);
2425 /* Adds standard iv candidates. */
2427 static void
2428 add_standard_iv_candidates (struct ivopts_data *data)
2430 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2432 /* The same for the wider 'long' type if it is still fast enough. */
2433 if (TYPE_PRECISION
2434 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2435 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2436 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2437 build_int_cst (long_integer_type_node, 1), true, NULL);
2439 /* Likewise for 'long long' if it is still fast enough. */
2440 if (TYPE_PRECISION
2441 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2442 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2443 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2444 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2448 /* Adds candidates based on the old induction variable IV. */
2450 static void
2451 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2453 gimple phi;
2454 tree def;
2455 struct iv_cand *cand;
2457 add_candidate (data, iv->base, iv->step, true, NULL);
2459 /* The same, but with initial value zero. */
2460 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2461 add_candidate (data, size_int (0), iv->step, true, NULL);
2462 else
2463 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2464 iv->step, true, NULL);
2466 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2467 if (gimple_code (phi) == GIMPLE_PHI)
2469 /* Additionally record the possibility of leaving the original iv
2470 untouched. */
2471 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2472 cand = add_candidate_1 (data,
2473 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2474 SSA_NAME_DEF_STMT (def));
2475 cand->var_before = iv->ssa_name;
2476 cand->var_after = def;
2480 /* Adds candidates based on the old induction variables. */
2482 static void
2483 add_old_ivs_candidates (struct ivopts_data *data)
2485 unsigned i;
2486 struct iv *iv;
2487 bitmap_iterator bi;
2489 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2491 iv = ver_info (data, i)->iv;
2492 if (iv && iv->biv_p && !integer_zerop (iv->step))
2493 add_old_iv_candidates (data, iv);
2497 /* Adds candidates based on the value of the induction variable IV and USE. */
2499 static void
2500 add_iv_value_candidates (struct ivopts_data *data,
2501 struct iv *iv, struct iv_use *use)
2503 unsigned HOST_WIDE_INT offset;
2504 tree base;
2505 tree basetype;
2507 add_candidate (data, iv->base, iv->step, false, use);
2509 /* The same, but with initial value zero. Make such variable important,
2510 since it is generic enough so that possibly many uses may be based
2511 on it. */
2512 basetype = TREE_TYPE (iv->base);
2513 if (POINTER_TYPE_P (basetype))
2514 basetype = sizetype;
2515 add_candidate (data, build_int_cst (basetype, 0),
2516 iv->step, true, use);
2518 /* Third, try removing the constant offset. Make sure to even
2519 add a candidate for &a[0] vs. (T *)&a. */
2520 base = strip_offset (iv->base, &offset);
2521 if (offset
2522 || base != iv->base)
2523 add_candidate (data, base, iv->step, false, use);
2526 /* Adds candidates based on the uses. */
2528 static void
2529 add_derived_ivs_candidates (struct ivopts_data *data)
2531 unsigned i;
2533 for (i = 0; i < n_iv_uses (data); i++)
2535 struct iv_use *use = iv_use (data, i);
2537 if (!use)
2538 continue;
2540 switch (use->type)
2542 case USE_NONLINEAR_EXPR:
2543 case USE_COMPARE:
2544 case USE_ADDRESS:
2545 /* Just add the ivs based on the value of the iv used here. */
2546 add_iv_value_candidates (data, use->iv, use);
2547 break;
2549 default:
2550 gcc_unreachable ();
2555 /* Record important candidates and add them to related_cands bitmaps
2556 if needed. */
2558 static void
2559 record_important_candidates (struct ivopts_data *data)
2561 unsigned i;
2562 struct iv_use *use;
2564 for (i = 0; i < n_iv_cands (data); i++)
2566 struct iv_cand *cand = iv_cand (data, i);
2568 if (cand->important)
2569 bitmap_set_bit (data->important_candidates, i);
2572 data->consider_all_candidates = (n_iv_cands (data)
2573 <= CONSIDER_ALL_CANDIDATES_BOUND);
2575 if (data->consider_all_candidates)
2577 /* We will not need "related_cands" bitmaps in this case,
2578 so release them to decrease peak memory consumption. */
2579 for (i = 0; i < n_iv_uses (data); i++)
2581 use = iv_use (data, i);
2582 BITMAP_FREE (use->related_cands);
2585 else
2587 /* Add important candidates to the related_cands bitmaps. */
2588 for (i = 0; i < n_iv_uses (data); i++)
2589 bitmap_ior_into (iv_use (data, i)->related_cands,
2590 data->important_candidates);
2594 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2595 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2596 we allocate a simple list to every use. */
2598 static void
2599 alloc_use_cost_map (struct ivopts_data *data)
2601 unsigned i, size, s;
2603 for (i = 0; i < n_iv_uses (data); i++)
2605 struct iv_use *use = iv_use (data, i);
2607 if (data->consider_all_candidates)
2608 size = n_iv_cands (data);
2609 else
2611 s = bitmap_count_bits (use->related_cands);
2613 /* Round up to the nearest power of two, so that computing the modulo is fast. */
2614 size = s ? (1 << ceil_log2 (s)) : 1;
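/* E.g. five related candidates round up to size == 8, so that
   "cand->id & (size - 1)" can later be used as a cheap modulo when
   probing the cost map.  */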
2617 use->n_map_members = size;
2618 use->cost_map = XCNEWVEC (struct cost_pair, size);
2622 /* Returns a description of the computation cost of an expression whose
2623 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2625 static comp_cost
2626 new_cost (unsigned runtime, unsigned complexity)
2628 comp_cost cost;
2633 cost.cost = runtime;
2634 cost.complexity = complexity;
2636 return cost;
2639 /* Adds costs COST1 and COST2. */
2641 static comp_cost
2642 add_costs (comp_cost cost1, comp_cost cost2)
2644 cost1.cost += cost2.cost;
2645 cost1.complexity += cost2.complexity;
2647 return cost1;
2649 /* Subtracts cost COST2 from COST1. */
2651 static comp_cost
2652 sub_costs (comp_cost cost1, comp_cost cost2)
2654 cost1.cost -= cost2.cost;
2655 cost1.complexity -= cost2.complexity;
2657 return cost1;
2660 /* Returns a negative number if COST1 < COST2, a positive number if
2661 COST1 > COST2, and 0 if COST1 = COST2. */
2663 static int
2664 compare_costs (comp_cost cost1, comp_cost cost2)
2666 if (cost1.cost == cost2.cost)
2667 return cost1.complexity - cost2.complexity;
2669 return cost1.cost - cost2.cost;
2672 /* Returns true if COST is infinite. */
2674 static bool
2675 infinite_cost_p (comp_cost cost)
2677 return cost.cost == INFTY;
2680 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2681 on invariants DEPENDS_ON and that the value used in expressing it
2682 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2684 static void
2685 set_use_iv_cost (struct ivopts_data *data,
2686 struct iv_use *use, struct iv_cand *cand,
2687 comp_cost cost, bitmap depends_on, tree value,
2688 enum tree_code comp, int inv_expr_id)
2690 unsigned i, s;
2692 if (infinite_cost_p (cost))
2694 BITMAP_FREE (depends_on);
2695 return;
2698 if (data->consider_all_candidates)
2700 use->cost_map[cand->id].cand = cand;
2701 use->cost_map[cand->id].cost = cost;
2702 use->cost_map[cand->id].depends_on = depends_on;
2703 use->cost_map[cand->id].value = value;
2704 use->cost_map[cand->id].comp = comp;
2705 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2706 return;
2709 /* n_map_members is a power of two, so this computes modulo. */
2710 s = cand->id & (use->n_map_members - 1);
2711 for (i = s; i < use->n_map_members; i++)
2712 if (!use->cost_map[i].cand)
2713 goto found;
2714 for (i = 0; i < s; i++)
2715 if (!use->cost_map[i].cand)
2716 goto found;
2718 gcc_unreachable ();
2720 found:
2721 use->cost_map[i].cand = cand;
2722 use->cost_map[i].cost = cost;
2723 use->cost_map[i].depends_on = depends_on;
2724 use->cost_map[i].value = value;
2725 use->cost_map[i].comp = comp;
2726 use->cost_map[i].inv_expr_id = inv_expr_id;
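/* The probing above is open addressing with linear search: we scan from
   the hash position S to the end of the table and then wrap around to
   the beginning.  A free slot always exists, since n_map_members is at
   least the number of related candidates.  */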
2729 /* Gets cost of (USE, CANDIDATE) pair. */
2731 static struct cost_pair *
2732 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2733 struct iv_cand *cand)
2735 unsigned i, s;
2736 struct cost_pair *ret;
2738 if (!cand)
2739 return NULL;
2741 if (data->consider_all_candidates)
2743 ret = use->cost_map + cand->id;
2744 if (!ret->cand)
2745 return NULL;
2747 return ret;
2750 /* n_map_members is a power of two, so this computes modulo. */
2751 s = cand->id & (use->n_map_members - 1);
2752 for (i = s; i < use->n_map_members; i++)
2753 if (use->cost_map[i].cand == cand)
2754 return use->cost_map + i;
2755 else if (use->cost_map[i].cand == NULL)
2756 return NULL;
2757 for (i = 0; i < s; i++)
2758 if (use->cost_map[i].cand == cand)
2759 return use->cost_map + i;
2760 else if (use->cost_map[i].cand == NULL)
2761 return NULL;
2763 return NULL;
2766 /* Returns estimate on cost of computing SEQ. */
2768 static unsigned
2769 seq_cost (rtx seq, bool speed)
2771 unsigned cost = 0;
2772 rtx set;
2774 for (; seq; seq = NEXT_INSN (seq))
2776 set = single_set (seq);
2777 if (set)
2778 cost += set_src_cost (SET_SRC (set), speed);
2779 else
2780 cost++;
2783 return cost;
2786 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2787 static rtx
2788 produce_memory_decl_rtl (tree obj, int *regno)
2790 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2791 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2792 rtx x;
2794 gcc_assert (obj);
2795 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2797 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2798 x = gen_rtx_SYMBOL_REF (address_mode, name);
2799 SET_SYMBOL_REF_DECL (x, obj);
2800 x = gen_rtx_MEM (DECL_MODE (obj), x);
2801 set_mem_addr_space (x, as);
2802 targetm.encode_section_info (obj, x, true);
2804 else
2806 x = gen_raw_REG (address_mode, (*regno)++);
2807 x = gen_rtx_MEM (DECL_MODE (obj), x);
2808 set_mem_addr_space (x, as);
2811 return x;
2814 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2815 walk_tree. DATA contains the actual fake register number. */
2817 static tree
2818 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2820 tree obj = NULL_TREE;
2821 rtx x = NULL_RTX;
2822 int *regno = (int *) data;
2824 switch (TREE_CODE (*expr_p))
2826 case ADDR_EXPR:
2827 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2828 handled_component_p (*expr_p);
2829 expr_p = &TREE_OPERAND (*expr_p, 0))
2830 continue;
2831 obj = *expr_p;
2832 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2833 x = produce_memory_decl_rtl (obj, regno);
2834 break;
2836 case SSA_NAME:
2837 *ws = 0;
2838 obj = SSA_NAME_VAR (*expr_p);
2839 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2840 if (!obj)
2841 return NULL_TREE;
2842 if (!DECL_RTL_SET_P (obj))
2843 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2844 break;
2846 case VAR_DECL:
2847 case PARM_DECL:
2848 case RESULT_DECL:
2849 *ws = 0;
2850 obj = *expr_p;
2852 if (DECL_RTL_SET_P (obj))
2853 break;
2855 if (DECL_MODE (obj) == BLKmode)
2856 x = produce_memory_decl_rtl (obj, regno);
2857 else
2858 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2860 break;
2862 default:
2863 break;
2866 if (x)
2868 decl_rtl_to_reset.safe_push (obj);
2869 SET_DECL_RTL (obj, x);
2872 return NULL_TREE;
2875 /* Determines cost of the computation of EXPR. */
2877 static unsigned
2878 computation_cost (tree expr, bool speed)
2880 rtx seq, rslt;
2881 tree type = TREE_TYPE (expr);
2882 unsigned cost;
2883 /* Avoid using hard regs in ways which may be unsupported. */
2884 int regno = LAST_VIRTUAL_REGISTER + 1;
2885 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2886 enum node_frequency real_frequency = node->frequency;
2888 node->frequency = NODE_FREQUENCY_NORMAL;
2889 crtl->maybe_hot_insn_p = speed;
2890 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2891 start_sequence ();
2892 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2893 seq = get_insns ();
2894 end_sequence ();
2895 default_rtl_profile ();
2896 node->frequency = real_frequency;
2898 cost = seq_cost (seq, speed);
2899 if (MEM_P (rslt))
2900 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2901 TYPE_ADDR_SPACE (type), speed);
2902 else if (!REG_P (rslt))
2903 cost += set_src_cost (rslt, speed);
2905 return cost;
2908 /* Returns variable containing the value of candidate CAND at statement AT. */
2910 static tree
2911 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2913 if (stmt_after_increment (loop, cand, stmt))
2914 return cand->var_after;
2915 else
2916 return cand->var_before;
2919 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2920 same precision, which is at least as wide as the precision of TYPE, stores
2921 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2922 type of A and B. */
2924 static tree
2925 determine_common_wider_type (tree *a, tree *b)
2927 tree wider_type = NULL;
2928 tree suba, subb;
2929 tree atype = TREE_TYPE (*a);
2931 if (CONVERT_EXPR_P (*a))
2933 suba = TREE_OPERAND (*a, 0);
2934 wider_type = TREE_TYPE (suba);
2935 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2936 return atype;
2938 else
2939 return atype;
2941 if (CONVERT_EXPR_P (*b))
2943 subb = TREE_OPERAND (*b, 0);
2944 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2945 return atype;
2947 else
2948 return atype;
2950 *a = suba;
2951 *b = subb;
2952 return wider_type;
2955 /* Determines the expression by which USE is expressed from induction variable
2956 CAND at statement AT in LOOP. The expression is stored in a decomposed
2957 form into AFF. Returns false if USE cannot be expressed using CAND. */
2959 static bool
2960 get_computation_aff (struct loop *loop,
2961 struct iv_use *use, struct iv_cand *cand, gimple at,
2962 struct affine_tree_combination *aff)
2964 tree ubase = use->iv->base;
2965 tree ustep = use->iv->step;
2966 tree cbase = cand->iv->base;
2967 tree cstep = cand->iv->step, cstep_common;
2968 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2969 tree common_type, var;
2970 tree uutype;
2971 aff_tree cbase_aff, var_aff;
2972 widest_int rat;
2974 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2976 /* We do not have enough precision to express the values of USE. */
2977 return false;
2980 var = var_at_stmt (loop, cand, at);
2981 uutype = unsigned_type_for (utype);
2983 /* If the conversion is not noop, perform it. */
2984 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2986 cstep = fold_convert (uutype, cstep);
2987 cbase = fold_convert (uutype, cbase);
2988 var = fold_convert (uutype, var);
2991 if (!constant_multiple_of (ustep, cstep, &rat))
2992 return false;
2994 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2995 type, we achieve better folding by computing their difference in this
2996 wider type, and casting the result to UUTYPE. We do not need to worry about
2997 overflows, as all the arithmetic will in the end be performed in UUTYPE
2998 anyway. */
2999 common_type = determine_common_wider_type (&ubase, &cbase);
3001 /* use = ubase - ratio * cbase + ratio * var. */
3002 tree_to_aff_combination (ubase, common_type, aff);
3003 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3004 tree_to_aff_combination (var, uutype, &var_aff);
3006 /* We need to shift the value if we are after the increment. */
3007 if (stmt_after_increment (loop, cand, at))
3009 aff_tree cstep_aff;
3011 if (common_type != uutype)
3012 cstep_common = fold_convert (common_type, cstep);
3013 else
3014 cstep_common = cstep;
3016 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3017 aff_combination_add (&cbase_aff, &cstep_aff);
3020 aff_combination_scale (&cbase_aff, -rat);
3021 aff_combination_add (aff, &cbase_aff);
3022 if (common_type != uutype)
3023 aff_combination_convert (aff, uutype);
3025 aff_combination_scale (&var_aff, rat);
3026 aff_combination_add (aff, &var_aff);
3028 return true;
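/* Worked example (illustrative): if USE has step 8 and CAND has step 4,
   then RAT == 2 and the combination built above represents
   use = ubase - 2 * cbase + 2 * var, evaluated in UUTYPE.  */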
3031 /* Return the type of USE. */
3033 static tree
3034 get_use_type (struct iv_use *use)
3036 tree base_type = TREE_TYPE (use->iv->base);
3037 tree type;
3039 if (use->type == USE_ADDRESS)
3041 /* The base_type may be a void pointer. Create a pointer type based on
3042 the mem_ref instead. */
3043 type = build_pointer_type (TREE_TYPE (*use->op_p));
3044 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3045 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3047 else
3048 type = base_type;
3050 return type;
3053 /* Determines the expression by which USE is expressed from induction variable
3054 CAND at statement AT in LOOP. The computation is unshared. */
3056 static tree
3057 get_computation_at (struct loop *loop,
3058 struct iv_use *use, struct iv_cand *cand, gimple at)
3060 aff_tree aff;
3061 tree type = get_use_type (use);
3063 if (!get_computation_aff (loop, use, cand, at, &aff))
3064 return NULL_TREE;
3065 unshare_aff_combination (&aff);
3066 return fold_convert (type, aff_combination_to_tree (&aff));
3069 /* Determines the expression by which USE is expressed from induction variable
3070 CAND in LOOP. The computation is unshared. */
3072 static tree
3073 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3075 return get_computation_at (loop, use, cand, use->stmt);
3078 /* Adjust the cost COST for being in loop setup rather than loop body.
3079 If we're optimizing for space, the loop setup overhead is constant;
3080 if we're optimizing for speed, amortize it over the per-iteration cost. */
3081 static unsigned
3082 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3084 if (cost == INFTY)
3085 return cost;
3086 else if (optimize_loop_for_speed_p (data->current_loop))
3087 return cost / avg_loop_niter (data->current_loop);
3088 else
3089 return cost;
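/* E.g. a setup cost of 8 with avg_loop_niter == 4 is amortized to 2 when
   optimizing for speed; when optimizing for size the full cost of 8 is
   kept, since the setup code is emitted only once.  */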
3092 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3093 validity for a memory reference accessing memory of mode MODE in
3094 address space AS. */
3097 bool
3098 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3099 addr_space_t as)
3101 #define MAX_RATIO 128
3102 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3103 static vec<sbitmap> valid_mult_list;
3104 sbitmap valid_mult;
3106 if (data_index >= valid_mult_list.length ())
3107 valid_mult_list.safe_grow_cleared (data_index + 1);
3109 valid_mult = valid_mult_list[data_index];
3110 if (!valid_mult)
3112 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3113 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3114 rtx addr;
3115 HOST_WIDE_INT i;
3117 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3118 bitmap_clear (valid_mult);
3119 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3120 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3122 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3123 if (memory_address_addr_space_p (mode, addr, as))
3124 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3127 if (dump_file && (dump_flags & TDF_DETAILS))
3129 fprintf (dump_file, " allowed multipliers:");
3130 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3131 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3132 fprintf (dump_file, " %d", (int) i);
3133 fprintf (dump_file, "\n");
3134 fprintf (dump_file, "\n");
3137 valid_mult_list[data_index] = valid_mult;
3140 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3141 return false;
3143 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
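/* For instance, on targets whose addressing modes scale the index by
   2, 4 or 8 (as on typical x86 variants), the probing above leaves
   exactly those ratios valid; the precise set is target-specific.  */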
3146 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3147 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3148 variable is omitted. Compute the cost for a memory reference that accesses
3149 a memory location of mode MEM_MODE in address space AS.
3151 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3152 size of MEM_MODE / RATIO) is available. To make this determination, we
3153 look at the size of the increment to be made, which is given in CSTEP.
3154 CSTEP may be zero if the step is unknown.
3155 STMT_AFTER_INC is true iff the statement we're looking at is after the
3156 increment of the original biv.
3158 TODO -- there must be some better way. This all is quite crude. */
3160 typedef struct address_cost_data_s
3162 HOST_WIDE_INT min_offset, max_offset;
3163 unsigned costs[2][2][2][2];
3164 } *address_cost_data;
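/* The costs array is indexed as costs[sym_p][var_p][off_p][rat_p];
   e.g. costs[0][1][1][0] is the cost of an address of the shape
   var + offset, with no symbol and no scaled index.  */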
3167 static comp_cost
3168 get_address_cost (bool symbol_present, bool var_present,
3169 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3170 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3171 addr_space_t as, bool speed,
3172 bool stmt_after_inc, bool *may_autoinc)
3174 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3175 static vec<address_cost_data> address_cost_data_list;
3176 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3177 address_cost_data data;
3178 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3179 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3180 unsigned cost, acost, complexity;
3181 bool offset_p, ratio_p, autoinc;
3182 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3183 unsigned HOST_WIDE_INT mask;
3184 unsigned bits;
3186 if (data_index >= address_cost_data_list.length ())
3187 address_cost_data_list.safe_grow_cleared (data_index + 1);
3189 data = address_cost_data_list[data_index];
3190 if (!data)
3192 HOST_WIDE_INT i;
3193 HOST_WIDE_INT rat, off = 0;
3194 int old_cse_not_expected, width;
3195 unsigned sym_p, var_p, off_p, rat_p, add_c;
3196 rtx seq, addr, base;
3197 rtx reg0, reg1;
3199 data = (address_cost_data) xcalloc (1, sizeof (*data));
3201 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3203 width = GET_MODE_BITSIZE (address_mode) - 1;
3204 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3205 width = HOST_BITS_PER_WIDE_INT - 1;
3206 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3208 for (i = width; i >= 0; i--)
3210 off = -((unsigned HOST_WIDE_INT) 1 << i);
3211 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3212 if (memory_address_addr_space_p (mem_mode, addr, as))
3213 break;
3215 data->min_offset = (i == -1? 0 : off);
3217 for (i = width; i >= 0; i--)
3219 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3220 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3221 if (memory_address_addr_space_p (mem_mode, addr, as))
3222 break;
3224 if (i == -1)
3225 off = 0;
3226 data->max_offset = off;
3228 if (dump_file && (dump_flags & TDF_DETAILS))
3230 fprintf (dump_file, "get_address_cost:\n");
3231 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3232 GET_MODE_NAME (mem_mode),
3233 data->min_offset);
3234 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3235 GET_MODE_NAME (mem_mode),
3236 data->max_offset);
3239 rat = 1;
3240 for (i = 2; i <= MAX_RATIO; i++)
3241 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3243 rat = i;
3244 break;
3247 /* Compute the cost of various addressing modes. */
3248 acost = 0;
3249 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3250 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3252 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3253 || USE_STORE_PRE_DECREMENT (mem_mode))
3255 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3256 has_predec[mem_mode]
3257 = memory_address_addr_space_p (mem_mode, addr, as);
3259 if (USE_LOAD_POST_DECREMENT (mem_mode)
3260 || USE_STORE_POST_DECREMENT (mem_mode))
3262 addr = gen_rtx_POST_DEC (address_mode, reg0);
3263 has_postdec[mem_mode]
3264 = memory_address_addr_space_p (mem_mode, addr, as);
3266 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3267 || USE_STORE_PRE_INCREMENT (mem_mode))
3269 addr = gen_rtx_PRE_INC (address_mode, reg0);
3270 has_preinc[mem_mode]
3271 = memory_address_addr_space_p (mem_mode, addr, as);
3273 if (USE_LOAD_POST_INCREMENT (mem_mode)
3274 || USE_STORE_POST_INCREMENT (mem_mode))
3276 addr = gen_rtx_POST_INC (address_mode, reg0);
3277 has_postinc[mem_mode]
3278 = memory_address_addr_space_p (mem_mode, addr, as);
3280 for (i = 0; i < 16; i++)
3282 sym_p = i & 1;
3283 var_p = (i >> 1) & 1;
3284 off_p = (i >> 2) & 1;
3285 rat_p = (i >> 3) & 1;
3287 addr = reg0;
3288 if (rat_p)
3289 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3290 gen_int_mode (rat, address_mode));
3292 if (var_p)
3293 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3295 if (sym_p)
3297 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3298 /* ??? We can run into trouble with some backends by presenting
3299 it with symbols which haven't been properly passed through
3300 targetm.encode_section_info. By setting the local bit, we
3301 enhance the probability of things working. */
3302 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3304 if (off_p)
3305 base = gen_rtx_fmt_e (CONST, address_mode,
3306 gen_rtx_fmt_ee
3307 (PLUS, address_mode, base,
3308 gen_int_mode (off, address_mode)));
3310 else if (off_p)
3311 base = gen_int_mode (off, address_mode);
3312 else
3313 base = NULL_RTX;
3315 if (base)
3316 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3318 start_sequence ();
3319 /* To avoid splitting addressing modes, pretend that no cse will
3320 follow. */
3321 old_cse_not_expected = cse_not_expected;
3322 cse_not_expected = true;
3323 addr = memory_address_addr_space (mem_mode, addr, as);
3324 cse_not_expected = old_cse_not_expected;
3325 seq = get_insns ();
3326 end_sequence ();
3328 acost = seq_cost (seq, speed);
3329 acost += address_cost (addr, mem_mode, as, speed);
3331 if (!acost)
3332 acost = 1;
3333 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3336 /* On some targets, it is quite expensive to load a symbol into a register,
3337 which makes addresses that contain symbols look much more expensive.
3338 However, the symbol will have to be loaded in any case before the
3339 loop (and quite likely we have it in a register already), so it does not
3340 make much sense to penalize them too heavily. So make some final
3341 tweaks for the SYMBOL_PRESENT modes:
3343 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3344 var is cheaper, use this mode with small penalty.
3345 If VAR_PRESENT is true, try whether the mode with
3346 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3347 if this is the case, use it. */
3348 add_c = add_cost (speed, address_mode);
3349 for (i = 0; i < 8; i++)
3351 var_p = i & 1;
3352 off_p = (i >> 1) & 1;
3353 rat_p = (i >> 2) & 1;
3355 acost = data->costs[0][1][off_p][rat_p] + 1;
3356 if (var_p)
3357 acost += add_c;
3359 if (acost < data->costs[1][var_p][off_p][rat_p])
3360 data->costs[1][var_p][off_p][rat_p] = acost;
3363 if (dump_file && (dump_flags & TDF_DETAILS))
3365 fprintf (dump_file, "Address costs:\n");
3367 for (i = 0; i < 16; i++)
3369 sym_p = i & 1;
3370 var_p = (i >> 1) & 1;
3371 off_p = (i >> 2) & 1;
3372 rat_p = (i >> 3) & 1;
3374 fprintf (dump_file, " ");
3375 if (sym_p)
3376 fprintf (dump_file, "sym + ");
3377 if (var_p)
3378 fprintf (dump_file, "var + ");
3379 if (off_p)
3380 fprintf (dump_file, "cst + ");
3381 if (rat_p)
3382 fprintf (dump_file, "rat * ");
3384 acost = data->costs[sym_p][var_p][off_p][rat_p];
3385 fprintf (dump_file, "index costs %d\n", acost);
3387 if (has_predec[mem_mode] || has_postdec[mem_mode]
3388 || has_preinc[mem_mode] || has_postinc[mem_mode])
3389 fprintf (dump_file, " May include autoinc/dec\n");
3390 fprintf (dump_file, "\n");
3393 address_cost_data_list[data_index] = data;
3396 bits = GET_MODE_BITSIZE (address_mode);
3397 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3398 offset &= mask;
3399 if ((offset >> (bits - 1) & 1))
3400 offset |= ~mask;
3401 s_offset = offset;
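/* Example of the sign extension above: with a 32-bit address mode,
   offset == 0xfffffff0 has bit 31 set, so it becomes s_offset == -16
   before being compared against the min_offset/max_offset range.  */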
3403 autoinc = false;
3404 msize = GET_MODE_SIZE (mem_mode);
3405 autoinc_offset = offset;
3406 if (stmt_after_inc)
3407 autoinc_offset += ratio * cstep;
3408 if (symbol_present || var_present || ratio != 1)
3409 autoinc = false;
3410 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3411 && msize == cstep)
3412 || (has_postdec[mem_mode] && autoinc_offset == 0
3413 && msize == -cstep)
3414 || (has_preinc[mem_mode] && autoinc_offset == msize
3415 && msize == cstep)
3416 || (has_predec[mem_mode] && autoinc_offset == -msize
3417 && msize == -cstep))
3418 autoinc = true;
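/* E.g. a post-increment is usable when the access size equals the step
   (msize == cstep) and no symbol, invariant part, scaling or leftover
   offset is involved, so the iv increment can be folded into the memory
   access itself.  */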
3420 cost = 0;
3421 offset_p = (s_offset != 0
3422 && data->min_offset <= s_offset
3423 && s_offset <= data->max_offset);
3424 ratio_p = (ratio != 1
3425 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3427 if (ratio != 1 && !ratio_p)
3428 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3430 if (s_offset && !offset_p && !symbol_present)
3431 cost += add_cost (speed, address_mode);
3433 if (may_autoinc)
3434 *may_autoinc = autoinc;
3435 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3436 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3437 return new_cost (cost + acost, complexity);
3440 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3441 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3442 calculating the operands of EXPR. Returns true if successful, and returns
3443 the cost in COST. */
3445 static bool
3446 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3447 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3449 comp_cost res;
3450 tree op1 = TREE_OPERAND (expr, 1);
3451 tree cst = TREE_OPERAND (mult, 1);
3452 tree multop = TREE_OPERAND (mult, 0);
3453 int m = exact_log2 (int_cst_value (cst));
3454 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3455 int sa_cost;
3457 if (!(m >= 0 && m < maxm))
3458 return false;
3460 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3461 ? shiftadd_cost (speed, mode, m)
3462 : (mult == op1
3463 ? shiftsub1_cost (speed, mode, m)
3464 : shiftsub0_cost (speed, mode, m)));
3465 res = new_cost (sa_cost, 0);
3466 res = add_costs (res, mult == op1 ? cost0 : cost1);
3468 STRIP_NOPS (multop);
3469 if (!is_gimple_val (multop))
3470 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3472 *cost = res;
3474 return true;
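/* E.g. for expr == a + b * 8, cst == 8 gives m == 3, so the cost is that
   of a single shift-and-add rather than a full multiply; for a - b * 8
   the shiftsub variants are used instead.  */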
3477 /* Estimates cost of forcing expression EXPR into a variable. */
3479 static comp_cost
3480 force_expr_to_var_cost (tree expr, bool speed)
3482 static bool costs_initialized = false;
3483 static unsigned integer_cost [2];
3484 static unsigned symbol_cost [2];
3485 static unsigned address_cost [2];
3486 tree op0, op1;
3487 comp_cost cost0, cost1, cost;
3488 enum machine_mode mode;
3490 if (!costs_initialized)
3492 tree type = build_pointer_type (integer_type_node);
3493 tree var, addr;
3494 rtx x;
3495 int i;
3497 var = create_tmp_var_raw (integer_type_node, "test_var");
3498 TREE_STATIC (var) = 1;
3499 x = produce_memory_decl_rtl (var, NULL);
3500 SET_DECL_RTL (var, x);
3502 addr = build1 (ADDR_EXPR, type, var);
3505 for (i = 0; i < 2; i++)
3507 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3508 2000), i);
3510 symbol_cost[i] = computation_cost (addr, i) + 1;
3512 address_cost[i]
3513 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3514 if (dump_file && (dump_flags & TDF_DETAILS))
3516 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3517 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3518 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3519 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3520 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3521 fprintf (dump_file, "\n");
3525 costs_initialized = true;
3528 STRIP_NOPS (expr);
3530 if (SSA_VAR_P (expr))
3531 return no_cost;
3533 if (is_gimple_min_invariant (expr))
3535 if (TREE_CODE (expr) == INTEGER_CST)
3536 return new_cost (integer_cost [speed], 0);
3538 if (TREE_CODE (expr) == ADDR_EXPR)
3540 tree obj = TREE_OPERAND (expr, 0);
3542 if (TREE_CODE (obj) == VAR_DECL
3543 || TREE_CODE (obj) == PARM_DECL
3544 || TREE_CODE (obj) == RESULT_DECL)
3545 return new_cost (symbol_cost [speed], 0);
3548 return new_cost (address_cost [speed], 0);
3551 switch (TREE_CODE (expr))
3553 case POINTER_PLUS_EXPR:
3554 case PLUS_EXPR:
3555 case MINUS_EXPR:
3556 case MULT_EXPR:
3557 op0 = TREE_OPERAND (expr, 0);
3558 op1 = TREE_OPERAND (expr, 1);
3559 STRIP_NOPS (op0);
3560 STRIP_NOPS (op1);
3562 if (is_gimple_val (op0))
3563 cost0 = no_cost;
3564 else
3565 cost0 = force_expr_to_var_cost (op0, speed);
3567 if (is_gimple_val (op1))
3568 cost1 = no_cost;
3569 else
3570 cost1 = force_expr_to_var_cost (op1, speed);
3572 break;
3574 case NEGATE_EXPR:
3575 op0 = TREE_OPERAND (expr, 0);
3576 STRIP_NOPS (op0);
3577 op1 = NULL_TREE;
3579 if (is_gimple_val (op0))
3580 cost0 = no_cost;
3581 else
3582 cost0 = force_expr_to_var_cost (op0, speed);
3584 cost1 = no_cost;
3585 break;
3587 default:
3589 /* Just an arbitrary value, FIXME. */
3590 return new_cost (target_spill_cost[speed], 0);
3593 mode = TYPE_MODE (TREE_TYPE (expr));
3594 switch (TREE_CODE (expr))
3596 case POINTER_PLUS_EXPR:
3597 case PLUS_EXPR:
3598 case MINUS_EXPR:
3599 case NEGATE_EXPR:
3600 cost = new_cost (add_cost (speed, mode), 0);
3601 if (TREE_CODE (expr) != NEGATE_EXPR)
3603 tree mult = NULL_TREE;
3604 comp_cost sa_cost;
3605 if (TREE_CODE (op1) == MULT_EXPR)
3606 mult = op1;
3607 else if (TREE_CODE (op0) == MULT_EXPR)
3608 mult = op0;
3610 if (mult != NULL_TREE
3611 && cst_fits_shwi_p (TREE_OPERAND (mult, 1))
3612 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3613 speed, &sa_cost))
3614 return sa_cost;
3616 break;
3618 case MULT_EXPR:
3619 if (cst_fits_shwi_p (op0))
3620 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3621 mode, speed), 0);
3622 else if (cst_fits_shwi_p (op1))
3623 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3624 mode, speed), 0);
3625 else
3626 return new_cost (target_spill_cost [speed], 0);
3627 break;
3629 default:
3630 gcc_unreachable ();
3633 cost = add_costs (cost, cost0);
3634 cost = add_costs (cost, cost1);
3636 /* Bound the cost by target_spill_cost. The parts of complicated
3637 computations often are either loop invariant or at least can
3638 be shared between several iv uses, so letting this grow without
3639 limits would not give reasonable results. */
3640 if (cost.cost > (int) target_spill_cost [speed])
3641 cost.cost = target_spill_cost [speed];
3643 return cost;
3646 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3647 invariants the computation depends on. */
3649 static comp_cost
3650 force_var_cost (struct ivopts_data *data,
3651 tree expr, bitmap *depends_on)
3653 if (depends_on)
3655 fd_ivopts_data = data;
3656 walk_tree (&expr, find_depends, depends_on, NULL);
3659 return force_expr_to_var_cost (expr, data->speed);
3662 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3663 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3664 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3665 invariants the computation depends on. */
3667 static comp_cost
3668 split_address_cost (struct ivopts_data *data,
3669 tree addr, bool *symbol_present, bool *var_present,
3670 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3672 tree core;
3673 HOST_WIDE_INT bitsize;
3674 HOST_WIDE_INT bitpos;
3675 tree toffset;
3676 enum machine_mode mode;
3677 int unsignedp, volatilep;
3679 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3680 &unsignedp, &volatilep, false);
3682 if (toffset != 0
3683 || bitpos % BITS_PER_UNIT != 0
3684 || TREE_CODE (core) != VAR_DECL)
3686 *symbol_present = false;
3687 *var_present = true;
3688 fd_ivopts_data = data;
3689 walk_tree (&addr, find_depends, depends_on, NULL);
3690 return new_cost (target_spill_cost[data->speed], 0);
3693 *offset += bitpos / BITS_PER_UNIT;
3694 if (TREE_STATIC (core)
3695 || DECL_EXTERNAL (core))
3697 *symbol_present = true;
3698 *var_present = false;
3699 return no_cost;
3702 *symbol_present = false;
3703 *var_present = true;
3704 return no_cost;
3707 /* Estimates cost of expressing difference of addresses E1 - E2 as
3708 var + symbol + offset. The value of offset is added to OFFSET,
3709 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3710 part is missing. DEPENDS_ON is a set of the invariants the computation
3711 depends on. */
3713 static comp_cost
3714 ptr_difference_cost (struct ivopts_data *data,
3715 tree e1, tree e2, bool *symbol_present, bool *var_present,
3716 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3718 HOST_WIDE_INT diff = 0;
3719 aff_tree aff_e1, aff_e2;
3720 tree type;
3722 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3724 if (ptr_difference_const (e1, e2, &diff))
3726 *offset += diff;
3727 *symbol_present = false;
3728 *var_present = false;
3729 return no_cost;
3732 if (integer_zerop (e2))
3733 return split_address_cost (data, TREE_OPERAND (e1, 0),
3734 symbol_present, var_present, offset, depends_on);
3736 *symbol_present = false;
3737 *var_present = true;
3739 type = signed_type_for (TREE_TYPE (e1));
3740 tree_to_aff_combination (e1, type, &aff_e1);
3741 tree_to_aff_combination (e2, type, &aff_e2);
3742 aff_combination_scale (&aff_e2, -1);
3743 aff_combination_add (&aff_e1, &aff_e2);
3745 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3748 /* Estimates cost of expressing difference E1 - E2 as
3749 var + symbol + offset. The value of offset is added to OFFSET,
3750 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3751 part is missing. DEPENDS_ON is a set of the invariants the computation
3752 depends on. */
3754 static comp_cost
3755 difference_cost (struct ivopts_data *data,
3756 tree e1, tree e2, bool *symbol_present, bool *var_present,
3757 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3759 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3760 unsigned HOST_WIDE_INT off1, off2;
3761 aff_tree aff_e1, aff_e2;
3762 tree type;
3764 e1 = strip_offset (e1, &off1);
3765 e2 = strip_offset (e2, &off2);
3766 *offset += off1 - off2;
3768 STRIP_NOPS (e1);
3769 STRIP_NOPS (e2);
3771 if (TREE_CODE (e1) == ADDR_EXPR)
3772 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3773 offset, depends_on);
3774 *symbol_present = false;
3776 if (operand_equal_p (e1, e2, 0))
3778 *var_present = false;
3779 return no_cost;
3782 *var_present = true;
3784 if (integer_zerop (e2))
3785 return force_var_cost (data, e1, depends_on);
3787 if (integer_zerop (e1))
3789 comp_cost cost = force_var_cost (data, e2, depends_on);
3790 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3791 return cost;
3794 type = signed_type_for (TREE_TYPE (e1));
3795 tree_to_aff_combination (e1, type, &aff_e1);
3796 tree_to_aff_combination (e2, type, &aff_e2);
3797 aff_combination_scale (&aff_e2, -1);
3798 aff_combination_add (&aff_e1, &aff_e2);
3800 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3803 /* Returns true if AFF1 and AFF2 are identical. */
3805 static bool
3806 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3808 unsigned i;
3810 if (aff1->n != aff2->n)
3811 return false;
3813 for (i = 0; i < aff1->n; i++)
3815 if (aff1->elts[i].coef != aff2->elts[i].coef)
3816 return false;
3818 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3819 return false;
3821 return true;
3824 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3826 static int
3827 get_expr_id (struct ivopts_data *data, tree expr)
3829 struct iv_inv_expr_ent ent;
3830 struct iv_inv_expr_ent **slot;
3832 ent.expr = expr;
3833 ent.hash = iterative_hash_expr (expr, 0);
3834 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3835 if (*slot)
3836 return (*slot)->id;
3838 *slot = XNEW (struct iv_inv_expr_ent);
3839 (*slot)->expr = expr;
3840 (*slot)->hash = ent.hash;
3841 (*slot)->id = data->inv_expr_id++;
3842 return (*slot)->id;
3845 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3846 requires a new compiler generated temporary. Returns -1 otherwise.
3847 ADDRESS_P is a flag indicating if the expression is for address
3848 computation. */
3850 static int
3851 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3852 tree cbase, HOST_WIDE_INT ratio,
3853 bool address_p)
3855 aff_tree ubase_aff, cbase_aff;
3856 tree expr, ub, cb;
3858 STRIP_NOPS (ubase);
3859 STRIP_NOPS (cbase);
3860 ub = ubase;
3861 cb = cbase;
3863 if ((TREE_CODE (ubase) == INTEGER_CST)
3864 && (TREE_CODE (cbase) == INTEGER_CST))
3865 return -1;
3867 /* Strips the constant part. */
3868 if (TREE_CODE (ubase) == PLUS_EXPR
3869 || TREE_CODE (ubase) == MINUS_EXPR
3870 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3872 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3873 ubase = TREE_OPERAND (ubase, 0);
3876 /* Strips the constant part. */
3877 if (TREE_CODE (cbase) == PLUS_EXPR
3878 || TREE_CODE (cbase) == MINUS_EXPR
3879 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3881 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3882 cbase = TREE_OPERAND (cbase, 0);
3885 if (address_p)
3887 if (((TREE_CODE (ubase) == SSA_NAME)
3888 || (TREE_CODE (ubase) == ADDR_EXPR
3889 && is_gimple_min_invariant (ubase)))
3890 && (TREE_CODE (cbase) == INTEGER_CST))
3891 return -1;
3893 if (((TREE_CODE (cbase) == SSA_NAME)
3894 || (TREE_CODE (cbase) == ADDR_EXPR
3895 && is_gimple_min_invariant (cbase)))
3896 && (TREE_CODE (ubase) == INTEGER_CST))
3897 return -1;
3900 if (ratio == 1)
3902 if (operand_equal_p (ubase, cbase, 0))
3903 return -1;
3905 if (TREE_CODE (ubase) == ADDR_EXPR
3906 && TREE_CODE (cbase) == ADDR_EXPR)
3908 tree usym, csym;
3910 usym = TREE_OPERAND (ubase, 0);
3911 csym = TREE_OPERAND (cbase, 0);
3912 if (TREE_CODE (usym) == ARRAY_REF)
3914 tree ind = TREE_OPERAND (usym, 1);
3915 if (TREE_CODE (ind) == INTEGER_CST
3916 && tree_fits_shwi_p (ind)
3917 && tree_to_shwi (ind) == 0)
3918 usym = TREE_OPERAND (usym, 0);
3920 if (TREE_CODE (csym) == ARRAY_REF)
3922 tree ind = TREE_OPERAND (csym, 1);
3923 if (TREE_CODE (ind) == INTEGER_CST
3924 && tree_fits_shwi_p (ind)
3925 && tree_to_shwi (ind) == 0)
3926 csym = TREE_OPERAND (csym, 0);
3928 if (operand_equal_p (usym, csym, 0))
3929 return -1;
3931 /* Now do a more complex comparison. */
3932 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3933 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3934 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3935 return -1;
3938 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3939 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3941 aff_combination_scale (&cbase_aff, -1 * ratio);
3942 aff_combination_add (&ubase_aff, &cbase_aff);
3943 expr = aff_combination_to_tree (&ubase_aff);
3944 return get_expr_id (data, expr);
3949 /* Determines the cost of the computation by which USE is expressed
3950 from induction variable CAND. If ADDRESS_P is true, we just need
3951 to create an address from it, otherwise we want to get it into
3952 a register. A set of invariants we depend on is stored in
3953 DEPENDS_ON. AT is the statement at which the value is computed.
3954 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3955 addressing is likely. */
3957 static comp_cost
3958 get_computation_cost_at (struct ivopts_data *data,
3959 struct iv_use *use, struct iv_cand *cand,
3960 bool address_p, bitmap *depends_on, gimple at,
3961 bool *can_autoinc,
3962 int *inv_expr_id)
3964 tree ubase = use->iv->base, ustep = use->iv->step;
3965 tree cbase, cstep;
3966 tree utype = TREE_TYPE (ubase), ctype;
3967 unsigned HOST_WIDE_INT cstepi, offset = 0;
3968 HOST_WIDE_INT ratio, aratio;
3969 bool var_present, symbol_present, stmt_is_after_inc;
3970 comp_cost cost;
3971 widest_int rat;
3972 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3973 enum machine_mode mem_mode = (address_p
3974 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3975 : VOIDmode);
3977 *depends_on = NULL;
3979 /* Only consider real candidates. */
3980 if (!cand->iv)
3981 return infinite_cost;
3983 cbase = cand->iv->base;
3984 cstep = cand->iv->step;
3985 ctype = TREE_TYPE (cbase);
3987 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3989 /* We do not have enough precision to express the values of USE. */
3990 return infinite_cost;
3993 if (address_p
3994 || (use->iv->base_object
3995 && cand->iv->base_object
3996 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3997 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3999 /* Do not try to express address of an object with computation based
4000 on address of a different object. This may cause problems in rtl
4001 level alias analysis (which does not expect this to happen,
4002 as it is illegal in C), and would be unlikely to be useful
4003 anyway. */
4004 if (use->iv->base_object
4005 && cand->iv->base_object
4006 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4007 return infinite_cost;
4010 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4012 /* TODO -- add direct handling of this case. */
4013 goto fallback;
4016 /* CSTEPI is removed from the offset in case the statement is after the
4017 increment. If the step is not constant, we use zero instead.
4018 This is a bit imprecise (there is the extra addition), but
4019 redundancy elimination is likely to transform the code so that
4020 it uses the value of the variable before the increment anyway,
4021 so this is not too unrealistic. */
4022 if (cst_fits_shwi_p (cstep))
4023 cstepi = int_cst_value (cstep);
4024 else
4025 cstepi = 0;
4027 if (!constant_multiple_of (ustep, cstep, &rat))
4028 return infinite_cost;
4030 if (wi::fits_shwi_p (rat))
4031 ratio = rat.to_shwi ();
4032 else
4033 return infinite_cost;
4035 STRIP_NOPS (cbase);
4036 ctype = TREE_TYPE (cbase);
4038 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4040 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4041 or ratio == 1, it is better to handle this like
4043 ubase - ratio * cbase + ratio * var
4045 (this also holds in the case ratio == -1, TODO). */
4047 if (cst_fits_shwi_p (cbase))
4049 offset = - ratio * int_cst_value (cbase);
4050 cost = difference_cost (data,
4051 ubase, build_int_cst (utype, 0),
4052 &symbol_present, &var_present, &offset,
4053 depends_on);
4054 cost.cost /= avg_loop_niter (data->current_loop);
4056 else if (ratio == 1)
4058 tree real_cbase = cbase;
4060 /* Check to see if any adjustment is needed. */
4061 if (cstepi == 0 && stmt_is_after_inc)
4063 aff_tree real_cbase_aff;
4064 aff_tree cstep_aff;
4066 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4067 &real_cbase_aff);
4068 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4070 aff_combination_add (&real_cbase_aff, &cstep_aff);
4071 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4074 cost = difference_cost (data,
4075 ubase, real_cbase,
4076 &symbol_present, &var_present, &offset,
4077 depends_on);
4078 cost.cost /= avg_loop_niter (data->current_loop);
4080 else if (address_p
4081 && !POINTER_TYPE_P (ctype)
4082 && multiplier_allowed_in_address_p
4083 (ratio, mem_mode,
4084 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4086 cbase
4087 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4088 cost = difference_cost (data,
4089 ubase, cbase,
4090 &symbol_present, &var_present, &offset,
4091 depends_on);
4092 cost.cost /= avg_loop_niter (data->current_loop);
4094 else
4096 cost = force_var_cost (data, cbase, depends_on);
4097 cost = add_costs (cost,
4098 difference_cost (data,
4099 ubase, build_int_cst (utype, 0),
4100 &symbol_present, &var_present,
4101 &offset, depends_on));
4102 cost.cost /= avg_loop_niter (data->current_loop);
4103 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4106 if (inv_expr_id)
4108 *inv_expr_id =
4109 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4110 /* Clear DEPENDS_ON. */
4111 if (*inv_expr_id != -1 && depends_on && *depends_on)
4112 bitmap_clear (*depends_on);
4115 /* If we are after the increment, the value of the candidate is higher by
4116 one iteration. */
4117 if (stmt_is_after_inc)
4118 offset -= ratio * cstepi;
4120 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4121 (symbol/var1/const parts may be omitted). If we are looking for an
4122 address, find the cost of addressing this. */
4123 if (address_p)
4124 return add_costs (cost,
4125 get_address_cost (symbol_present, var_present,
4126 offset, ratio, cstepi,
4127 mem_mode,
4128 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4129 speed, stmt_is_after_inc,
4130 can_autoinc));
4132 /* Otherwise estimate the costs for computing the expression. */
4133 if (!symbol_present && !var_present && !offset)
4135 if (ratio != 1)
4136 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4137 return cost;
4140 /* Symbol + offset should be compile-time computable, so consider that they
4141 are added once to the variable, if present. */
4142 if (var_present && (symbol_present || offset))
4143 cost.cost += adjust_setup_cost (data,
4144 add_cost (speed, TYPE_MODE (ctype)));
4146 /* Having offset does not affect runtime cost in case it is added to
4147 symbol, but it increases complexity. */
4148 if (offset)
4149 cost.complexity++;
4151 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4153 aratio = ratio > 0 ? ratio : -ratio;
4154 if (aratio != 1)
4155 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4156 return cost;
4158 fallback:
4159 if (can_autoinc)
4160 *can_autoinc = false;
4163 /* Just get the expression, expand it and measure the cost. */
4164 tree comp = get_computation_at (data->current_loop, use, cand, at);
4166 if (!comp)
4167 return infinite_cost;
4169 if (address_p)
4170 comp = build_simple_mem_ref (comp);
4172 return new_cost (computation_cost (comp, speed), 0);
4176 /* Determines the cost of the computation by which USE is expressed
4177 from induction variable CAND. If ADDRESS_P is true, we just need
4178 to create an address from it, otherwise we want to get it into
4179 a register. A set of invariants we depend on is stored in
4180 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4181 autoinc addressing is likely. */
4183 static comp_cost
4184 get_computation_cost (struct ivopts_data *data,
4185 struct iv_use *use, struct iv_cand *cand,
4186 bool address_p, bitmap *depends_on,
4187 bool *can_autoinc, int *inv_expr_id)
4189 return get_computation_cost_at (data,
4190 use, cand, address_p, depends_on, use->stmt,
4191 can_autoinc, inv_expr_id);
4194 /* Determines cost of basing replacement of USE on CAND in a generic
4195 expression. */
4197 static bool
4198 determine_use_iv_cost_generic (struct ivopts_data *data,
4199 struct iv_use *use, struct iv_cand *cand)
4201 bitmap depends_on;
4202 comp_cost cost;
4203 int inv_expr_id = -1;
4205 /* The simple case first -- if we need to express the value of the preserved
4206 original biv, the cost is 0. This also prevents us from counting the
4207 cost of increment twice -- once at this use and once in the cost of
4208 the candidate. */
4209 if (cand->pos == IP_ORIGINAL
4210 && cand->incremented_at == use->stmt)
4212 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4213 ERROR_MARK, -1);
4214 return true;
4217 cost = get_computation_cost (data, use, cand, false, &depends_on,
4218 NULL, &inv_expr_id);
4220 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4221 inv_expr_id);
4223 return !infinite_cost_p (cost);
4226 /* Determines cost of basing replacement of USE on CAND in an address. */
4228 static bool
4229 determine_use_iv_cost_address (struct ivopts_data *data,
4230 struct iv_use *use, struct iv_cand *cand)
4232 bitmap depends_on;
4233 bool can_autoinc;
4234 int inv_expr_id = -1;
4235 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4236 &can_autoinc, &inv_expr_id);
4238 if (cand->ainc_use == use)
4240 if (can_autoinc)
4241 cost.cost -= cand->cost_step;
4242 /* If we generated the candidate solely for exploiting autoincrement
4243 opportunities, and it turns out it can't be used, set the cost to
4244 infinity to make sure we ignore it. */
4245 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4246 cost = infinite_cost;
4248 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4249 inv_expr_id);
4251 return !infinite_cost_p (cost);
4254 /* Computes value of candidate CAND at position AT in iteration NITER, and
4255 stores it to VAL. */
4257 static void
4258 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4259 aff_tree *val)
4261 aff_tree step, delta, nit;
4262 struct iv *iv = cand->iv;
4263 tree type = TREE_TYPE (iv->base);
4264 tree steptype = type;
4265 if (POINTER_TYPE_P (type))
4266 steptype = sizetype;
4268 tree_to_aff_combination (iv->step, steptype, &step);
4269 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4270 aff_combination_convert (&nit, steptype);
4271 aff_combination_mult (&nit, &step, &delta);
4272 if (stmt_after_increment (loop, cand, at))
4273 aff_combination_add (&delta, &step);
4275 tree_to_aff_combination (iv->base, type, val);
4276 aff_combination_add (val, &delta);
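/* Worked example, added for exposition: for an iv with base 16 and
   step 4, the value after NITER = 10 iterations is 16 + 10 * 4 = 56;
   if AT lies after the increment, one extra step is added, giving
   16 + (10 + 1) * 4 = 60.  */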
4279 /* Returns the period of induction variable IV. */
4281 static tree
4282 iv_period (struct iv *iv)
4284 tree step = iv->step, period, type;
4285 tree pow2div;
4287 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4289 type = unsigned_type_for (TREE_TYPE (step));
4290 /* Period of the iv is lcm (step, type_range)/step - 1,
4291 i.e., N*type_range/step - 1. Since the type range is a power
4292 of two, N == step >> num_of_ending_zeros_binary (step),
4293 so the final result is
4295 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4298 pow2div = num_ending_zeros (step);
4300 period = build_low_bits_mask (type,
4301 (TYPE_PRECISION (type)
4302 - tree_to_uhwi (pow2div)));
4304 return period;
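/* Worked example, added for exposition: for a 32-bit unsigned type and
   step 12 (binary 1100), num_ending_zeros is 2, so the mask built above
   has 32 - 2 = 30 low bits set and the period is 0x3fffffff: the iv
   assumes 2^30 distinct values before it wraps.  */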
4307 /* Returns the comparison operator used when eliminating the iv USE. */
4309 static enum tree_code
4310 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4312 struct loop *loop = data->current_loop;
4313 basic_block ex_bb;
4314 edge exit;
4316 ex_bb = gimple_bb (use->stmt);
4317 exit = EDGE_SUCC (ex_bb, 0);
4318 if (flow_bb_inside_loop_p (loop, exit->dest))
4319 exit = EDGE_SUCC (ex_bb, 1);
4321 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
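/* For example (added): if the loop is left when the GIMPLE_COND is
   true, the replacement exit test has the shape "i == bound", hence
   EQ_EXPR; if the loop is left on the false edge, the loop keeps
   running while "i != bound" holds, hence NE_EXPR.  */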
4324 static tree
4325 strip_wrap_conserving_type_conversions (tree exp)
4327 while (tree_ssa_useless_type_conversion (exp)
4328 && (nowrap_type_p (TREE_TYPE (exp))
4329 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4330 exp = TREE_OPERAND (exp, 0);
4331 return exp;
4334 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4335 only check for an exact match. */
4337 static bool
4338 expr_equal_p (tree e, tree what)
4340 gimple stmt;
4341 enum tree_code code;
4343 e = strip_wrap_conserving_type_conversions (e);
4344 what = strip_wrap_conserving_type_conversions (what);
4346 code = TREE_CODE (what);
4347 if (TREE_TYPE (e) != TREE_TYPE (what))
4348 return false;
4350 if (operand_equal_p (e, what, 0))
4351 return true;
4353 if (TREE_CODE (e) != SSA_NAME)
4354 return false;
4356 stmt = SSA_NAME_DEF_STMT (e);
4357 if (gimple_code (stmt) != GIMPLE_ASSIGN
4358 || gimple_assign_rhs_code (stmt) != code)
4359 return false;
4361 switch (get_gimple_rhs_class (code))
4363 case GIMPLE_BINARY_RHS:
4364 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4365 return false;
4366 /* Fallthru. */
4368 case GIMPLE_UNARY_RHS:
4369 case GIMPLE_SINGLE_RHS:
4370 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4371 default:
4372 return false;
4376 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4377 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4378 calculation is performed in a non-wrapping type.
4380 TODO: More generally, we could test for the situation that
4381 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4382 This would require knowing the sign of OFFSET.
4384 Also, we only look for the first addition in the computation of BASE.
4385 More complex analysis would be better, but introducing it just for
4386 this optimization seems like overkill. */
4388 static bool
4389 difference_cannot_overflow_p (tree base, tree offset)
4391 enum tree_code code;
4392 tree e1, e2;
4394 if (!nowrap_type_p (TREE_TYPE (base)))
4395 return false;
4397 base = expand_simple_operations (base);
4399 if (TREE_CODE (base) == SSA_NAME)
4401 gimple stmt = SSA_NAME_DEF_STMT (base);
4403 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4404 return false;
4406 code = gimple_assign_rhs_code (stmt);
4407 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4408 return false;
4410 e1 = gimple_assign_rhs1 (stmt);
4411 e2 = gimple_assign_rhs2 (stmt);
4413 else
4415 code = TREE_CODE (base);
4416 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4417 return false;
4418 e1 = TREE_OPERAND (base, 0);
4419 e2 = TREE_OPERAND (base, 1);
4422 /* TODO: deeper inspection may be necessary to prove the equality. */
4423 switch (code)
4425 case PLUS_EXPR:
4426 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4427 case POINTER_PLUS_EXPR:
4428 return expr_equal_p (e2, offset);
4430 default:
4431 return false;
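/* Illustrative case (added): for

     base = p_0 + n;   /@ POINTER_PLUS_EXPR in a non-wrapping type @/

   difference_cannot_overflow_p (base, n) returns true, since
   base - n merely recovers p_0 and so cannot wrap around.  */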
4435 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4436 comparison with CAND. NITER describes the number of iterations of
4437 the loop. If successful, the comparison in COMP_P is altered accordingly.
4439 We aim to handle the following situation:
4441 sometype *base, *p;
4442 int a, b, i;
4444 i = a;
4445 p = p_0 = base + a;
4447 do
4448 {
4449 bla (*p);
4450 p++;
4451 i++;
4452 }
4453 while (i < b);
4455 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4456 We aim to optimize this to
4458 p = p_0 = base + a;
4459 do
4460 {
4461 bla (*p);
4462 p++;
4463 }
4464 while (p < p_0 - a + b);
4466 This preserves correctness, since the pointer arithmetic does not
4467 overflow. More precisely:
4469 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4470 overflow in computing it or the values of p.
4471 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4472 overflow. To prove this, we use the fact that p_0 = base + a. */
4474 static bool
4475 iv_elimination_compare_lt (struct ivopts_data *data,
4476 struct iv_cand *cand, enum tree_code *comp_p,
4477 struct tree_niter_desc *niter)
4479 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4480 struct affine_tree_combination nit, tmpa, tmpb;
4481 enum tree_code comp;
4482 HOST_WIDE_INT step;
4484 /* We need to know that the candidate induction variable does not overflow.
4485 While more complex analysis may be used to prove this, for now just
4486 check that the variable appears in the original program and that it
4487 is computed in a type that guarantees no overflows. */
4488 cand_type = TREE_TYPE (cand->iv->base);
4489 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4490 return false;
4492 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4493 the calculation of the BOUND could overflow, making the comparison
4494 invalid. */
4495 if (!data->loop_single_exit_p)
4496 return false;
4498 /* We need to be able to decide whether candidate is increasing or decreasing
4499 in order to choose the right comparison operator. */
4500 if (!cst_fits_shwi_p (cand->iv->step))
4501 return false;
4502 step = int_cst_value (cand->iv->step);
4504 /* Check that the number of iterations matches the expected pattern:
4505 a + 1 > b ? 0 : b - a - 1. */
4506 mbz = niter->may_be_zero;
4507 if (TREE_CODE (mbz) == GT_EXPR)
4509 /* Handle a + 1 > b. */
4510 tree op0 = TREE_OPERAND (mbz, 0);
4511 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4513 a = TREE_OPERAND (op0, 0);
4514 b = TREE_OPERAND (mbz, 1);
4516 else
4517 return false;
4519 else if (TREE_CODE (mbz) == LT_EXPR)
4521 tree op1 = TREE_OPERAND (mbz, 1);
4523 /* Handle b < a + 1. */
4524 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4526 a = TREE_OPERAND (op1, 0);
4527 b = TREE_OPERAND (mbz, 0);
4529 else
4530 return false;
4532 else
4533 return false;
4535 /* Expected number of iterations is B - A - 1. Check that it matches
4536 the actual number, i.e., that B - A - NITER = 1. */
4537 tree_to_aff_combination (niter->niter, nit_type, &nit);
4538 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4539 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4540 aff_combination_scale (&nit, -1);
4541 aff_combination_scale (&tmpa, -1);
4542 aff_combination_add (&tmpb, &tmpa);
4543 aff_combination_add (&tmpb, &nit);
4544 if (tmpb.n != 0 || tmpb.offset != 1)
4545 return false;
4547 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4548 overflow. */
4549 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4550 cand->iv->step,
4551 fold_convert (TREE_TYPE (cand->iv->step), a));
4552 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4553 return false;
4555 /* Determine the new comparison operator. */
4556 comp = step < 0 ? GT_EXPR : LT_EXPR;
4557 if (*comp_p == NE_EXPR)
4558 *comp_p = comp;
4559 else if (*comp_p == EQ_EXPR)
4560 *comp_p = invert_tree_comparison (comp, false);
4561 else
4562 gcc_unreachable ();
4564 return true;
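/* Concrete instantiation (added, illustrative numbers): with a = 2,
   b = 6 and step 1, MAY_BE_ZERO is "2 + 1 > 6" and NITER is
   6 - 2 - 1 = 3, so B - A - NITER = 1 as checked above; since the
   step is positive and the original test used NE_EXPR, the exit
   condition is rewritten to use LT_EXPR.  */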
4567 /* Check whether it is possible to express the condition in USE by comparison
4568 of candidate CAND. If so, store the value compared with to BOUND, and the
4569 comparison operator to COMP. */
4571 static bool
4572 may_eliminate_iv (struct ivopts_data *data,
4573 struct iv_use *use, struct iv_cand *cand, tree *bound,
4574 enum tree_code *comp)
4576 basic_block ex_bb;
4577 edge exit;
4578 tree period;
4579 struct loop *loop = data->current_loop;
4580 aff_tree bnd;
4581 struct tree_niter_desc *desc = NULL;
4583 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4584 return false;
4586 /* For now works only for exits that dominate the loop latch.
4587 TODO: extend to other conditions inside loop body. */
4588 ex_bb = gimple_bb (use->stmt);
4589 if (use->stmt != last_stmt (ex_bb)
4590 || gimple_code (use->stmt) != GIMPLE_COND
4591 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4592 return false;
4594 exit = EDGE_SUCC (ex_bb, 0);
4595 if (flow_bb_inside_loop_p (loop, exit->dest))
4596 exit = EDGE_SUCC (ex_bb, 1);
4597 if (flow_bb_inside_loop_p (loop, exit->dest))
4598 return false;
4600 desc = niter_for_exit (data, exit);
4601 if (!desc)
4602 return false;
4604 /* Determine whether we can use the variable to test the exit condition.
4605 This is the case iff the period of the induction variable is greater
4606 than the number of iterations for which the exit condition is true. */
4607 period = iv_period (cand->iv);
4609 /* If the number of iterations is constant, compare against it directly. */
4610 if (TREE_CODE (desc->niter) == INTEGER_CST)
4612 /* See cand_value_at. */
4613 if (stmt_after_increment (loop, cand, use->stmt))
4615 if (!tree_int_cst_lt (desc->niter, period))
4616 return false;
4618 else
4620 if (tree_int_cst_lt (period, desc->niter))
4621 return false;
4625 /* If not, and if this is the only possible exit of the loop, see whether
4626 we can get a conservative estimate on the number of iterations of the
4627 entire loop and compare against that instead. */
4628 else
4630 widest_int period_value, max_niter;
4632 max_niter = desc->max;
4633 if (stmt_after_increment (loop, cand, use->stmt))
4634 max_niter += 1;
4635 period_value = wi::to_widest (period);
4636 if (wi::gtu_p (max_niter, period_value))
4638 /* See if we can take advantage of inferred loop bound information. */
4639 if (data->loop_single_exit_p)
4641 if (!max_loop_iterations (loop, &max_niter))
4642 return false;
4643 /* The loop bound is already adjusted by adding 1. */
4644 if (wi::gtu_p (max_niter, period_value))
4645 return false;
4647 else
4648 return false;
4652 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4654 *bound = aff_combination_to_tree (&bnd);
4655 *comp = iv_elimination_compare (data, use);
4657 /* It is unlikely that computing the number of iterations using division
4658 would be more profitable than keeping the original induction variable. */
4659 if (expression_expensive_p (*bound))
4660 return false;
4662 /* Sometimes, it is possible to handle the situation that the number of
4663 iterations may be zero unless additional assumptions hold, by using <
4664 instead of != in the exit condition.
4666 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4667 base the exit condition on it. However, that is often too
4668 expensive. */
4669 if (!integer_zerop (desc->may_be_zero))
4670 return iv_elimination_compare_lt (data, cand, comp, desc);
4672 return true;
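/* Typical elimination (sketch, added for exposition): in

     for (i = 0; i != n; i++)
       *p++ = 0;

   the exit test i != n can be based on the candidate p instead,
   comparing it against the value of p in the last iteration as
   computed by cand_value_at; the period check above guarantees that
   p cannot wrap around before that bound is reached.  */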
4675 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4676 be copied, if it is used in the loop body and DATA->body_includes_call. */
4678 static int
4679 parm_decl_cost (struct ivopts_data *data, tree bound)
4681 tree sbound = bound;
4682 STRIP_NOPS (sbound);
4684 if (TREE_CODE (sbound) == SSA_NAME
4685 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4686 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4687 && data->body_includes_call)
4688 return COSTS_N_INSNS (1);
4690 return 0;
4693 /* Determines cost of basing replacement of USE on CAND in a condition. */
4695 static bool
4696 determine_use_iv_cost_condition (struct ivopts_data *data,
4697 struct iv_use *use, struct iv_cand *cand)
4699 tree bound = NULL_TREE;
4700 struct iv *cmp_iv;
4701 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4702 comp_cost elim_cost, express_cost, cost, bound_cost;
4703 bool ok;
4704 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4705 tree *control_var, *bound_cst;
4706 enum tree_code comp = ERROR_MARK;
4708 /* Only consider real candidates. */
4709 if (!cand->iv)
4711 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4712 ERROR_MARK, -1);
4713 return false;
4716 /* Try iv elimination. */
4717 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4719 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4720 if (elim_cost.cost == 0)
4721 elim_cost.cost = parm_decl_cost (data, bound);
4722 else if (TREE_CODE (bound) == INTEGER_CST)
4723 elim_cost.cost = 0;
4724 /* If we replace a loop condition 'i < n' with 'p < base + n',
4725 depends_on_elim will have 'base' and 'n' set, which implies
4726 that both 'base' and 'n' will be live during the loop. More likely,
4727 'base + n' will be loop invariant, resulting in only one live value
4728 during the loop. So in that case we clear depends_on_elim and set
4729 elim_inv_expr_id instead. */
4730 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4732 elim_inv_expr_id = get_expr_id (data, bound);
4733 bitmap_clear (depends_on_elim);
4735 /* The bound is a loop invariant, so it will be computed only
4736 once. */
4737 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4739 else
4740 elim_cost = infinite_cost;
4742 /* Try expressing the original giv. If it is compared with an invariant,
4743 note that we cannot get rid of it. */
4744 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4745 NULL, &cmp_iv);
4746 gcc_assert (ok);
4748 /* When the condition is a comparison of the candidate IV against
4749 zero, prefer this IV.
4751 TODO: The constant that we're subtracting from the cost should
4752 be target-dependent. This information should be added to the
4753 target costs for each backend. */
4754 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4755 && integer_zerop (*bound_cst)
4756 && (operand_equal_p (*control_var, cand->var_after, 0)
4757 || operand_equal_p (*control_var, cand->var_before, 0)))
4758 elim_cost.cost -= 1;
4760 express_cost = get_computation_cost (data, use, cand, false,
4761 &depends_on_express, NULL,
4762 &express_inv_expr_id);
4763 fd_ivopts_data = data;
4764 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4766 /* Count the cost of the original bound as well. */
4767 bound_cost = force_var_cost (data, *bound_cst, NULL);
4768 if (bound_cost.cost == 0)
4769 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4770 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4771 bound_cost.cost = 0;
4772 express_cost.cost += bound_cost.cost;
4774 /* Choose the better approach, preferring the eliminated IV. */
4775 if (compare_costs (elim_cost, express_cost) <= 0)
4777 cost = elim_cost;
4778 depends_on = depends_on_elim;
4779 depends_on_elim = NULL;
4780 inv_expr_id = elim_inv_expr_id;
4782 else
4784 cost = express_cost;
4785 depends_on = depends_on_express;
4786 depends_on_express = NULL;
4787 bound = NULL_TREE;
4788 comp = ERROR_MARK;
4789 inv_expr_id = express_inv_expr_id;
4792 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4794 if (depends_on_elim)
4795 BITMAP_FREE (depends_on_elim);
4796 if (depends_on_express)
4797 BITMAP_FREE (depends_on_express);
4799 return !infinite_cost_p (cost);
4802 /* Determines cost of basing replacement of USE on CAND. Returns false
4803 if USE cannot be based on CAND. */
4805 static bool
4806 determine_use_iv_cost (struct ivopts_data *data,
4807 struct iv_use *use, struct iv_cand *cand)
4809 switch (use->type)
4811 case USE_NONLINEAR_EXPR:
4812 return determine_use_iv_cost_generic (data, use, cand);
4814 case USE_ADDRESS:
4815 return determine_use_iv_cost_address (data, use, cand);
4817 case USE_COMPARE:
4818 return determine_use_iv_cost_condition (data, use, cand);
4820 default:
4821 gcc_unreachable ();
4825 /* Return true if get_computation_cost indicates that autoincrement is
4826 a possibility for the pair of USE and CAND, false otherwise. */
4828 static bool
4829 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4830 struct iv_cand *cand)
4832 bitmap depends_on;
4833 bool can_autoinc;
4834 comp_cost cost;
4836 if (use->type != USE_ADDRESS)
4837 return false;
4839 cost = get_computation_cost (data, use, cand, true, &depends_on,
4840 &can_autoinc, NULL);
4842 BITMAP_FREE (depends_on);
4844 return !infinite_cost_p (cost) && can_autoinc;
4847 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4848 use that allows autoincrement, and set their AINC_USE if possible. */
4850 static void
4851 set_autoinc_for_original_candidates (struct ivopts_data *data)
4853 unsigned i, j;
4855 for (i = 0; i < n_iv_cands (data); i++)
4857 struct iv_cand *cand = iv_cand (data, i);
4858 struct iv_use *closest_before = NULL;
4859 struct iv_use *closest_after = NULL;
4860 if (cand->pos != IP_ORIGINAL)
4861 continue;
4863 for (j = 0; j < n_iv_uses (data); j++)
4865 struct iv_use *use = iv_use (data, j);
4866 unsigned uid = gimple_uid (use->stmt);
4868 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4869 continue;
4871 if (uid < gimple_uid (cand->incremented_at)
4872 && (closest_before == NULL
4873 || uid > gimple_uid (closest_before->stmt)))
4874 closest_before = use;
4876 if (uid > gimple_uid (cand->incremented_at)
4877 && (closest_after == NULL
4878 || uid < gimple_uid (closest_after->stmt)))
4879 closest_after = use;
4882 if (closest_before != NULL
4883 && autoinc_possible_for_pair (data, closest_before, cand))
4884 cand->ainc_use = closest_before;
4885 else if (closest_after != NULL
4886 && autoinc_possible_for_pair (data, closest_after, cand))
4887 cand->ainc_use = closest_after;
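/* Illustrative pattern (added): in a block of the shape

     ... = *p;
     p = p + 1;

   the load is the closest use before the increment of the IP_ORIGINAL
   candidate p, so AINC_USE is set and the pair can later be costed as
   a post-increment access on targets that support such addressing.  */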
4891 /* Finds the candidates for the induction variables. */
4893 static void
4894 find_iv_candidates (struct ivopts_data *data)
4896 /* Add commonly used ivs. */
4897 add_standard_iv_candidates (data);
4899 /* Add old induction variables. */
4900 add_old_ivs_candidates (data);
4902 /* Add induction variables derived from uses. */
4903 add_derived_ivs_candidates (data);
4905 set_autoinc_for_original_candidates (data);
4907 /* Record the important candidates. */
4908 record_important_candidates (data);
4911 /* Determines costs of basing the use of the iv on an iv candidate. */
4913 static void
4914 determine_use_iv_costs (struct ivopts_data *data)
4916 unsigned i, j;
4917 struct iv_use *use;
4918 struct iv_cand *cand;
4919 bitmap to_clear = BITMAP_ALLOC (NULL);
4921 alloc_use_cost_map (data);
4923 for (i = 0; i < n_iv_uses (data); i++)
4925 use = iv_use (data, i);
4927 if (data->consider_all_candidates)
4929 for (j = 0; j < n_iv_cands (data); j++)
4931 cand = iv_cand (data, j);
4932 determine_use_iv_cost (data, use, cand);
4935 else
4937 bitmap_iterator bi;
4939 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4941 cand = iv_cand (data, j);
4942 if (!determine_use_iv_cost (data, use, cand))
4943 bitmap_set_bit (to_clear, j);
4946 /* Remove the candidates for which the cost is infinite from
4947 the list of related candidates. */
4948 bitmap_and_compl_into (use->related_cands, to_clear);
4949 bitmap_clear (to_clear);
4953 BITMAP_FREE (to_clear);
4955 if (dump_file && (dump_flags & TDF_DETAILS))
4957 fprintf (dump_file, "Use-candidate costs:\n");
4959 for (i = 0; i < n_iv_uses (data); i++)
4961 use = iv_use (data, i);
4963 fprintf (dump_file, "Use %d:\n", i);
4964 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4965 for (j = 0; j < use->n_map_members; j++)
4967 if (!use->cost_map[j].cand
4968 || infinite_cost_p (use->cost_map[j].cost))
4969 continue;
4971 fprintf (dump_file, " %d\t%d\t%d\t",
4972 use->cost_map[j].cand->id,
4973 use->cost_map[j].cost.cost,
4974 use->cost_map[j].cost.complexity);
4975 if (use->cost_map[j].depends_on)
4976 bitmap_print (dump_file,
4977 use->cost_map[j].depends_on, "","");
4978 if (use->cost_map[j].inv_expr_id != -1)
4979 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4980 fprintf (dump_file, "\n");
4983 fprintf (dump_file, "\n");
4985 fprintf (dump_file, "\n");
4989 /* Determines cost of the candidate CAND. */
4991 static void
4992 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4994 comp_cost cost_base;
4995 unsigned cost, cost_step;
4996 tree base;
4998 if (!cand->iv)
5000 cand->cost = 0;
5001 return;
5004 /* There are two costs associated with the candidate -- its increment
5005 and its initialization. The second is almost negligible for any loop
5006 that rolls enough, so we give it only very little weight. */
5008 base = cand->iv->base;
5009 cost_base = force_var_cost (data, base, NULL);
5010 /* It will be exceptional that the iv register happens to be initialized with
5011 the proper value at no cost. In general, there will at least be a regcopy
5012 or a const set. */
5013 if (cost_base.cost == 0)
5014 cost_base.cost = COSTS_N_INSNS (1);
5015 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5017 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5019 /* Prefer the original ivs unless we may gain something by replacing them.
5020 The reason is to make debugging simpler, so this is not relevant for
5021 artificial ivs created by other optimization passes. */
5022 if (cand->pos != IP_ORIGINAL
5023 || !SSA_NAME_VAR (cand->var_before)
5024 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5025 cost++;
5027 /* Prefer not to insert statements into latch unless there are some
5028 already (so that we do not create unnecessary jumps). */
5029 if (cand->pos == IP_END
5030 && empty_block_p (ip_end_pos (data->current_loop)))
5031 cost++;
5033 cand->cost = cost;
5034 cand->cost_step = cost_step;
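/* Worked example (added, illustrative costs): for a fresh candidate
   with base 0 and step 1, cost_base is forced to COSTS_N_INSNS (1)
   (a constant set), so

     cand->cost = cost_step + adjust_setup_cost (data, COSTS_N_INSNS (1)) + 1,

   the final + 1 being the penalty for not preserving an original iv.  */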
5037 /* Determines costs of computation of the candidates. */
5039 static void
5040 determine_iv_costs (struct ivopts_data *data)
5042 unsigned i;
5044 if (dump_file && (dump_flags & TDF_DETAILS))
5046 fprintf (dump_file, "Candidate costs:\n");
5047 fprintf (dump_file, " cand\tcost\n");
5050 for (i = 0; i < n_iv_cands (data); i++)
5052 struct iv_cand *cand = iv_cand (data, i);
5054 determine_iv_cost (data, cand);
5056 if (dump_file && (dump_flags & TDF_DETAILS))
5057 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5060 if (dump_file && (dump_flags & TDF_DETAILS))
5061 fprintf (dump_file, "\n");
5064 /* Calculates the cost of having SIZE induction variables. */
5066 static unsigned
5067 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5069 /* We add size to the cost, so that we prefer eliminating ivs
5070 if possible. */
5071 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5072 data->body_includes_call);
5075 /* For each size of the induction variable set determine the penalty. */
5077 static void
5078 determine_set_costs (struct ivopts_data *data)
5080 unsigned j, n;
5081 gimple phi;
5082 gimple_stmt_iterator psi;
5083 tree op;
5084 struct loop *loop = data->current_loop;
5085 bitmap_iterator bi;
5087 if (dump_file && (dump_flags & TDF_DETAILS))
5089 fprintf (dump_file, "Global costs:\n");
5090 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5091 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5092 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5093 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5096 n = 0;
5097 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5099 phi = gsi_stmt (psi);
5100 op = PHI_RESULT (phi);
5102 if (virtual_operand_p (op))
5103 continue;
5105 if (get_iv (data, op))
5106 continue;
5108 n++;
5111 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5113 struct version_info *info = ver_info (data, j);
5115 if (info->inv_id && info->has_nonlin_use)
5116 n++;
5119 data->regs_used = n;
5120 if (dump_file && (dump_flags & TDF_DETAILS))
5121 fprintf (dump_file, " regs_used %d\n", n);
5123 if (dump_file && (dump_flags & TDF_DETAILS))
5125 fprintf (dump_file, " cost for size:\n");
5126 fprintf (dump_file, " ivs\tcost\n");
5127 for (j = 0; j <= 2 * target_avail_regs; j++)
5128 fprintf (dump_file, " %d\t%d\n", j,
5129 ivopts_global_cost_for_size (data, j));
5130 fprintf (dump_file, "\n");
5134 /* Returns true if A is a cheaper cost pair than B. */
5136 static bool
5137 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5139 int cmp;
5141 if (!a)
5142 return false;
5144 if (!b)
5145 return true;
5147 cmp = compare_costs (a->cost, b->cost);
5148 if (cmp < 0)
5149 return true;
5151 if (cmp > 0)
5152 return false;
5154 /* In case the costs are the same, prefer the cheaper candidate. */
5155 if (a->cand->cost < b->cand->cost)
5156 return true;
5158 return false;
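/* Tie-breaking example (added): two cost pairs with the same use cost,
   say 4, but candidate costs 5 and 7, compare so that the pair whose
   candidate is itself cheaper to maintain wins.  */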
5162 /* Returns the candidate by which USE is expressed in IVS. */
5164 static struct cost_pair *
5165 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5167 return ivs->cand_for_use[use->id];
5170 /* Computes the cost field of IVS structure. */
5172 static void
5173 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5175 comp_cost cost = ivs->cand_use_cost;
5177 cost.cost += ivs->cand_cost;
5179 cost.cost += ivopts_global_cost_for_size (data,
5180 ivs->n_regs + ivs->num_used_inv_expr);
5182 ivs->cost = cost;
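/* Cost composition on made-up numbers (added): with cand_use_cost = 8,
   cand_cost = 5, n_regs = 3 and one used invariant expression, the
   total becomes

     8 + 5 + ivopts_global_cost_for_size (data, 3 + 1),

   i.e. the per-use costs, the per-candidate costs, and the register
   pressure estimate for everything live across the loop.  */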
5185 /* Remove the invariants in set INVS from set IVS. */
5187 static void
5188 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5190 bitmap_iterator bi;
5191 unsigned iid;
5193 if (!invs)
5194 return;
5196 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5198 ivs->n_invariant_uses[iid]--;
5199 if (ivs->n_invariant_uses[iid] == 0)
5200 ivs->n_regs--;
5204 /* Set USE not to be expressed by any candidate in IVS. */
5206 static void
5207 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5208 struct iv_use *use)
5210 unsigned uid = use->id, cid;
5211 struct cost_pair *cp;
5213 cp = ivs->cand_for_use[uid];
5214 if (!cp)
5215 return;
5216 cid = cp->cand->id;
5218 ivs->bad_uses++;
5219 ivs->cand_for_use[uid] = NULL;
5220 ivs->n_cand_uses[cid]--;
5222 if (ivs->n_cand_uses[cid] == 0)
5224 bitmap_clear_bit (ivs->cands, cid);
5225 /* Do not count the pseudocandidates. */
5226 if (cp->cand->iv)
5227 ivs->n_regs--;
5228 ivs->n_cands--;
5229 ivs->cand_cost -= cp->cand->cost;
5231 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5234 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5236 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5238 if (cp->inv_expr_id != -1)
5240 ivs->used_inv_expr[cp->inv_expr_id]--;
5241 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5242 ivs->num_used_inv_expr--;
5244 iv_ca_recount_cost (data, ivs);
5247 /* Add invariants in set INVS to set IVS. */
5249 static void
5250 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5252 bitmap_iterator bi;
5253 unsigned iid;
5255 if (!invs)
5256 return;
5258 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5260 ivs->n_invariant_uses[iid]++;
5261 if (ivs->n_invariant_uses[iid] == 1)
5262 ivs->n_regs++;
5266 /* Set cost pair for USE in set IVS to CP. */
5268 static void
5269 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5270 struct iv_use *use, struct cost_pair *cp)
5272 unsigned uid = use->id, cid;
5274 if (ivs->cand_for_use[uid] == cp)
5275 return;
5277 if (ivs->cand_for_use[uid])
5278 iv_ca_set_no_cp (data, ivs, use);
5280 if (cp)
5282 cid = cp->cand->id;
5284 ivs->bad_uses--;
5285 ivs->cand_for_use[uid] = cp;
5286 ivs->n_cand_uses[cid]++;
5287 if (ivs->n_cand_uses[cid] == 1)
5289 bitmap_set_bit (ivs->cands, cid);
5290 /* Do not count the pseudocandidates. */
5291 if (cp->cand->iv)
5292 ivs->n_regs++;
5293 ivs->n_cands++;
5294 ivs->cand_cost += cp->cand->cost;
5296 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5299 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5300 iv_ca_set_add_invariants (ivs, cp->depends_on);
5302 if (cp->inv_expr_id != -1)
5304 ivs->used_inv_expr[cp->inv_expr_id]++;
5305 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5306 ivs->num_used_inv_expr++;
5308 iv_ca_recount_cost (data, ivs);
5312 /* Extend set IVS by expressing USE by some of the candidates in it
5313 if possible. All important candidates will be considered
5314 if IMPORTANT_CANDIDATES is true. */
5316 static void
5317 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5318 struct iv_use *use, bool important_candidates)
5320 struct cost_pair *best_cp = NULL, *cp;
5321 bitmap_iterator bi;
5322 bitmap cands;
5323 unsigned i;
5325 gcc_assert (ivs->upto >= use->id);
5327 if (ivs->upto == use->id)
5329 ivs->upto++;
5330 ivs->bad_uses++;
5333 cands = (important_candidates ? data->important_candidates : ivs->cands);
5334 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5336 struct iv_cand *cand = iv_cand (data, i);
5338 cp = get_use_iv_cost (data, use, cand);
5340 if (cheaper_cost_pair (cp, best_cp))
5341 best_cp = cp;
5344 iv_ca_set_cp (data, ivs, use, best_cp);
5347 /* Get cost for assignment IVS. */
5349 static comp_cost
5350 iv_ca_cost (struct iv_ca *ivs)
5352 /* This was a conditional expression but it triggered a bug in
5353 Sun C 5.5. */
5354 if (ivs->bad_uses)
5355 return infinite_cost;
5356 else
5357 return ivs->cost;
5360 /* Returns true if all dependences of CP are among invariants in IVS. */
5362 static bool
5363 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5365 unsigned i;
5366 bitmap_iterator bi;
5368 if (!cp->depends_on)
5369 return true;
5371 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5373 if (ivs->n_invariant_uses[i] == 0)
5374 return false;
5377 return true;
5380 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP and
5381 chains it before NEXT_CHANGE. */
5383 static struct iv_ca_delta *
5384 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5385 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5387 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5389 change->use = use;
5390 change->old_cp = old_cp;
5391 change->new_cp = new_cp;
5392 change->next_change = next_change;
5394 return change;
5397 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5398 are rewritten. */
5400 static struct iv_ca_delta *
5401 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5403 struct iv_ca_delta *last;
5405 if (!l2)
5406 return l1;
5408 if (!l1)
5409 return l2;
5411 for (last = l1; last->next_change; last = last->next_change)
5412 continue;
5413 last->next_change = l2;
5415 return l1;
5418 /* Reverse the list of changes DELTA, forming the inverse to it. */
5420 static struct iv_ca_delta *
5421 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5423 struct iv_ca_delta *act, *next, *prev = NULL;
5424 struct cost_pair *tmp;
5426 for (act = delta; act; act = next)
5428 next = act->next_change;
5429 act->next_change = prev;
5430 prev = act;
5432 tmp = act->old_cp;
5433 act->old_cp = act->new_cp;
5434 act->new_cp = tmp;
5437 return prev;
5440 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5441 reverted instead. */
5443 static void
5444 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5445 struct iv_ca_delta *delta, bool forward)
5447 struct cost_pair *from, *to;
5448 struct iv_ca_delta *act;
5450 if (!forward)
5451 delta = iv_ca_delta_reverse (delta);
5453 for (act = delta; act; act = act->next_change)
5455 from = act->old_cp;
5456 to = act->new_cp;
5457 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5458 iv_ca_set_cp (data, ivs, act->use, to);
5461 if (!forward)
5462 iv_ca_delta_reverse (delta);
5465 /* Returns true if CAND is used in IVS. */
5467 static bool
5468 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5470 return ivs->n_cand_uses[cand->id] > 0;
5473 /* Returns number of induction variable candidates in the set IVS. */
5475 static unsigned
5476 iv_ca_n_cands (struct iv_ca *ivs)
5478 return ivs->n_cands;
5481 /* Free the list of changes DELTA. */
5483 static void
5484 iv_ca_delta_free (struct iv_ca_delta **delta)
5486 struct iv_ca_delta *act, *next;
5488 for (act = *delta; act; act = next)
5490 next = act->next_change;
5491 free (act);
5494 *delta = NULL;
5497 /* Allocates new iv candidates assignment. */
5499 static struct iv_ca *
5500 iv_ca_new (struct ivopts_data *data)
5502 struct iv_ca *nw = XNEW (struct iv_ca);
5504 nw->upto = 0;
5505 nw->bad_uses = 0;
5506 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5507 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5508 nw->cands = BITMAP_ALLOC (NULL);
5509 nw->n_cands = 0;
5510 nw->n_regs = 0;
5511 nw->cand_use_cost = no_cost;
5512 nw->cand_cost = 0;
5513 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5514 nw->cost = no_cost;
5515 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5516 nw->num_used_inv_expr = 0;
5518 return nw;
5521 /* Free memory occupied by the set IVS. */
5523 static void
5524 iv_ca_free (struct iv_ca **ivs)
5526 free ((*ivs)->cand_for_use);
5527 free ((*ivs)->n_cand_uses);
5528 BITMAP_FREE ((*ivs)->cands);
5529 free ((*ivs)->n_invariant_uses);
5530 free ((*ivs)->used_inv_expr);
5531 free (*ivs);
5532 *ivs = NULL;
5535 /* Dumps IVS to FILE. */
5537 static void
5538 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5540 const char *pref = " invariants ";
5541 unsigned i;
5542 comp_cost cost = iv_ca_cost (ivs);
5544 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5545 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5546 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5547 bitmap_print (file, ivs->cands, " candidates: ","\n");
5549 for (i = 0; i < ivs->upto; i++)
5551 struct iv_use *use = iv_use (data, i);
5552 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5553 if (cp)
5554 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5555 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5556 else
5557 fprintf (file, " use:%d --> ??\n", use->id);
5560 for (i = 1; i <= data->max_inv_id; i++)
5561 if (ivs->n_invariant_uses[i])
5563 fprintf (file, "%s%d", pref, i);
5564 pref = ", ";
5566 fprintf (file, "\n\n");
5569 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5570 new set, and store differences in DELTA. Number of induction variables
5571 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5572 the function will try to find a solution with the minimal number of iv candidates. */
5574 static comp_cost
5575 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5576 struct iv_cand *cand, struct iv_ca_delta **delta,
5577 unsigned *n_ivs, bool min_ncand)
5579 unsigned i;
5580 comp_cost cost;
5581 struct iv_use *use;
5582 struct cost_pair *old_cp, *new_cp;
5584 *delta = NULL;
5585 for (i = 0; i < ivs->upto; i++)
5587 use = iv_use (data, i);
5588 old_cp = iv_ca_cand_for_use (ivs, use);
5590 if (old_cp
5591 && old_cp->cand == cand)
5592 continue;
5594 new_cp = get_use_iv_cost (data, use, cand);
5595 if (!new_cp)
5596 continue;
5598 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5599 continue;
5601 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5602 continue;
5604 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5607 iv_ca_delta_commit (data, ivs, *delta, true);
5608 cost = iv_ca_cost (ivs);
5609 if (n_ivs)
5610 *n_ivs = iv_ca_n_cands (ivs);
5611 iv_ca_delta_commit (data, ivs, *delta, false);
5613 return cost;
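/* Typical usage (sketch mirroring try_improve_iv_set below): evaluate
   the returned cost first and commit the recorded delta only if it is
   an improvement:

     acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
     if (compare_costs (acost, best_cost) < 0)
       ... remember act_delta and commit it later ...
     else
       iv_ca_delta_free (&act_delta);
*/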
5616 /* Try narrowing set IVS by removing CAND. Return the cost of
5617 the new set and store the differences in DELTA. */
5619 static comp_cost
5620 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5621 struct iv_cand *cand, struct iv_ca_delta **delta)
5623 unsigned i, ci;
5624 struct iv_use *use;
5625 struct cost_pair *old_cp, *new_cp, *cp;
5626 bitmap_iterator bi;
5627 struct iv_cand *cnd;
5628 comp_cost cost;
5630 *delta = NULL;
5631 for (i = 0; i < n_iv_uses (data); i++)
5633 use = iv_use (data, i);
5635 old_cp = iv_ca_cand_for_use (ivs, use);
5636 if (old_cp->cand != cand)
5637 continue;
5639 new_cp = NULL;
5641 if (data->consider_all_candidates)
5643 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5645 if (ci == cand->id)
5646 continue;
5648 cnd = iv_cand (data, ci);
5650 cp = get_use_iv_cost (data, use, cnd);
5651 if (!cp)
5652 continue;
5654 if (!iv_ca_has_deps (ivs, cp))
5655 continue;
5657 if (!cheaper_cost_pair (cp, new_cp))
5658 continue;
5660 new_cp = cp;
5663 else
5665 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5667 if (ci == cand->id)
5668 continue;
5670 cnd = iv_cand (data, ci);
5672 cp = get_use_iv_cost (data, use, cnd);
5673 if (!cp)
5674 continue;
5675 if (!iv_ca_has_deps (ivs, cp))
5676 continue;
5678 if (!cheaper_cost_pair (cp, new_cp))
5679 continue;
5681 new_cp = cp;
5685 if (!new_cp)
5687 iv_ca_delta_free (delta);
5688 return infinite_cost;
5691 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5694 iv_ca_delta_commit (data, ivs, *delta, true);
5695 cost = iv_ca_cost (ivs);
5696 iv_ca_delta_commit (data, ivs, *delta, false);
5698 return cost;
5701 /* Try optimizing the set of candidates IVS by removing candidates other
5702 than EXCEPT_CAND from it. Return the cost of the new set, and store
5703 differences in DELTA. */
5705 static comp_cost
5706 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5707 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5709 bitmap_iterator bi;
5710 struct iv_ca_delta *act_delta, *best_delta;
5711 unsigned i;
5712 comp_cost best_cost, acost;
5713 struct iv_cand *cand;
5715 best_delta = NULL;
5716 best_cost = iv_ca_cost (ivs);
5718 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5720 cand = iv_cand (data, i);
5722 if (cand == except_cand)
5723 continue;
5725 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5727 if (compare_costs (acost, best_cost) < 0)
5729 best_cost = acost;
5730 iv_ca_delta_free (&best_delta);
5731 best_delta = act_delta;
5733 else
5734 iv_ca_delta_free (&act_delta);
5737 if (!best_delta)
5739 *delta = NULL;
5740 return best_cost;
5743 /* Recurse to possibly remove other unnecessary ivs. */
5744 iv_ca_delta_commit (data, ivs, best_delta, true);
5745 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5746 iv_ca_delta_commit (data, ivs, best_delta, false);
5747 *delta = iv_ca_delta_join (best_delta, *delta);
5748 return best_cost;
5751 /* Tries to extend the set IVS in the best possible way in order
5752 to express USE. If ORIGINALP is true, prefer candidates from
5753 the original set of IVs, otherwise favor important candidates not
5754 based on any memory object. */
5756 static bool
5757 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5758 struct iv_use *use, bool originalp)
5760 comp_cost best_cost, act_cost;
5761 unsigned i;
5762 bitmap_iterator bi;
5763 struct iv_cand *cand;
5764 struct iv_ca_delta *best_delta = NULL, *act_delta;
5765 struct cost_pair *cp;
5767 iv_ca_add_use (data, ivs, use, false);
5768 best_cost = iv_ca_cost (ivs);
5770 cp = iv_ca_cand_for_use (ivs, use);
5771 if (!cp)
5773 ivs->upto--;
5774 ivs->bad_uses--;
5775 iv_ca_add_use (data, ivs, use, true);
5776 best_cost = iv_ca_cost (ivs);
5777 cp = iv_ca_cand_for_use (ivs, use);
5779 if (cp)
5781 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5782 iv_ca_set_no_cp (data, ivs, use);
5785 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5786 first try important candidates not based on any memory object. Only if
5787 this fails, try the specific ones. Rationale -- in loops with many
5788 variables the best choice often is to use just one generic biv. If we
5789 added here many ivs specific to the uses, the optimization algorithm later
5790 would be likely to get stuck in a local minimum, thus causing us to create
5791 too many ivs. The approach from few ivs to more seems more likely to be
5792 successful -- starting from few ivs, replacing an expensive use by a
5793 specific iv should always be a win. */
5794 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5796 cand = iv_cand (data, i);
5798 if (originalp && cand->pos != IP_ORIGINAL)
5799 continue;
5801 if (!originalp && cand->iv->base_object != NULL_TREE)
5802 continue;
5804 if (iv_ca_cand_used_p (ivs, cand))
5805 continue;
5807 cp = get_use_iv_cost (data, use, cand);
5808 if (!cp)
5809 continue;
5811 iv_ca_set_cp (data, ivs, use, cp);
5812 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5813 true);
5814 iv_ca_set_no_cp (data, ivs, use);
5815 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5817 if (compare_costs (act_cost, best_cost) < 0)
5819 best_cost = act_cost;
5821 iv_ca_delta_free (&best_delta);
5822 best_delta = act_delta;
5824 else
5825 iv_ca_delta_free (&act_delta);
5828 if (infinite_cost_p (best_cost))
5830 for (i = 0; i < use->n_map_members; i++)
5832 cp = use->cost_map + i;
5833 cand = cp->cand;
5834 if (!cand)
5835 continue;
5837 /* Already tried this. */
5838 if (cand->important)
5840 if (originalp && cand->pos == IP_ORIGINAL)
5841 continue;
5842 if (!originalp && cand->iv->base_object == NULL_TREE)
5843 continue;
5846 if (iv_ca_cand_used_p (ivs, cand))
5847 continue;
5849 act_delta = NULL;
5850 iv_ca_set_cp (data, ivs, use, cp);
5851 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5852 iv_ca_set_no_cp (data, ivs, use);
5853 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5854 cp, act_delta);
5856 if (compare_costs (act_cost, best_cost) < 0)
5858 best_cost = act_cost;
5860 if (best_delta)
5861 iv_ca_delta_free (&best_delta);
5862 best_delta = act_delta;
5864 else
5865 iv_ca_delta_free (&act_delta);
5869 iv_ca_delta_commit (data, ivs, best_delta, true);
5870 iv_ca_delta_free (&best_delta);
5872 return !infinite_cost_p (best_cost);
5875 /* Finds an initial assignment of candidates to uses. */
5877 static struct iv_ca *
5878 get_initial_solution (struct ivopts_data *data, bool originalp)
5880 struct iv_ca *ivs = iv_ca_new (data);
5881 unsigned i;
5883 for (i = 0; i < n_iv_uses (data); i++)
5884 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5886 iv_ca_free (&ivs);
5887 return NULL;
5890 return ivs;
5893 /* Tries to improve set of induction variables IVS. */
5895 static bool
5896 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5898 unsigned i, n_ivs;
5899 comp_cost acost, best_cost = iv_ca_cost (ivs);
5900 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5901 struct iv_cand *cand;
5903 /* Try extending the set of induction variables by one. */
5904 for (i = 0; i < n_iv_cands (data); i++)
5906 cand = iv_cand (data, i);
5908 if (iv_ca_cand_used_p (ivs, cand))
5909 continue;
5911 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5912 if (!act_delta)
5913 continue;
5915 /* If we successfully added the candidate and the set is small enough,
5916 try optimizing it by removing other candidates. */
5917 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5919 iv_ca_delta_commit (data, ivs, act_delta, true);
5920 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5921 iv_ca_delta_commit (data, ivs, act_delta, false);
5922 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5925 if (compare_costs (acost, best_cost) < 0)
5927 best_cost = acost;
5928 iv_ca_delta_free (&best_delta);
5929 best_delta = act_delta;
5931 else
5932 iv_ca_delta_free (&act_delta);
5935 if (!best_delta)
5937 /* Try removing the candidates from the set instead. */
5938 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5940 /* Nothing more we can do. */
5941 if (!best_delta)
5942 return false;
5945 iv_ca_delta_commit (data, ivs, best_delta, true);
5946 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5947 iv_ca_delta_free (&best_delta);
5948 return true;
5951 /* Attempts to find the optimal set of induction variables. We use a simple
5952 greedy heuristic -- we try to replace at most one candidate in the selected
5953 solution and remove the unused ivs while this improves the cost. */
5955 static struct iv_ca *
5956 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5958 struct iv_ca *set;
5960 /* Get the initial solution. */
5961 set = get_initial_solution (data, originalp);
5962 if (!set)
5964 if (dump_file && (dump_flags & TDF_DETAILS))
5965 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5966 return NULL;
5969 if (dump_file && (dump_flags & TDF_DETAILS))
5971 fprintf (dump_file, "Initial set of candidates:\n");
5972 iv_ca_dump (data, dump_file, set);
5975 while (try_improve_iv_set (data, set))
5977 if (dump_file && (dump_flags & TDF_DETAILS))
5979 fprintf (dump_file, "Improved to:\n");
5980 iv_ca_dump (data, dump_file, set);
5984 return set;
5987 static struct iv_ca *
5988 find_optimal_iv_set (struct ivopts_data *data)
5990 unsigned i;
5991 struct iv_ca *set, *origset;
5992 struct iv_use *use;
5993 comp_cost cost, origcost;
5995 /* Determine the cost based on a strategy that starts with original IVs,
5996 and try again using a strategy that prefers candidates not based
5997 on any IVs. */
5998 origset = find_optimal_iv_set_1 (data, true);
5999 set = find_optimal_iv_set_1 (data, false);
6001 if (!origset && !set)
6002 return NULL;
6004 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6005 cost = set ? iv_ca_cost (set) : infinite_cost;
6007 if (dump_file && (dump_flags & TDF_DETAILS))
6009 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6010 origcost.cost, origcost.complexity);
6011 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6012 cost.cost, cost.complexity);
6015 /* Choose the one with the best cost. */
6016 if (compare_costs (origcost, cost) <= 0)
6018 if (set)
6019 iv_ca_free (&set);
6020 set = origset;
6022 else if (origset)
6023 iv_ca_free (&origset);
6025 for (i = 0; i < n_iv_uses (data); i++)
6027 use = iv_use (data, i);
6028 use->selected = iv_ca_cand_for_use (set, use)->cand;
6031 return set;
6034 /* Creates a new induction variable corresponding to CAND. */
6036 static void
6037 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6039 gimple_stmt_iterator incr_pos;
6040 tree base;
6041 bool after = false;
6043 if (!cand->iv)
6044 return;
6046 switch (cand->pos)
6048 case IP_NORMAL:
6049 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6050 break;
6052 case IP_END:
6053 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6054 after = true;
6055 break;
6057 case IP_AFTER_USE:
6058 after = true;
6059 /* fall through */
6060 case IP_BEFORE_USE:
6061 incr_pos = gsi_for_stmt (cand->incremented_at);
6062 break;
6064 case IP_ORIGINAL:
6065 /* Mark that the iv is preserved. */
6066 name_info (data, cand->var_before)->preserve_biv = true;
6067 name_info (data, cand->var_after)->preserve_biv = true;
6069 /* Rewrite the increment so that it uses var_before directly. */
6070 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6071 return;
6074 gimple_add_tmp_var (cand->var_before);
6076 base = unshare_expr (cand->iv->base);
6078 create_iv (base, unshare_expr (cand->iv->step),
6079 cand->var_before, data->current_loop,
6080 &incr_pos, after, &cand->var_before, &cand->var_after);
6083 /* Creates new induction variables described in SET. */
6085 static void
6086 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6088 unsigned i;
6089 struct iv_cand *cand;
6090 bitmap_iterator bi;
6092 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6094 cand = iv_cand (data, i);
6095 create_new_iv (data, cand);
6098 if (dump_file && (dump_flags & TDF_DETAILS))
6100 fprintf (dump_file, "\nSelected IV set: \n");
6101 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6103 cand = iv_cand (data, i);
6104 dump_cand (dump_file, cand);
6106 fprintf (dump_file, "\n");
6110 /* Rewrites USE (definition of iv used in a nonlinear expression)
6111 using candidate CAND. */
6113 static void
6114 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6115 struct iv_use *use, struct iv_cand *cand)
6117 tree comp;
6118 tree op, tgt;
6119 gimple ass;
6120 gimple_stmt_iterator bsi;
6122 /* An important special case -- if we are asked to express the value of
6123 the original iv by itself, just exit; there is no need to
6124 introduce a new computation (that might also need casting the
6125 variable to unsigned and back). */
6126 if (cand->pos == IP_ORIGINAL
6127 && cand->incremented_at == use->stmt)
6129 enum tree_code stmt_code;
6131 gcc_assert (is_gimple_assign (use->stmt));
6132 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6134 /* Check whether we may leave the computation unchanged.
6135 This is the case only if it does not rely on other
6136 computations in the loop -- otherwise, the computation
6137 we rely upon may be removed in remove_unused_ivs,
6138 thus leading to ICE. */
6139 stmt_code = gimple_assign_rhs_code (use->stmt);
6140 if (stmt_code == PLUS_EXPR
6141 || stmt_code == MINUS_EXPR
6142 || stmt_code == POINTER_PLUS_EXPR)
6144 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6145 op = gimple_assign_rhs2 (use->stmt);
6146 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6147 op = gimple_assign_rhs1 (use->stmt);
6148 else
6149 op = NULL_TREE;
6151 else
6152 op = NULL_TREE;
6154 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6155 return;
6158 comp = get_computation (data->current_loop, use, cand);
6159 gcc_assert (comp != NULL_TREE);
6161 switch (gimple_code (use->stmt))
6163 case GIMPLE_PHI:
6164 tgt = PHI_RESULT (use->stmt);
6166 /* If we should keep the biv, do not replace it. */
6167 if (name_info (data, tgt)->preserve_biv)
6168 return;
6170 bsi = gsi_after_labels (gimple_bb (use->stmt));
6171 break;
6173 case GIMPLE_ASSIGN:
6174 tgt = gimple_assign_lhs (use->stmt);
6175 bsi = gsi_for_stmt (use->stmt);
6176 break;
6178 default:
6179 gcc_unreachable ();
6182 if (!valid_gimple_rhs_p (comp)
6183 || (gimple_code (use->stmt) != GIMPLE_PHI
6184 /* We can't allow re-allocating the stmt as it might be pointed
6185 to still. */
6186 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6187 >= gimple_num_ops (gsi_stmt (bsi)))))
6189 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6190 true, GSI_SAME_STMT);
6191 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6193 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6194 /* As this isn't a plain copy we have to reset alignment
6195 information. */
6196 if (SSA_NAME_PTR_INFO (comp))
6197 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6201 if (gimple_code (use->stmt) == GIMPLE_PHI)
6203 ass = gimple_build_assign (tgt, comp);
6204 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6206 bsi = gsi_for_stmt (use->stmt);
6207 remove_phi_node (&bsi, false);
6209 else
6211 gimple_assign_set_rhs_from_tree (&bsi, comp);
6212 use->stmt = gsi_stmt (bsi);
6216 /* Performs a peephole optimization to reorder the iv update statement with
6217 a mem ref to enable instruction combining in later phases. The mem ref uses
6218 the iv value before the update, so the reordering transformation requires
6219 adjustment of the offset. CAND is the selected IV_CAND.
6221 Example:
6223 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6224 iv2 = iv1 + 1;
6226 if (t < val) (1)
6227 goto L;
6228 goto Head;
6231 directly propagating t over to (1) will introduce an overlapping live range
6232 and thus increase register pressure. This peephole transforms it into:
6235 iv2 = iv1 + 1;
6236 t = MEM_REF (base, iv2, 8, 8);
6237 if (t < val)
6238 goto L;
6239 goto Head;
6242 static void
6243 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6245 tree var_after;
6246 gimple iv_update, stmt;
6247 basic_block bb;
6248 gimple_stmt_iterator gsi, gsi_iv;
6250 if (cand->pos != IP_NORMAL)
6251 return;
6253 var_after = cand->var_after;
6254 iv_update = SSA_NAME_DEF_STMT (var_after);
6256 bb = gimple_bb (iv_update);
6257 gsi = gsi_last_nondebug_bb (bb);
6258 stmt = gsi_stmt (gsi);
6260 /* Only handle a conditional statement for now. */
6261 if (gimple_code (stmt) != GIMPLE_COND)
6262 return;
6264 gsi_prev_nondebug (&gsi);
6265 stmt = gsi_stmt (gsi);
6266 if (stmt != iv_update)
6267 return;
6269 gsi_prev_nondebug (&gsi);
6270 if (gsi_end_p (gsi))
6271 return;
6273 stmt = gsi_stmt (gsi);
6274 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6275 return;
6277 if (stmt != use->stmt)
6278 return;
6280 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6281 return;
6283 if (dump_file && (dump_flags & TDF_DETAILS))
6285 fprintf (dump_file, "Reordering \n");
6286 print_gimple_stmt (dump_file, iv_update, 0, 0);
6287 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6288 fprintf (dump_file, "\n");
6291 gsi = gsi_for_stmt (use->stmt);
6292 gsi_iv = gsi_for_stmt (iv_update);
6293 gsi_move_before (&gsi_iv, &gsi);
6295 cand->pos = IP_BEFORE_USE;
6296 cand->incremented_at = use->stmt;
/* Rewrites USE (an address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
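  /* For example, if the candidate is based on the memory object p and aff
     works out to roughly "(unsigned) p_4 + 4 * i_2", the hint lets
     create_mem_ref rebuild the reference around p_4 rather than around an
     anonymous pointer.  (The names and shapes here are illustrative, not
     taken from any particular testcase.)  */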
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;
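
  /* A nonnull cp->value means the exit test can be expressed directly
     against a bound of the candidate; e.g. a test "if (i_7 != n_3)" may be
     rewritten as "if (ivtmp_9 < bound)", with the bound computed once in
     the loop preheader.  (The SSA names here are purely illustrative.)  */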
  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
	rewrite_use_nonlinear_expr (data, use, cand);
	break;

      case USE_ADDRESS:
	rewrite_use_address (data, use, cand);
	break;

      case USE_COMPARE:
	rewrite_use_compare (data, use, cand);
	break;

      default:
	gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
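	      /* Scan the selected candidates and pick the one whose IV
		 best resembles the removed one: a matching step counts
		 for 4, a matching mode for 2, and a constant base for 1,
		 so the criteria are effectively ordered by importance.  */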
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;
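
	      /* With several debug uses, bind the computation to a new
		 DEBUG_EXPR_DECL once, right after the def, and let all
		 the debug stmts refer to that temp instead.  */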
	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab.empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  data->inv_expr_tab.dispose ();
}
/* Returns true if the loop body BODY (with NUM_NODES blocks) includes any
   function calls other than inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}