1 /* Induction variable optimizations.
2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
70 #include "basic-block.h"
71 #include "gimple-pretty-print.h"
73 #include "gimple-ssa.h"
76 #include "tree-phinodes.h"
77 #include "ssa-iterators.h"
78 #include "tree-ssanames.h"
79 #include "tree-ssa-loop-ivopts.h"
80 #include "tree-ssa-loop-manip.h"
81 #include "tree-ssa-loop-niter.h"
82 #include "tree-ssa-loop.h"
86 #include "tree-pass.h"
88 #include "insn-config.h"
89 #include "pointer-set.h"
90 #include "hash-table.h"
91 #include "tree-chrec.h"
92 #include "tree-scalar-evolution.h"
95 #include "langhooks.h"
96 #include "tree-affine.h"
98 #include "tree-inline.h"
99 #include "tree-ssa-propagate.h"
101 #include "tree-ssa-address.h"
103 /* FIXME: Expressions are expanded to RTL in this pass to determine the
104 cost of different addressing modes. This should be moved to a TBD
105 interface between the GIMPLE and RTL worlds. */
109 /* The infinite cost. */
110 #define INFTY 10000000
112 #define AVG_LOOP_NITER(LOOP) 5
114 /* Returns the expected number of loop iterations for LOOP.
115 The average trip count is computed from profile data if it
118 static inline HOST_WIDE_INT
119 avg_loop_niter (struct loop
*loop
)
121 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
123 return AVG_LOOP_NITER (loop
);
128 /* Representation of the induction variable. */
131 tree base
; /* Initial value of the iv. */
132 tree base_object
; /* A memory object to that the induction variable points. */
133 tree step
; /* Step of the iv (constant only). */
134 tree ssa_name
; /* The ssa name with the value. */
135 bool biv_p
; /* Is it a biv? */
136 bool have_use_for
; /* Do we already have a use for it? */
137 unsigned use_id
; /* The identifier in the use if it is the case. */
140 /* Per-ssa version information (induction variable descriptions, etc.). */
143 tree name
; /* The ssa name. */
144 struct iv
*iv
; /* Induction variable description. */
145 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
146 an expression that is not an induction variable. */
147 bool preserve_biv
; /* For the original biv, whether to preserve it. */
148 unsigned inv_id
; /* Id of an invariant. */
154 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
155 USE_ADDRESS
, /* Use in an address. */
156 USE_COMPARE
/* Use is a compare. */
159 /* Cost of a computation. */
162 int cost
; /* The runtime cost. */
163 unsigned complexity
; /* The estimate of the complexity of the code for
164 the computation (in no concrete units --
165 complexity field should be larger for more
166 complex expressions and addressing modes). */
169 static const comp_cost no_cost
= {0, 0};
170 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
172 /* The candidate - cost pair. */
175 struct iv_cand
*cand
; /* The candidate. */
176 comp_cost cost
; /* The cost. */
177 bitmap depends_on
; /* The list of invariants that have to be
179 tree value
; /* For final value elimination, the expression for
180 the final value of the iv. For iv elimination,
181 the new bound to compare with. */
182 enum tree_code comp
; /* For iv elimination, the comparison. */
183 int inv_expr_id
; /* Loop invariant expression id. */
189 unsigned id
; /* The id of the use. */
190 enum use_type type
; /* Type of the use. */
191 struct iv
*iv
; /* The induction variable it is based on. */
192 gimple stmt
; /* Statement in that it occurs. */
193 tree
*op_p
; /* The place where it occurs. */
194 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
197 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
198 struct cost_pair
*cost_map
;
199 /* The costs wrto the iv candidates. */
201 struct iv_cand
*selected
;
202 /* The selected candidate. */
205 /* The position where the iv is computed. */
208 IP_NORMAL
, /* At the end, just before the exit condition. */
209 IP_END
, /* At the end of the latch block. */
210 IP_BEFORE_USE
, /* Immediately before a specific use. */
211 IP_AFTER_USE
, /* Immediately after a specific use. */
212 IP_ORIGINAL
/* The original biv. */
215 /* The induction variable candidate. */
218 unsigned id
; /* The number of the candidate. */
219 bool important
; /* Whether this is an "important" candidate, i.e. such
220 that it should be considered by all uses. */
221 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
222 gimple incremented_at
;/* For original biv, the statement where it is
224 tree var_before
; /* The variable used for it before increment. */
225 tree var_after
; /* The variable used for it after increment. */
226 struct iv
*iv
; /* The value of the candidate. NULL for
227 "pseudocandidate" used to indicate the possibility
228 to replace the final value of an iv by direct
229 computation of the value. */
230 unsigned cost
; /* Cost of the candidate. */
231 unsigned cost_step
; /* Cost of the candidate's increment operation. */
232 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
233 where it is incremented. */
234 bitmap depends_on
; /* The list of invariants that are used in step of the
238 /* Loop invariant expression hashtable entry. */
239 struct iv_inv_expr_ent
246 /* The data used by the induction variable optimizations. */
248 typedef struct iv_use
*iv_use_p
;
250 typedef struct iv_cand
*iv_cand_p
;
252 /* Hashtable helpers. */
254 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
256 typedef iv_inv_expr_ent value_type
;
257 typedef iv_inv_expr_ent compare_type
;
258 static inline hashval_t
hash (const value_type
*);
259 static inline bool equal (const value_type
*, const compare_type
*);
262 /* Hash function for loop invariant expressions. */
265 iv_inv_expr_hasher::hash (const value_type
*expr
)
270 /* Hash table equality function for expressions. */
273 iv_inv_expr_hasher::equal (const value_type
*expr1
, const compare_type
*expr2
)
275 return expr1
->hash
== expr2
->hash
276 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
281 /* The currently optimized loop. */
282 struct loop
*current_loop
;
284 /* Numbers of iterations for all exits of the current loop. */
285 struct pointer_map_t
*niters
;
287 /* Number of registers used in it. */
290 /* The size of version_info array allocated. */
291 unsigned version_info_size
;
293 /* The array of information for the ssa names. */
294 struct version_info
*version_info
;
296 /* The hashtable of loop invariant expressions created
298 hash_table
<iv_inv_expr_hasher
> inv_expr_tab
;
300 /* Loop invariant expression id. */
303 /* The bitmap of indices in version_info whose value was changed. */
306 /* The uses of induction variables. */
307 vec
<iv_use_p
> iv_uses
;
309 /* The candidates. */
310 vec
<iv_cand_p
> iv_candidates
;
312 /* A bitmap of important candidates. */
313 bitmap important_candidates
;
315 /* The maximum invariant id. */
318 /* Whether to consider just related and important candidates when replacing a
320 bool consider_all_candidates
;
322 /* Are we optimizing for speed? */
325 /* Whether the loop body includes any function calls. */
326 bool body_includes_call
;
328 /* Whether the loop body can only be exited via single exit. */
329 bool loop_single_exit_p
;
332 /* An assignment of iv candidates to uses. */
336 /* The number of uses covered by the assignment. */
339 /* Number of uses that cannot be expressed by the candidates in the set. */
342 /* Candidate assigned to a use, together with the related costs. */
343 struct cost_pair
**cand_for_use
;
345 /* Number of times each candidate is used. */
346 unsigned *n_cand_uses
;
348 /* The candidates used. */
351 /* The number of candidates in the set. */
354 /* Total number of registers needed. */
357 /* Total cost of expressing uses. */
358 comp_cost cand_use_cost
;
360 /* Total cost of candidates. */
363 /* Number of times each invariant is used. */
364 unsigned *n_invariant_uses
;
366 /* The array holding the number of uses of each loop
367 invariant expressions created by ivopt. */
368 unsigned *used_inv_expr
;
370 /* The number of created loop invariants. */
371 unsigned num_used_inv_expr
;
373 /* Total cost of the assignment. */
377 /* Difference of two iv candidate assignments. */
384 /* An old assignment (for rollback purposes). */
385 struct cost_pair
*old_cp
;
387 /* A new assignment. */
388 struct cost_pair
*new_cp
;
390 /* Next change in the list. */
391 struct iv_ca_delta
*next_change
;
394 /* Bound on number of candidates below that all candidates are considered. */
396 #define CONSIDER_ALL_CANDIDATES_BOUND \
397 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
399 /* If there are more iv occurrences, we just give up (it is quite unlikely that
400 optimizing such a loop would help, and it would take ages). */
402 #define MAX_CONSIDERED_USES \
403 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
405 /* If there are at most this number of ivs in the set, try removing unnecessary
406 ivs from the set always. */
408 #define ALWAYS_PRUNE_CAND_SET_BOUND \
409 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
411 /* The list of trees for that the decl_rtl field must be reset is stored
414 static vec
<tree
> decl_rtl_to_reset
;
416 static comp_cost
force_expr_to_var_cost (tree
, bool);
418 /* Number of uses recorded in DATA. */
420 static inline unsigned
421 n_iv_uses (struct ivopts_data
*data
)
423 return data
->iv_uses
.length ();
426 /* Ith use recorded in DATA. */
428 static inline struct iv_use
*
429 iv_use (struct ivopts_data
*data
, unsigned i
)
431 return data
->iv_uses
[i
];
434 /* Number of candidates recorded in DATA. */
436 static inline unsigned
437 n_iv_cands (struct ivopts_data
*data
)
439 return data
->iv_candidates
.length ();
442 /* Ith candidate recorded in DATA. */
444 static inline struct iv_cand
*
445 iv_cand (struct ivopts_data
*data
, unsigned i
)
447 return data
->iv_candidates
[i
];
450 /* The single loop exit if it dominates the latch, NULL otherwise. */
453 single_dom_exit (struct loop
*loop
)
455 edge exit
= single_exit (loop
);
460 if (!just_once_each_iteration_p (loop
, exit
->src
))
466 /* Dumps information about the induction variable IV to FILE. */
469 dump_iv (FILE *file
, struct iv
*iv
)
473 fprintf (file
, "ssa name ");
474 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
475 fprintf (file
, "\n");
478 fprintf (file
, " type ");
479 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
480 fprintf (file
, "\n");
484 fprintf (file
, " base ");
485 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
486 fprintf (file
, "\n");
488 fprintf (file
, " step ");
489 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
490 fprintf (file
, "\n");
494 fprintf (file
, " invariant ");
495 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
496 fprintf (file
, "\n");
501 fprintf (file
, " base object ");
502 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
503 fprintf (file
, "\n");
507 fprintf (file
, " is a biv\n");
510 /* Dumps information about the USE to FILE. */
513 dump_use (FILE *file
, struct iv_use
*use
)
515 fprintf (file
, "use %d\n", use
->id
);
519 case USE_NONLINEAR_EXPR
:
520 fprintf (file
, " generic\n");
524 fprintf (file
, " address\n");
528 fprintf (file
, " compare\n");
535 fprintf (file
, " in statement ");
536 print_gimple_stmt (file
, use
->stmt
, 0, 0);
537 fprintf (file
, "\n");
539 fprintf (file
, " at position ");
541 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
542 fprintf (file
, "\n");
544 dump_iv (file
, use
->iv
);
546 if (use
->related_cands
)
548 fprintf (file
, " related candidates ");
549 dump_bitmap (file
, use
->related_cands
);
553 /* Dumps information about the uses to FILE. */
556 dump_uses (FILE *file
, struct ivopts_data
*data
)
561 for (i
= 0; i
< n_iv_uses (data
); i
++)
563 use
= iv_use (data
, i
);
565 dump_use (file
, use
);
566 fprintf (file
, "\n");
570 /* Dumps information about induction variable candidate CAND to FILE. */
573 dump_cand (FILE *file
, struct iv_cand
*cand
)
575 struct iv
*iv
= cand
->iv
;
577 fprintf (file
, "candidate %d%s\n",
578 cand
->id
, cand
->important
? " (important)" : "");
580 if (cand
->depends_on
)
582 fprintf (file
, " depends on ");
583 dump_bitmap (file
, cand
->depends_on
);
588 fprintf (file
, " final value replacement\n");
592 if (cand
->var_before
)
594 fprintf (file
, " var_before ");
595 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
596 fprintf (file
, "\n");
600 fprintf (file
, " var_after ");
601 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
602 fprintf (file
, "\n");
608 fprintf (file
, " incremented before exit test\n");
612 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
616 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
620 fprintf (file
, " incremented at end\n");
624 fprintf (file
, " original biv\n");
631 /* Returns the info for ssa version VER. */
633 static inline struct version_info
*
634 ver_info (struct ivopts_data
*data
, unsigned ver
)
636 return data
->version_info
+ ver
;
639 /* Returns the info for ssa name NAME. */
641 static inline struct version_info
*
642 name_info (struct ivopts_data
*data
, tree name
)
644 return ver_info (data
, SSA_NAME_VERSION (name
));
647 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
651 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
653 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
657 if (sbb
== loop
->latch
)
663 return stmt
== last_stmt (bb
);
666 /* Returns true if STMT if after the place where the original induction
667 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
668 if the positions are identical. */
671 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
673 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
674 basic_block stmt_bb
= gimple_bb (stmt
);
676 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
679 if (stmt_bb
!= cand_bb
)
683 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
685 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
688 /* Returns true if STMT if after the place where the induction variable
689 CAND is incremented in LOOP. */
692 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
700 return stmt_after_ip_normal_pos (loop
, stmt
);
704 return stmt_after_inc_pos (cand
, stmt
, false);
707 return stmt_after_inc_pos (cand
, stmt
, true);
714 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
717 abnormal_ssa_name_p (tree exp
)
722 if (TREE_CODE (exp
) != SSA_NAME
)
725 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
728 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
729 abnormal phi node. Callback for for_each_index. */
732 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
733 void *data ATTRIBUTE_UNUSED
)
735 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
737 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
739 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
743 return !abnormal_ssa_name_p (*index
);
746 /* Returns true if EXPR contains a ssa name that occurs in an
747 abnormal phi node. */
750 contains_abnormal_ssa_name_p (tree expr
)
753 enum tree_code_class codeclass
;
758 code
= TREE_CODE (expr
);
759 codeclass
= TREE_CODE_CLASS (code
);
761 if (code
== SSA_NAME
)
762 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
764 if (code
== INTEGER_CST
765 || is_gimple_min_invariant (expr
))
768 if (code
== ADDR_EXPR
)
769 return !for_each_index (&TREE_OPERAND (expr
, 0),
770 idx_contains_abnormal_ssa_name_p
,
773 if (code
== COND_EXPR
)
774 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
775 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
776 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
782 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
787 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
799 /* Returns the structure describing number of iterations determined from
800 EXIT of DATA->current_loop, or NULL if something goes wrong. */
802 static struct tree_niter_desc
*
803 niter_for_exit (struct ivopts_data
*data
, edge exit
)
805 struct tree_niter_desc
*desc
;
810 data
->niters
= pointer_map_create ();
814 slot
= pointer_map_contains (data
->niters
, exit
);
818 /* Try to determine number of iterations. We cannot safely work with ssa
819 names that appear in phi nodes on abnormal edges, so that we do not
820 create overlapping life ranges for them (PR 27283). */
821 desc
= XNEW (struct tree_niter_desc
);
822 if (!number_of_iterations_exit (data
->current_loop
,
824 || contains_abnormal_ssa_name_p (desc
->niter
))
829 slot
= pointer_map_insert (data
->niters
, exit
);
833 desc
= (struct tree_niter_desc
*) *slot
;
838 /* Returns the structure describing number of iterations determined from
839 single dominating exit of DATA->current_loop, or NULL if something
842 static struct tree_niter_desc
*
843 niter_for_single_dom_exit (struct ivopts_data
*data
)
845 edge exit
= single_dom_exit (data
->current_loop
);
850 return niter_for_exit (data
, exit
);
853 /* Initializes data structures used by the iv optimization pass, stored
857 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
859 data
->version_info_size
= 2 * num_ssa_names
;
860 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
861 data
->relevant
= BITMAP_ALLOC (NULL
);
862 data
->important_candidates
= BITMAP_ALLOC (NULL
);
863 data
->max_inv_id
= 0;
865 data
->iv_uses
.create (20);
866 data
->iv_candidates
.create (20);
867 data
->inv_expr_tab
.create (10);
868 data
->inv_expr_id
= 0;
869 decl_rtl_to_reset
.create (20);
872 /* Returns a memory object to that EXPR points. In case we are able to
873 determine that it does not point to any such object, NULL is returned. */
876 determine_base_object (tree expr
)
878 enum tree_code code
= TREE_CODE (expr
);
881 /* If this is a pointer casted to any type, we need to determine
882 the base object for the pointer; so handle conversions before
883 throwing away non-pointer expressions. */
884 if (CONVERT_EXPR_P (expr
))
885 return determine_base_object (TREE_OPERAND (expr
, 0));
887 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
896 obj
= TREE_OPERAND (expr
, 0);
897 base
= get_base_address (obj
);
902 if (TREE_CODE (base
) == MEM_REF
)
903 return determine_base_object (TREE_OPERAND (base
, 0));
905 return fold_convert (ptr_type_node
,
906 build_fold_addr_expr (base
));
908 case POINTER_PLUS_EXPR
:
909 return determine_base_object (TREE_OPERAND (expr
, 0));
913 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
917 return fold_convert (ptr_type_node
, expr
);
921 /* Allocates an induction variable with given initial value BASE and step STEP
925 alloc_iv (tree base
, tree step
)
927 tree base_object
= base
;
928 struct iv
*iv
= XCNEW (struct iv
);
929 gcc_assert (step
!= NULL_TREE
);
931 /* Lower all address expressions except ones with DECL_P as operand.
933 1) More accurate cost can be computed for address expressions;
934 2) Duplicate candidates won't be created for bases in different
935 forms, like &a[0] and &a. */
936 STRIP_NOPS (base_object
);
937 if (TREE_CODE (base_object
) == ADDR_EXPR
938 && !DECL_P (TREE_OPERAND (base_object
, 0)))
942 base_object
= get_inner_reference_aff (TREE_OPERAND (base_object
, 0),
944 gcc_assert (base_object
!= NULL_TREE
);
945 base_object
= build_fold_addr_expr (base_object
);
946 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
950 iv
->base_object
= determine_base_object (base_object
);
953 iv
->have_use_for
= false;
955 iv
->ssa_name
= NULL_TREE
;
960 /* Sets STEP and BASE for induction variable IV. */
963 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
965 struct version_info
*info
= name_info (data
, iv
);
967 gcc_assert (!info
->iv
);
969 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
970 info
->iv
= alloc_iv (base
, step
);
971 info
->iv
->ssa_name
= iv
;
974 /* Finds induction variable declaration for VAR. */
977 get_iv (struct ivopts_data
*data
, tree var
)
980 tree type
= TREE_TYPE (var
);
982 if (!POINTER_TYPE_P (type
)
983 && !INTEGRAL_TYPE_P (type
))
986 if (!name_info (data
, var
)->iv
)
988 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
991 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
992 set_iv (data
, var
, var
, build_int_cst (type
, 0));
995 return name_info (data
, var
)->iv
;
998 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
999 not define a simple affine biv with nonzero step. */
1002 determine_biv_step (gimple phi
)
1004 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1005 tree name
= PHI_RESULT (phi
);
1008 if (virtual_operand_p (name
))
1011 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1014 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1017 /* Finds basic ivs. */
1020 find_bivs (struct ivopts_data
*data
)
1023 tree step
, type
, base
;
1025 struct loop
*loop
= data
->current_loop
;
1026 gimple_stmt_iterator psi
;
1028 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1030 phi
= gsi_stmt (psi
);
1032 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1035 step
= determine_biv_step (phi
);
1039 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1040 base
= expand_simple_operations (base
);
1041 if (contains_abnormal_ssa_name_p (base
)
1042 || contains_abnormal_ssa_name_p (step
))
1045 type
= TREE_TYPE (PHI_RESULT (phi
));
1046 base
= fold_convert (type
, base
);
1049 if (POINTER_TYPE_P (type
))
1050 step
= convert_to_ptrofftype (step
);
1052 step
= fold_convert (type
, step
);
1055 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1062 /* Marks basic ivs. */
1065 mark_bivs (struct ivopts_data
*data
)
1069 struct iv
*iv
, *incr_iv
;
1070 struct loop
*loop
= data
->current_loop
;
1071 basic_block incr_bb
;
1072 gimple_stmt_iterator psi
;
1074 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1076 phi
= gsi_stmt (psi
);
1078 iv
= get_iv (data
, PHI_RESULT (phi
));
1082 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1083 incr_iv
= get_iv (data
, var
);
1087 /* If the increment is in the subloop, ignore it. */
1088 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1089 if (incr_bb
->loop_father
!= data
->current_loop
1090 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1094 incr_iv
->biv_p
= true;
1098 /* Checks whether STMT defines a linear induction variable and stores its
1099 parameters to IV. */
1102 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1105 struct loop
*loop
= data
->current_loop
;
1107 iv
->base
= NULL_TREE
;
1108 iv
->step
= NULL_TREE
;
1110 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1113 lhs
= gimple_assign_lhs (stmt
);
1114 if (TREE_CODE (lhs
) != SSA_NAME
)
1117 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1119 iv
->base
= expand_simple_operations (iv
->base
);
1121 if (contains_abnormal_ssa_name_p (iv
->base
)
1122 || contains_abnormal_ssa_name_p (iv
->step
))
1125 /* If STMT could throw, then do not consider STMT as defining a GIV.
1126 While this will suppress optimizations, we can not safely delete this
1127 GIV and associated statements, even if it appears it is not used. */
1128 if (stmt_could_throw_p (stmt
))
1134 /* Finds general ivs in statement STMT. */
1137 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1141 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1144 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1147 /* Finds general ivs in basic block BB. */
1150 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1152 gimple_stmt_iterator bsi
;
1154 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1155 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1158 /* Finds general ivs. */
1161 find_givs (struct ivopts_data
*data
)
1163 struct loop
*loop
= data
->current_loop
;
1164 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1167 for (i
= 0; i
< loop
->num_nodes
; i
++)
1168 find_givs_in_bb (data
, body
[i
]);
1172 /* For each ssa name defined in LOOP determines whether it is an induction
1173 variable and if so, its initial value and step. */
1176 find_induction_variables (struct ivopts_data
*data
)
1181 if (!find_bivs (data
))
1187 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1189 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1193 fprintf (dump_file
, " number of iterations ");
1194 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1195 if (!integer_zerop (niter
->may_be_zero
))
1197 fprintf (dump_file
, "; zero if ");
1198 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1200 fprintf (dump_file
, "\n\n");
1203 fprintf (dump_file
, "Induction variables:\n\n");
1205 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1207 if (ver_info (data
, i
)->iv
)
1208 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1215 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1217 static struct iv_use
*
1218 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1219 gimple stmt
, enum use_type use_type
)
1221 struct iv_use
*use
= XCNEW (struct iv_use
);
1223 use
->id
= n_iv_uses (data
);
1224 use
->type
= use_type
;
1228 use
->related_cands
= BITMAP_ALLOC (NULL
);
1230 /* To avoid showing ssa name in the dumps, if it was not reset by the
1232 iv
->ssa_name
= NULL_TREE
;
1234 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1235 dump_use (dump_file
, use
);
1237 data
->iv_uses
.safe_push (use
);
1242 /* Checks whether OP is a loop-level invariant and if so, records it.
1243 NONLINEAR_USE is true if the invariant is used in a way we do not
1244 handle specially. */
1247 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1250 struct version_info
*info
;
1252 if (TREE_CODE (op
) != SSA_NAME
1253 || virtual_operand_p (op
))
1256 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1258 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1261 info
= name_info (data
, op
);
1263 info
->has_nonlin_use
|= nonlinear_use
;
1265 info
->inv_id
= ++data
->max_inv_id
;
1266 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1269 /* Checks whether the use OP is interesting and if so, records it. */
1271 static struct iv_use
*
1272 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1279 if (TREE_CODE (op
) != SSA_NAME
)
1282 iv
= get_iv (data
, op
);
1286 if (iv
->have_use_for
)
1288 use
= iv_use (data
, iv
->use_id
);
1290 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1294 if (integer_zerop (iv
->step
))
1296 record_invariant (data
, op
, true);
1299 iv
->have_use_for
= true;
1301 civ
= XNEW (struct iv
);
1304 stmt
= SSA_NAME_DEF_STMT (op
);
1305 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1306 || is_gimple_assign (stmt
));
1308 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1309 iv
->use_id
= use
->id
;
1314 /* Given a condition in statement STMT, checks whether it is a compare
1315 of an induction variable and an invariant. If this is the case,
1316 CONTROL_VAR is set to location of the iv, BOUND to the location of
1317 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1318 induction variable descriptions, and true is returned. If this is not
1319 the case, CONTROL_VAR and BOUND are set to the arguments of the
1320 condition and false is returned. */
1323 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1324 tree
**control_var
, tree
**bound
,
1325 struct iv
**iv_var
, struct iv
**iv_bound
)
1327 /* The objects returned when COND has constant operands. */
1328 static struct iv const_iv
;
1330 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1331 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1334 if (gimple_code (stmt
) == GIMPLE_COND
)
1336 op0
= gimple_cond_lhs_ptr (stmt
);
1337 op1
= gimple_cond_rhs_ptr (stmt
);
1341 op0
= gimple_assign_rhs1_ptr (stmt
);
1342 op1
= gimple_assign_rhs2_ptr (stmt
);
1345 zero
= integer_zero_node
;
1346 const_iv
.step
= integer_zero_node
;
1348 if (TREE_CODE (*op0
) == SSA_NAME
)
1349 iv0
= get_iv (data
, *op0
);
1350 if (TREE_CODE (*op1
) == SSA_NAME
)
1351 iv1
= get_iv (data
, *op1
);
1353 /* Exactly one of the compared values must be an iv, and the other one must
1358 if (integer_zerop (iv0
->step
))
1360 /* Control variable may be on the other side. */
1361 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1362 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1364 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1368 *control_var
= op0
;;
1379 /* Checks whether the condition in STMT is interesting and if so,
1383 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1385 tree
*var_p
, *bound_p
;
1386 struct iv
*var_iv
, *civ
;
1388 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1390 find_interesting_uses_op (data
, *var_p
);
1391 find_interesting_uses_op (data
, *bound_p
);
1395 civ
= XNEW (struct iv
);
1397 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1400 /* Returns the outermost loop EXPR is obviously invariant in
1401 relative to the loop LOOP, i.e. if all its operands are defined
1402 outside of the returned loop. Returns NULL if EXPR is not
1403 even obviously invariant in LOOP. */
1406 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1411 if (is_gimple_min_invariant (expr
))
1412 return current_loops
->tree_root
;
1414 if (TREE_CODE (expr
) == SSA_NAME
)
1416 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1419 if (flow_bb_inside_loop_p (loop
, def_bb
))
1421 return superloop_at_depth (loop
,
1422 loop_depth (def_bb
->loop_father
) + 1);
1425 return current_loops
->tree_root
;
1431 unsigned maxdepth
= 0;
1432 len
= TREE_OPERAND_LENGTH (expr
);
1433 for (i
= 0; i
< len
; i
++)
1435 struct loop
*ivloop
;
1436 if (!TREE_OPERAND (expr
, i
))
1439 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1442 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1445 return superloop_at_depth (loop
, maxdepth
);
1448 /* Returns true if expression EXPR is obviously invariant in LOOP,
1449 i.e. if all its operands are defined outside of the LOOP. LOOP
1450 should not be the function body. */
1453 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1458 gcc_assert (loop_depth (loop
) > 0);
1460 if (is_gimple_min_invariant (expr
))
1463 if (TREE_CODE (expr
) == SSA_NAME
)
1465 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1467 && flow_bb_inside_loop_p (loop
, def_bb
))
1476 len
= TREE_OPERAND_LENGTH (expr
);
1477 for (i
= 0; i
< len
; i
++)
1478 if (TREE_OPERAND (expr
, i
)
1479 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1485 /* Cumulates the steps of indices into DATA and replaces their values with the
1486 initial ones. Returns false when the value of the index cannot be determined.
1487 Callback for for_each_index. */
1489 struct ifs_ivopts_data
1491 struct ivopts_data
*ivopts_data
;
1497 idx_find_step (tree base
, tree
*idx
, void *data
)
1499 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1501 tree step
, iv_base
, iv_step
, lbound
, off
;
1502 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1504 /* If base is a component ref, require that the offset of the reference
1506 if (TREE_CODE (base
) == COMPONENT_REF
)
1508 off
= component_ref_field_offset (base
);
1509 return expr_invariant_in_loop_p (loop
, off
);
1512 /* If base is array, first check whether we will be able to move the
1513 reference out of the loop (in order to take its address in strength
1514 reduction). In order for this to work we need both lower bound
1515 and step to be loop invariants. */
1516 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1518 /* Moreover, for a range, the size needs to be invariant as well. */
1519 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1520 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1523 step
= array_ref_element_size (base
);
1524 lbound
= array_ref_low_bound (base
);
1526 if (!expr_invariant_in_loop_p (loop
, step
)
1527 || !expr_invariant_in_loop_p (loop
, lbound
))
1531 if (TREE_CODE (*idx
) != SSA_NAME
)
1534 iv
= get_iv (dta
->ivopts_data
, *idx
);
1538 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1539 *&x[0], which is not folded and does not trigger the
1540 ARRAY_REF path below. */
1543 if (integer_zerop (iv
->step
))
1546 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1548 step
= array_ref_element_size (base
);
1550 /* We only handle addresses whose step is an integer constant. */
1551 if (TREE_CODE (step
) != INTEGER_CST
)
1555 /* The step for pointer arithmetics already is 1 byte. */
1556 step
= size_one_node
;
1560 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1561 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1564 /* The index might wrap. */
1568 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1569 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1574 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1575 object is passed to it in DATA. */
1578 idx_record_use (tree base
, tree
*idx
,
1581 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1582 find_interesting_uses_op (data
, *idx
);
1583 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1585 find_interesting_uses_op (data
, array_ref_element_size (base
));
1586 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1591 /* If we can prove that TOP = cst * BOT for some constant cst,
1592 store cst to MUL and return true. Otherwise return false.
1593 The returned value is always sign-extended, regardless of the
1594 signedness of TOP and BOT. */
1597 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1600 enum tree_code code
;
1601 double_int res
, p0
, p1
;
1602 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1607 if (operand_equal_p (top
, bot
, 0))
1609 *mul
= double_int_one
;
1613 code
= TREE_CODE (top
);
1617 mby
= TREE_OPERAND (top
, 1);
1618 if (TREE_CODE (mby
) != INTEGER_CST
)
1621 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1624 *mul
= (res
* tree_to_double_int (mby
)).sext (precision
);
1629 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1630 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1633 if (code
== MINUS_EXPR
)
1635 *mul
= (p0
+ p1
).sext (precision
);
1639 if (TREE_CODE (bot
) != INTEGER_CST
)
1642 p0
= tree_to_double_int (top
).sext (precision
);
1643 p1
= tree_to_double_int (bot
).sext (precision
);
1646 *mul
= p0
.sdivmod (p1
, FLOOR_DIV_EXPR
, &res
).sext (precision
);
1647 return res
.is_zero ();
1654 /* Returns true if memory reference REF with step STEP may be unaligned. */
1657 may_be_unaligned_p (tree ref
, tree step
)
1661 HOST_WIDE_INT bitsize
;
1662 HOST_WIDE_INT bitpos
;
1664 enum machine_mode mode
;
1665 int unsignedp
, volatilep
;
1666 unsigned base_align
;
1668 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1669 thus they are not misaligned. */
1670 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1673 /* The test below is basically copy of what expr.c:normal_inner_ref
1674 does to check whether the object must be loaded by parts when
1675 STRICT_ALIGNMENT is true. */
1676 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1677 &unsignedp
, &volatilep
, true);
1678 base_type
= TREE_TYPE (base
);
1679 base_align
= get_object_alignment (base
);
1680 base_align
= MAX (base_align
, TYPE_ALIGN (base_type
));
1682 if (mode
!= BLKmode
)
1684 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1686 if (base_align
< mode_align
1687 || (bitpos
% mode_align
) != 0
1688 || (bitpos
% BITS_PER_UNIT
) != 0)
1692 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1695 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1702 /* Return true if EXPR may be non-addressable. */
1705 may_be_nonaddressable_p (tree expr
)
1707 switch (TREE_CODE (expr
))
1709 case TARGET_MEM_REF
:
1710 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1711 target, thus they are always addressable. */
1715 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1716 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1718 case VIEW_CONVERT_EXPR
:
1719 /* This kind of view-conversions may wrap non-addressable objects
1720 and make them look addressable. After some processing the
1721 non-addressability may be uncovered again, causing ADDR_EXPRs
1722 of inappropriate objects to be built. */
1723 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1724 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1727 /* ... fall through ... */
1730 case ARRAY_RANGE_REF
:
1731 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1743 /* Finds addresses in *OP_P inside STMT. */
1746 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1748 tree base
= *op_p
, step
= size_zero_node
;
1750 struct ifs_ivopts_data ifs_ivopts_data
;
1752 /* Do not play with volatile memory references. A bit too conservative,
1753 perhaps, but safe. */
1754 if (gimple_has_volatile_ops (stmt
))
1757 /* Ignore bitfields for now. Not really something terribly complicated
1759 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1762 base
= unshare_expr (base
);
1764 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1766 tree type
= build_pointer_type (TREE_TYPE (base
));
1770 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1772 civ
= get_iv (data
, TMR_BASE (base
));
1776 TMR_BASE (base
) = civ
->base
;
1779 if (TMR_INDEX2 (base
)
1780 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1782 civ
= get_iv (data
, TMR_INDEX2 (base
));
1786 TMR_INDEX2 (base
) = civ
->base
;
1789 if (TMR_INDEX (base
)
1790 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1792 civ
= get_iv (data
, TMR_INDEX (base
));
1796 TMR_INDEX (base
) = civ
->base
;
1801 if (TMR_STEP (base
))
1802 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1804 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1808 if (integer_zerop (step
))
1810 base
= tree_mem_ref_addr (type
, base
);
1814 ifs_ivopts_data
.ivopts_data
= data
;
1815 ifs_ivopts_data
.stmt
= stmt
;
1816 ifs_ivopts_data
.step
= size_zero_node
;
1817 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1818 || integer_zerop (ifs_ivopts_data
.step
))
1820 step
= ifs_ivopts_data
.step
;
1822 /* Check that the base expression is addressable. This needs
1823 to be done after substituting bases of IVs into it. */
1824 if (may_be_nonaddressable_p (base
))
1827 /* Moreover, on strict alignment platforms, check that it is
1828 sufficiently aligned. */
1829 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1832 base
= build_fold_addr_expr (base
);
1834 /* Substituting bases of IVs into the base expression might
1835 have caused folding opportunities. */
1836 if (TREE_CODE (base
) == ADDR_EXPR
)
1838 tree
*ref
= &TREE_OPERAND (base
, 0);
1839 while (handled_component_p (*ref
))
1840 ref
= &TREE_OPERAND (*ref
, 0);
1841 if (TREE_CODE (*ref
) == MEM_REF
)
1843 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1844 TREE_OPERAND (*ref
, 0),
1845 TREE_OPERAND (*ref
, 1));
1852 civ
= alloc_iv (base
, step
);
1853 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1857 for_each_index (op_p
, idx_record_use
, data
);
1860 /* Finds and records invariants used in STMT. */
1863 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1866 use_operand_p use_p
;
1869 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1871 op
= USE_FROM_PTR (use_p
);
1872 record_invariant (data
, op
, false);
1876 /* Finds interesting uses of induction variables in the statement STMT. */
1879 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1882 tree op
, *lhs
, *rhs
;
1884 use_operand_p use_p
;
1885 enum tree_code code
;
1887 find_invariants_stmt (data
, stmt
);
1889 if (gimple_code (stmt
) == GIMPLE_COND
)
1891 find_interesting_uses_cond (data
, stmt
);
1895 if (is_gimple_assign (stmt
))
1897 lhs
= gimple_assign_lhs_ptr (stmt
);
1898 rhs
= gimple_assign_rhs1_ptr (stmt
);
1900 if (TREE_CODE (*lhs
) == SSA_NAME
)
1902 /* If the statement defines an induction variable, the uses are not
1903 interesting by themselves. */
1905 iv
= get_iv (data
, *lhs
);
1907 if (iv
&& !integer_zerop (iv
->step
))
1911 code
= gimple_assign_rhs_code (stmt
);
1912 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1913 && (REFERENCE_CLASS_P (*rhs
)
1914 || is_gimple_val (*rhs
)))
1916 if (REFERENCE_CLASS_P (*rhs
))
1917 find_interesting_uses_address (data
, stmt
, rhs
);
1919 find_interesting_uses_op (data
, *rhs
);
1921 if (REFERENCE_CLASS_P (*lhs
))
1922 find_interesting_uses_address (data
, stmt
, lhs
);
1925 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1927 find_interesting_uses_cond (data
, stmt
);
1931 /* TODO -- we should also handle address uses of type
1933 memory = call (whatever);
1940 if (gimple_code (stmt
) == GIMPLE_PHI
1941 && gimple_bb (stmt
) == data
->current_loop
->header
)
1943 iv
= get_iv (data
, PHI_RESULT (stmt
));
1945 if (iv
&& !integer_zerop (iv
->step
))
1949 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1951 op
= USE_FROM_PTR (use_p
);
1953 if (TREE_CODE (op
) != SSA_NAME
)
1956 iv
= get_iv (data
, op
);
1960 find_interesting_uses_op (data
, op
);
1964 /* Finds interesting uses of induction variables outside of loops
1965 on loop exit edge EXIT. */
1968 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1971 gimple_stmt_iterator psi
;
1974 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1976 phi
= gsi_stmt (psi
);
1977 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1978 if (!virtual_operand_p (def
))
1979 find_interesting_uses_op (data
, def
);
1983 /* Finds uses of the induction variables that are interesting. */
1986 find_interesting_uses (struct ivopts_data
*data
)
1989 gimple_stmt_iterator bsi
;
1990 basic_block
*body
= get_loop_body (data
->current_loop
);
1992 struct version_info
*info
;
1995 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1996 fprintf (dump_file
, "Uses:\n\n");
1998 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2003 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2004 if (e
->dest
!= EXIT_BLOCK_PTR
2005 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2006 find_interesting_uses_outside (data
, e
);
2008 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2009 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2010 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2011 if (!is_gimple_debug (gsi_stmt (bsi
)))
2012 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2015 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2019 fprintf (dump_file
, "\n");
2021 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2023 info
= ver_info (data
, i
);
2026 fprintf (dump_file
, " ");
2027 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2028 fprintf (dump_file
, " is invariant (%d)%s\n",
2029 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2033 fprintf (dump_file
, "\n");
2039 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2040 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2041 we are at the top-level of the processed address. */
2044 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2045 HOST_WIDE_INT
*offset
)
2047 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2048 enum tree_code code
;
2049 tree type
, orig_type
= TREE_TYPE (expr
);
2050 HOST_WIDE_INT off0
, off1
, st
;
2051 tree orig_expr
= expr
;
2055 type
= TREE_TYPE (expr
);
2056 code
= TREE_CODE (expr
);
2062 if (!cst_and_fits_in_hwi (expr
)
2063 || integer_zerop (expr
))
2066 *offset
= int_cst_value (expr
);
2067 return build_int_cst (orig_type
, 0);
2069 case POINTER_PLUS_EXPR
:
2072 op0
= TREE_OPERAND (expr
, 0);
2073 op1
= TREE_OPERAND (expr
, 1);
2075 op0
= strip_offset_1 (op0
, false, false, &off0
);
2076 op1
= strip_offset_1 (op1
, false, false, &off1
);
2078 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2079 if (op0
== TREE_OPERAND (expr
, 0)
2080 && op1
== TREE_OPERAND (expr
, 1))
2083 if (integer_zerop (op1
))
2085 else if (integer_zerop (op0
))
2087 if (code
== MINUS_EXPR
)
2088 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2093 expr
= fold_build2 (code
, type
, op0
, op1
);
2095 return fold_convert (orig_type
, expr
);
2098 op1
= TREE_OPERAND (expr
, 1);
2099 if (!cst_and_fits_in_hwi (op1
))
2102 op0
= TREE_OPERAND (expr
, 0);
2103 op0
= strip_offset_1 (op0
, false, false, &off0
);
2104 if (op0
== TREE_OPERAND (expr
, 0))
2107 *offset
= off0
* int_cst_value (op1
);
2108 if (integer_zerop (op0
))
2111 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2113 return fold_convert (orig_type
, expr
);
2116 case ARRAY_RANGE_REF
:
2120 step
= array_ref_element_size (expr
);
2121 if (!cst_and_fits_in_hwi (step
))
2124 st
= int_cst_value (step
);
2125 op1
= TREE_OPERAND (expr
, 1);
2126 op1
= strip_offset_1 (op1
, false, false, &off1
);
2127 *offset
= off1
* st
;
2130 && integer_zerop (op1
))
2132 /* Strip the component reference completely. */
2133 op0
= TREE_OPERAND (expr
, 0);
2134 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2147 tmp
= component_ref_field_offset (expr
);
2148 field
= TREE_OPERAND (expr
, 1);
2150 && cst_and_fits_in_hwi (tmp
)
2151 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2153 HOST_WIDE_INT boffset
, abs_off
;
2155 /* Strip the component reference completely. */
2156 op0
= TREE_OPERAND (expr
, 0);
2157 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2158 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2159 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2163 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2170 op0
= TREE_OPERAND (expr
, 0);
2171 op0
= strip_offset_1 (op0
, true, true, &off0
);
2174 if (op0
== TREE_OPERAND (expr
, 0))
2177 expr
= build_fold_addr_expr (op0
);
2178 return fold_convert (orig_type
, expr
);
2181 /* ??? Offset operand? */
2182 inside_addr
= false;
2189 /* Default handling of expressions for that we want to recurse into
2190 the first operand. */
2191 op0
= TREE_OPERAND (expr
, 0);
2192 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2195 if (op0
== TREE_OPERAND (expr
, 0)
2196 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2199 expr
= copy_node (expr
);
2200 TREE_OPERAND (expr
, 0) = op0
;
2202 TREE_OPERAND (expr
, 1) = op1
;
2204 /* Inside address, we might strip the top level component references,
2205 thus changing type of the expression. Handling of ADDR_EXPR
2207 expr
= fold_convert (orig_type
, expr
);
2212 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2215 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2218 tree core
= strip_offset_1 (expr
, false, false, &off
);
2223 /* Returns variant of TYPE that can be used as base for different uses.
2224 We return unsigned type with the same precision, which avoids problems
2228 generic_type_for (tree type
)
2230 if (POINTER_TYPE_P (type
))
2231 return unsigned_type_for (type
);
2233 if (TYPE_UNSIGNED (type
))
2236 return unsigned_type_for (type
);
2239 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2240 the bitmap to that we should store it. */
2242 static struct ivopts_data
*fd_ivopts_data
;
2244 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2246 bitmap
*depends_on
= (bitmap
*) data
;
2247 struct version_info
*info
;
2249 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2251 info
= name_info (fd_ivopts_data
, *expr_p
);
2253 if (!info
->inv_id
|| info
->has_nonlin_use
)
2257 *depends_on
= BITMAP_ALLOC (NULL
);
2258 bitmap_set_bit (*depends_on
, info
->inv_id
);
2263 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2264 position to POS. If USE is not NULL, the candidate is set as related to
2265 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2266 replacement of the final value of the iv by a direct computation. */
2268 static struct iv_cand
*
2269 add_candidate_1 (struct ivopts_data
*data
,
2270 tree base
, tree step
, bool important
, enum iv_position pos
,
2271 struct iv_use
*use
, gimple incremented_at
)
2274 struct iv_cand
*cand
= NULL
;
2275 tree type
, orig_type
;
2277 /* For non-original variables, make sure their values are computed in a type
2278 that does not invoke undefined behavior on overflows (since in general,
2279 we cannot prove that these induction variables are non-wrapping). */
2280 if (pos
!= IP_ORIGINAL
)
2282 orig_type
= TREE_TYPE (base
);
2283 type
= generic_type_for (orig_type
);
2284 if (type
!= orig_type
)
2286 base
= fold_convert (type
, base
);
2287 step
= fold_convert (type
, step
);
2291 for (i
= 0; i
< n_iv_cands (data
); i
++)
2293 cand
= iv_cand (data
, i
);
2295 if (cand
->pos
!= pos
)
2298 if (cand
->incremented_at
!= incremented_at
2299 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2300 && cand
->ainc_use
!= use
))
2314 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2315 && operand_equal_p (step
, cand
->iv
->step
, 0)
2316 && (TYPE_PRECISION (TREE_TYPE (base
))
2317 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2321 if (i
== n_iv_cands (data
))
2323 cand
= XCNEW (struct iv_cand
);
2329 cand
->iv
= alloc_iv (base
, step
);
2332 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2334 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2335 cand
->var_after
= cand
->var_before
;
2337 cand
->important
= important
;
2338 cand
->incremented_at
= incremented_at
;
2339 data
->iv_candidates
.safe_push (cand
);
2342 && TREE_CODE (step
) != INTEGER_CST
)
2344 fd_ivopts_data
= data
;
2345 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2348 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2349 cand
->ainc_use
= use
;
2351 cand
->ainc_use
= NULL
;
2353 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2354 dump_cand (dump_file
, cand
);
2357 if (important
&& !cand
->important
)
2359 cand
->important
= true;
2360 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2361 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2366 bitmap_set_bit (use
->related_cands
, i
);
2367 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2368 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2375 /* Returns true if incrementing the induction variable at the end of the LOOP
2378 The purpose is to avoid splitting latch edge with a biv increment, thus
2379 creating a jump, possibly confusing other optimization passes and leaving
2380 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2381 is not available (so we do not have a better alternative), or if the latch
2382 edge is already nonempty. */
2385 allow_ip_end_pos_p (struct loop
*loop
)
2387 if (!ip_normal_pos (loop
))
2390 if (!empty_block_p (ip_end_pos (loop
)))
2396 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2397 Important field is set to IMPORTANT. */
2400 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2401 bool important
, struct iv_use
*use
)
2403 basic_block use_bb
= gimple_bb (use
->stmt
);
2404 enum machine_mode mem_mode
;
2405 unsigned HOST_WIDE_INT cstepi
;
2407 /* If we insert the increment in any position other than the standard
2408 ones, we must ensure that it is incremented once per iteration.
2409 It must not be in an inner nested loop, or one side of an if
2411 if (use_bb
->loop_father
!= data
->current_loop
2412 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2413 || stmt_could_throw_p (use
->stmt
)
2414 || !cst_and_fits_in_hwi (step
))
2417 cstepi
= int_cst_value (step
);
2419 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2420 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2421 || USE_STORE_PRE_INCREMENT (mem_mode
))
2422 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2423 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2424 || USE_STORE_PRE_DECREMENT (mem_mode
))
2425 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2427 enum tree_code code
= MINUS_EXPR
;
2429 tree new_step
= step
;
2431 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2433 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2434 code
= POINTER_PLUS_EXPR
;
2437 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2438 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2439 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2442 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2443 || USE_STORE_POST_INCREMENT (mem_mode
))
2444 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2445 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2446 || USE_STORE_POST_DECREMENT (mem_mode
))
2447 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2449 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2454 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2455 position to POS. If USE is not NULL, the candidate is set as related to
2456 it. The candidate computation is scheduled on all available positions. */
2459 add_candidate (struct ivopts_data
*data
,
2460 tree base
, tree step
, bool important
, struct iv_use
*use
)
2462 if (ip_normal_pos (data
->current_loop
))
2463 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2464 if (ip_end_pos (data
->current_loop
)
2465 && allow_ip_end_pos_p (data
->current_loop
))
2466 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2468 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2469 add_autoinc_candidates (data
, base
, step
, important
, use
);
2472 /* Adds standard iv candidates. */
2475 add_standard_iv_candidates (struct ivopts_data
*data
)
2477 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2479 /* The same for a double-integer type if it is still fast enough. */
2481 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2482 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2483 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2484 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2486 /* The same for a double-integer type if it is still fast enough. */
2488 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2489 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2490 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2491 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2495 /* Adds candidates bases on the old induction variable IV. */
2498 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2502 struct iv_cand
*cand
;
2504 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2506 /* The same, but with initial value zero. */
2507 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2508 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2510 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2511 iv
->step
, true, NULL
);
2513 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2514 if (gimple_code (phi
) == GIMPLE_PHI
)
2516 /* Additionally record the possibility of leaving the original iv
2518 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2519 cand
= add_candidate_1 (data
,
2520 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2521 SSA_NAME_DEF_STMT (def
));
2522 cand
->var_before
= iv
->ssa_name
;
2523 cand
->var_after
= def
;
2527 /* Adds candidates based on the old induction variables. */
2530 add_old_ivs_candidates (struct ivopts_data
*data
)
2536 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2538 iv
= ver_info (data
, i
)->iv
;
2539 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2540 add_old_iv_candidates (data
, iv
);
2544 /* Adds candidates based on the value of the induction variable IV and USE. */
2547 add_iv_value_candidates (struct ivopts_data
*data
,
2548 struct iv
*iv
, struct iv_use
*use
)
2550 unsigned HOST_WIDE_INT offset
;
2554 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2556 /* The same, but with initial value zero. Make such variable important,
2557 since it is generic enough so that possibly many uses may be based
2559 basetype
= TREE_TYPE (iv
->base
);
2560 if (POINTER_TYPE_P (basetype
))
2561 basetype
= sizetype
;
2562 add_candidate (data
, build_int_cst (basetype
, 0),
2563 iv
->step
, true, use
);
2565 /* Third, try removing the constant offset. Make sure to even
2566 add a candidate for &a[0] vs. (T *)&a. */
2567 base
= strip_offset (iv
->base
, &offset
);
2569 || base
!= iv
->base
)
2570 add_candidate (data
, base
, iv
->step
, false, use
);
2573 /* Adds candidates based on the uses. */
2576 add_derived_ivs_candidates (struct ivopts_data
*data
)
2580 for (i
= 0; i
< n_iv_uses (data
); i
++)
2582 struct iv_use
*use
= iv_use (data
, i
);
2589 case USE_NONLINEAR_EXPR
:
2592 /* Just add the ivs based on the value of the iv used here. */
2593 add_iv_value_candidates (data
, use
->iv
, use
);
2602 /* Record important candidates and add them to related_cands bitmaps
2606 record_important_candidates (struct ivopts_data
*data
)
2611 for (i
= 0; i
< n_iv_cands (data
); i
++)
2613 struct iv_cand
*cand
= iv_cand (data
, i
);
2615 if (cand
->important
)
2616 bitmap_set_bit (data
->important_candidates
, i
);
2619 data
->consider_all_candidates
= (n_iv_cands (data
)
2620 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2622 if (data
->consider_all_candidates
)
2624 /* We will not need "related_cands" bitmaps in this case,
2625 so release them to decrease peak memory consumption. */
2626 for (i
= 0; i
< n_iv_uses (data
); i
++)
2628 use
= iv_use (data
, i
);
2629 BITMAP_FREE (use
->related_cands
);
2634 /* Add important candidates to the related_cands bitmaps. */
2635 for (i
= 0; i
< n_iv_uses (data
); i
++)
2636 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2637 data
->important_candidates
);
2641 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2642 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2643 we allocate a simple list to every use. */
2646 alloc_use_cost_map (struct ivopts_data
*data
)
2648 unsigned i
, size
, s
;
2650 for (i
= 0; i
< n_iv_uses (data
); i
++)
2652 struct iv_use
*use
= iv_use (data
, i
);
2654 if (data
->consider_all_candidates
)
2655 size
= n_iv_cands (data
);
2658 s
= bitmap_count_bits (use
->related_cands
);
2660 /* Round up to the power of two, so that moduling by it is fast. */
2661 size
= s
? (1 << ceil_log2 (s
)) : 1;
2664 use
->n_map_members
= size
;
2665 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2669 /* Returns description of computation cost of expression whose runtime
2670 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2673 new_cost (unsigned runtime
, unsigned complexity
)
2677 cost
.cost
= runtime
;
2678 cost
.complexity
= complexity
;
2683 /* Adds costs COST1 and COST2. */
2686 add_costs (comp_cost cost1
, comp_cost cost2
)
2688 cost1
.cost
+= cost2
.cost
;
2689 cost1
.complexity
+= cost2
.complexity
;
2693 /* Subtracts costs COST1 and COST2. */
2696 sub_costs (comp_cost cost1
, comp_cost cost2
)
2698 cost1
.cost
-= cost2
.cost
;
2699 cost1
.complexity
-= cost2
.complexity
;
2704 /* Returns a negative number if COST1 < COST2, a positive number if
2705 COST1 > COST2, and 0 if COST1 = COST2. */
2708 compare_costs (comp_cost cost1
, comp_cost cost2
)
2710 if (cost1
.cost
== cost2
.cost
)
2711 return cost1
.complexity
- cost2
.complexity
;
2713 return cost1
.cost
- cost2
.cost
;
2716 /* Returns true if COST is infinite. */
2719 infinite_cost_p (comp_cost cost
)
2721 return cost
.cost
== INFTY
;
2724 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2725 on invariants DEPENDS_ON and that the value used in expressing it
2726 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2729 set_use_iv_cost (struct ivopts_data
*data
,
2730 struct iv_use
*use
, struct iv_cand
*cand
,
2731 comp_cost cost
, bitmap depends_on
, tree value
,
2732 enum tree_code comp
, int inv_expr_id
)
2736 if (infinite_cost_p (cost
))
2738 BITMAP_FREE (depends_on
);
2742 if (data
->consider_all_candidates
)
2744 use
->cost_map
[cand
->id
].cand
= cand
;
2745 use
->cost_map
[cand
->id
].cost
= cost
;
2746 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2747 use
->cost_map
[cand
->id
].value
= value
;
2748 use
->cost_map
[cand
->id
].comp
= comp
;
2749 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2753 /* n_map_members is a power of two, so this computes modulo. */
2754 s
= cand
->id
& (use
->n_map_members
- 1);
2755 for (i
= s
; i
< use
->n_map_members
; i
++)
2756 if (!use
->cost_map
[i
].cand
)
2758 for (i
= 0; i
< s
; i
++)
2759 if (!use
->cost_map
[i
].cand
)
2765 use
->cost_map
[i
].cand
= cand
;
2766 use
->cost_map
[i
].cost
= cost
;
2767 use
->cost_map
[i
].depends_on
= depends_on
;
2768 use
->cost_map
[i
].value
= value
;
2769 use
->cost_map
[i
].comp
= comp
;
2770 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2773 /* Gets cost of (USE, CANDIDATE) pair. */
2775 static struct cost_pair
*
2776 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2777 struct iv_cand
*cand
)
2780 struct cost_pair
*ret
;
2785 if (data
->consider_all_candidates
)
2787 ret
= use
->cost_map
+ cand
->id
;
2794 /* n_map_members is a power of two, so this computes modulo. */
2795 s
= cand
->id
& (use
->n_map_members
- 1);
2796 for (i
= s
; i
< use
->n_map_members
; i
++)
2797 if (use
->cost_map
[i
].cand
== cand
)
2798 return use
->cost_map
+ i
;
2799 else if (use
->cost_map
[i
].cand
== NULL
)
2801 for (i
= 0; i
< s
; i
++)
2802 if (use
->cost_map
[i
].cand
== cand
)
2803 return use
->cost_map
+ i
;
2804 else if (use
->cost_map
[i
].cand
== NULL
)
2810 /* Returns estimate on cost of computing SEQ. */
2813 seq_cost (rtx seq
, bool speed
)
2818 for (; seq
; seq
= NEXT_INSN (seq
))
2820 set
= single_set (seq
);
2822 cost
+= set_src_cost (SET_SRC (set
), speed
);
2830 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2832 produce_memory_decl_rtl (tree obj
, int *regno
)
2834 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2835 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2839 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2841 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2842 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2843 SET_SYMBOL_REF_DECL (x
, obj
);
2844 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2845 set_mem_addr_space (x
, as
);
2846 targetm
.encode_section_info (obj
, x
, true);
2850 x
= gen_raw_REG (address_mode
, (*regno
)++);
2851 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2852 set_mem_addr_space (x
, as
);
2858 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2859 walk_tree. DATA contains the actual fake register number. */
2862 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2864 tree obj
= NULL_TREE
;
2866 int *regno
= (int *) data
;
2868 switch (TREE_CODE (*expr_p
))
2871 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2872 handled_component_p (*expr_p
);
2873 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2876 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2877 x
= produce_memory_decl_rtl (obj
, regno
);
2882 obj
= SSA_NAME_VAR (*expr_p
);
2883 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2886 if (!DECL_RTL_SET_P (obj
))
2887 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2896 if (DECL_RTL_SET_P (obj
))
2899 if (DECL_MODE (obj
) == BLKmode
)
2900 x
= produce_memory_decl_rtl (obj
, regno
);
2902 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2912 decl_rtl_to_reset
.safe_push (obj
);
2913 SET_DECL_RTL (obj
, x
);
2919 /* Determines cost of the computation of EXPR. */
2922 computation_cost (tree expr
, bool speed
)
2925 tree type
= TREE_TYPE (expr
);
2927 /* Avoid using hard regs in ways which may be unsupported. */
2928 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2929 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2930 enum node_frequency real_frequency
= node
->frequency
;
2932 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2933 crtl
->maybe_hot_insn_p
= speed
;
2934 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2936 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2939 default_rtl_profile ();
2940 node
->frequency
= real_frequency
;
2942 cost
= seq_cost (seq
, speed
);
2944 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2945 TYPE_ADDR_SPACE (type
), speed
);
2946 else if (!REG_P (rslt
))
2947 cost
+= set_src_cost (rslt
, speed
);
2952 /* Returns variable containing the value of candidate CAND at statement AT. */
2955 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2957 if (stmt_after_increment (loop
, cand
, stmt
))
2958 return cand
->var_after
;
2960 return cand
->var_before
;
2963 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2964 same precision that is at least as wide as the precision of TYPE, stores
2965 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2969 determine_common_wider_type (tree
*a
, tree
*b
)
2971 tree wider_type
= NULL
;
2973 tree atype
= TREE_TYPE (*a
);
2975 if (CONVERT_EXPR_P (*a
))
2977 suba
= TREE_OPERAND (*a
, 0);
2978 wider_type
= TREE_TYPE (suba
);
2979 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2985 if (CONVERT_EXPR_P (*b
))
2987 subb
= TREE_OPERAND (*b
, 0);
2988 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2999 /* Determines the expression by that USE is expressed from induction variable
3000 CAND at statement AT in LOOP. The expression is stored in a decomposed
3001 form into AFF. Returns false if USE cannot be expressed using CAND. */
3004 get_computation_aff (struct loop
*loop
,
3005 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3006 struct affine_tree_combination
*aff
)
3008 tree ubase
= use
->iv
->base
;
3009 tree ustep
= use
->iv
->step
;
3010 tree cbase
= cand
->iv
->base
;
3011 tree cstep
= cand
->iv
->step
, cstep_common
;
3012 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3013 tree common_type
, var
;
3015 aff_tree cbase_aff
, var_aff
;
3018 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3020 /* We do not have a precision to express the values of use. */
3024 var
= var_at_stmt (loop
, cand
, at
);
3025 uutype
= unsigned_type_for (utype
);
3027 /* If the conversion is not noop, perform it. */
3028 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3030 cstep
= fold_convert (uutype
, cstep
);
3031 cbase
= fold_convert (uutype
, cbase
);
3032 var
= fold_convert (uutype
, var
);
3035 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3038 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3039 type, we achieve better folding by computing their difference in this
3040 wider type, and cast the result to UUTYPE. We do not need to worry about
3041 overflows, as all the arithmetics will in the end be performed in UUTYPE
3043 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3045 /* use = ubase - ratio * cbase + ratio * var. */
3046 tree_to_aff_combination (ubase
, common_type
, aff
);
3047 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3048 tree_to_aff_combination (var
, uutype
, &var_aff
);
3050 /* We need to shift the value if we are after the increment. */
3051 if (stmt_after_increment (loop
, cand
, at
))
3055 if (common_type
!= uutype
)
3056 cstep_common
= fold_convert (common_type
, cstep
);
3058 cstep_common
= cstep
;
3060 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3061 aff_combination_add (&cbase_aff
, &cstep_aff
);
3064 aff_combination_scale (&cbase_aff
, -rat
);
3065 aff_combination_add (aff
, &cbase_aff
);
3066 if (common_type
!= uutype
)
3067 aff_combination_convert (aff
, uutype
);
3069 aff_combination_scale (&var_aff
, rat
);
3070 aff_combination_add (aff
, &var_aff
);
3075 /* Return the type of USE. */
3078 get_use_type (struct iv_use
*use
)
3080 tree base_type
= TREE_TYPE (use
->iv
->base
);
3083 if (use
->type
== USE_ADDRESS
)
3085 /* The base_type may be a void pointer. Create a pointer type based on
3086 the mem_ref instead. */
3087 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3088 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3089 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3097 /* Determines the expression by that USE is expressed from induction variable
3098 CAND at statement AT in LOOP. The computation is unshared. */
3101 get_computation_at (struct loop
*loop
,
3102 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3105 tree type
= get_use_type (use
);
3107 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3109 unshare_aff_combination (&aff
);
3110 return fold_convert (type
, aff_combination_to_tree (&aff
));
3113 /* Determines the expression by that USE is expressed from induction variable
3114 CAND in LOOP. The computation is unshared. */
3117 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3119 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3122 /* Adjust the cost COST for being in loop setup rather than loop body.
3123 If we're optimizing for space, the loop setup overhead is constant;
3124 if we're optimizing for speed, amortize it over the per-iteration cost. */
3126 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3130 else if (optimize_loop_for_speed_p (data
->current_loop
))
3131 return cost
/ avg_loop_niter (data
->current_loop
);
3136 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3137 validity for a memory reference accessing memory of mode MODE in
3138 address space AS. */
3142 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3145 #define MAX_RATIO 128
3146 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3147 static vec
<sbitmap
> valid_mult_list
;
3150 if (data_index
>= valid_mult_list
.length ())
3151 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3153 valid_mult
= valid_mult_list
[data_index
];
3156 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3157 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3158 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3162 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3163 bitmap_clear (valid_mult
);
3164 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3165 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3166 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3168 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3169 if (memory_address_addr_space_p (mode
, addr
, as
)
3170 || memory_address_addr_space_p (mode
, scaled
, as
))
3171 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3174 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3176 fprintf (dump_file
, " allowed multipliers:");
3177 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3178 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3179 fprintf (dump_file
, " %d", (int) i
);
3180 fprintf (dump_file
, "\n");
3181 fprintf (dump_file
, "\n");
3184 valid_mult_list
[data_index
] = valid_mult
;
3187 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3190 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3193 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3194 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3195 variable is omitted. Compute the cost for a memory reference that accesses
3196 a memory location of mode MEM_MODE in address space AS.
3198 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3199 size of MEM_MODE / RATIO) is available. To make this determination, we
3200 look at the size of the increment to be made, which is given in CSTEP.
3201 CSTEP may be zero if the step is unknown.
3202 STMT_AFTER_INC is true iff the statement we're looking at is after the
3203 increment of the original biv.
3205 TODO -- there must be some better way. This all is quite crude. */
3207 typedef struct address_cost_data_s
3209 HOST_WIDE_INT min_offset
, max_offset
;
3210 unsigned costs
[2][2][2][2];
3211 } *address_cost_data
;
3215 get_address_cost (bool symbol_present
, bool var_present
,
3216 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3217 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3218 addr_space_t as
, bool speed
,
3219 bool stmt_after_inc
, bool *may_autoinc
)
3221 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3222 static vec
<address_cost_data
> address_cost_data_list
;
3223 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3224 address_cost_data data
;
3225 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3226 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3227 unsigned cost
, acost
, complexity
;
3228 bool offset_p
, ratio_p
, autoinc
;
3229 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3230 unsigned HOST_WIDE_INT mask
;
3233 if (data_index
>= address_cost_data_list
.length ())
3234 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3236 data
= address_cost_data_list
[data_index
];
3240 HOST_WIDE_INT rat
, off
= 0;
3241 int old_cse_not_expected
, width
;
3242 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3243 rtx seq
, addr
, base
;
3246 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3248 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3250 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3251 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3252 width
= HOST_BITS_PER_WIDE_INT
- 1;
3253 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3255 for (i
= width
; i
>= 0; i
--)
3257 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3258 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3259 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3262 data
->min_offset
= (i
== -1? 0 : off
);
3264 for (i
= width
; i
>= 0; i
--)
3266 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3267 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3268 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3273 data
->max_offset
= off
;
3275 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3277 fprintf (dump_file
, "get_address_cost:\n");
3278 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3279 GET_MODE_NAME (mem_mode
),
3281 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3282 GET_MODE_NAME (mem_mode
),
3287 for (i
= 2; i
<= MAX_RATIO
; i
++)
3288 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3294 /* Compute the cost of various addressing modes. */
3296 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3297 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3299 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3300 || USE_STORE_PRE_DECREMENT (mem_mode
))
3302 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3303 has_predec
[mem_mode
]
3304 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3306 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3307 || USE_STORE_POST_DECREMENT (mem_mode
))
3309 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3310 has_postdec
[mem_mode
]
3311 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3313 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3314 || USE_STORE_PRE_DECREMENT (mem_mode
))
3316 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3317 has_preinc
[mem_mode
]
3318 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3320 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3321 || USE_STORE_POST_INCREMENT (mem_mode
))
3323 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3324 has_postinc
[mem_mode
]
3325 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3327 for (i
= 0; i
< 16; i
++)
3330 var_p
= (i
>> 1) & 1;
3331 off_p
= (i
>> 2) & 1;
3332 rat_p
= (i
>> 3) & 1;
3336 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3337 gen_int_mode (rat
, address_mode
));
3340 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3344 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3345 /* ??? We can run into trouble with some backends by presenting
3346 it with symbols which haven't been properly passed through
3347 targetm.encode_section_info. By setting the local bit, we
3348 enhance the probability of things working. */
3349 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3352 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3354 (PLUS
, address_mode
, base
,
3355 gen_int_mode (off
, address_mode
)));
3358 base
= gen_int_mode (off
, address_mode
);
3363 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3366 /* To avoid splitting addressing modes, pretend that no cse will
3368 old_cse_not_expected
= cse_not_expected
;
3369 cse_not_expected
= true;
3370 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3371 cse_not_expected
= old_cse_not_expected
;
3375 acost
= seq_cost (seq
, speed
);
3376 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3380 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3383 /* On some targets, it is quite expensive to load symbol to a register,
3384 which makes addresses that contain symbols look much more expensive.
3385 However, the symbol will have to be loaded in any case before the
3386 loop (and quite likely we have it in register already), so it does not
3387 make much sense to penalize them too heavily. So make some final
3388 tweaks for the SYMBOL_PRESENT modes:
3390 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3391 var is cheaper, use this mode with small penalty.
3392 If VAR_PRESENT is true, try whether the mode with
3393 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3394 if this is the case, use it. */
3395 add_c
= add_cost (speed
, address_mode
);
3396 for (i
= 0; i
< 8; i
++)
3399 off_p
= (i
>> 1) & 1;
3400 rat_p
= (i
>> 2) & 1;
3402 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3406 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3407 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3410 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3412 fprintf (dump_file
, "Address costs:\n");
3414 for (i
= 0; i
< 16; i
++)
3417 var_p
= (i
>> 1) & 1;
3418 off_p
= (i
>> 2) & 1;
3419 rat_p
= (i
>> 3) & 1;
3421 fprintf (dump_file
, " ");
3423 fprintf (dump_file
, "sym + ");
3425 fprintf (dump_file
, "var + ");
3427 fprintf (dump_file
, "cst + ");
3429 fprintf (dump_file
, "rat * ");
3431 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3432 fprintf (dump_file
, "index costs %d\n", acost
);
3434 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3435 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3436 fprintf (dump_file
, " May include autoinc/dec\n");
3437 fprintf (dump_file
, "\n");
3440 address_cost_data_list
[data_index
] = data
;
3443 bits
= GET_MODE_BITSIZE (address_mode
);
3444 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3446 if ((offset
>> (bits
- 1) & 1))
3451 msize
= GET_MODE_SIZE (mem_mode
);
3452 autoinc_offset
= offset
;
3454 autoinc_offset
+= ratio
* cstep
;
3455 if (symbol_present
|| var_present
|| ratio
!= 1)
3457 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3459 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3461 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3463 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3464 && msize
== -cstep
))
3468 offset_p
= (s_offset
!= 0
3469 && data
->min_offset
<= s_offset
3470 && s_offset
<= data
->max_offset
);
3471 ratio_p
= (ratio
!= 1
3472 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3474 if (ratio
!= 1 && !ratio_p
)
3475 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3477 if (s_offset
&& !offset_p
&& !symbol_present
)
3478 cost
+= add_cost (speed
, address_mode
);
3481 *may_autoinc
= autoinc
;
3482 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3483 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3484 return new_cost (cost
+ acost
, complexity
);
3487 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3488 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3489 calculating the operands of EXPR. Returns true if successful, and returns
3490 the cost in COST. */
3493 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3494 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3497 tree op1
= TREE_OPERAND (expr
, 1);
3498 tree cst
= TREE_OPERAND (mult
, 1);
3499 tree multop
= TREE_OPERAND (mult
, 0);
3500 int m
= exact_log2 (int_cst_value (cst
));
3501 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3503 bool equal_p
= false;
3505 if (!(m
>= 0 && m
< maxm
))
3508 if (operand_equal_p (op1
, mult
, 0))
3511 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3512 ? shiftadd_cost (speed
, mode
, m
)
3514 ? shiftsub1_cost (speed
, mode
, m
)
3515 : shiftsub0_cost (speed
, mode
, m
)));
3516 res
= new_cost (sa_cost
, 0);
3517 res
= add_costs (res
, equal_p
? cost0
: cost1
);
3519 STRIP_NOPS (multop
);
3520 if (!is_gimple_val (multop
))
3521 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3527 /* Estimates cost of forcing expression EXPR into a variable. */
3530 force_expr_to_var_cost (tree expr
, bool speed
)
3532 static bool costs_initialized
= false;
3533 static unsigned integer_cost
[2];
3534 static unsigned symbol_cost
[2];
3535 static unsigned address_cost
[2];
3537 comp_cost cost0
, cost1
, cost
;
3538 enum machine_mode mode
;
3540 if (!costs_initialized
)
3542 tree type
= build_pointer_type (integer_type_node
);
3547 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3548 TREE_STATIC (var
) = 1;
3549 x
= produce_memory_decl_rtl (var
, NULL
);
3550 SET_DECL_RTL (var
, x
);
3552 addr
= build1 (ADDR_EXPR
, type
, var
);
3555 for (i
= 0; i
< 2; i
++)
3557 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3560 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3563 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3564 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3566 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3567 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3568 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3569 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3570 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3571 fprintf (dump_file
, "\n");
3575 costs_initialized
= true;
3580 if (SSA_VAR_P (expr
))
3583 if (is_gimple_min_invariant (expr
))
3585 if (TREE_CODE (expr
) == INTEGER_CST
)
3586 return new_cost (integer_cost
[speed
], 0);
3588 if (TREE_CODE (expr
) == ADDR_EXPR
)
3590 tree obj
= TREE_OPERAND (expr
, 0);
3592 if (TREE_CODE (obj
) == VAR_DECL
3593 || TREE_CODE (obj
) == PARM_DECL
3594 || TREE_CODE (obj
) == RESULT_DECL
)
3595 return new_cost (symbol_cost
[speed
], 0);
3598 return new_cost (address_cost
[speed
], 0);
3601 switch (TREE_CODE (expr
))
3603 case POINTER_PLUS_EXPR
:
3607 op0
= TREE_OPERAND (expr
, 0);
3608 op1
= TREE_OPERAND (expr
, 1);
3612 if (is_gimple_val (op0
))
3615 cost0
= force_expr_to_var_cost (op0
, speed
);
3617 if (is_gimple_val (op1
))
3620 cost1
= force_expr_to_var_cost (op1
, speed
);
3625 op0
= TREE_OPERAND (expr
, 0);
3629 if (is_gimple_val (op0
))
3632 cost0
= force_expr_to_var_cost (op0
, speed
);
3638 /* Just an arbitrary value, FIXME. */
3639 return new_cost (target_spill_cost
[speed
], 0);
3642 mode
= TYPE_MODE (TREE_TYPE (expr
));
3643 switch (TREE_CODE (expr
))
3645 case POINTER_PLUS_EXPR
:
3649 cost
= new_cost (add_cost (speed
, mode
), 0);
3650 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3652 tree mult
= NULL_TREE
;
3654 if (TREE_CODE (op1
) == MULT_EXPR
)
3656 else if (TREE_CODE (op0
) == MULT_EXPR
)
3659 if (mult
!= NULL_TREE
3660 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3661 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3668 if (cst_and_fits_in_hwi (op0
))
3669 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3671 else if (cst_and_fits_in_hwi (op1
))
3672 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3675 return new_cost (target_spill_cost
[speed
], 0);
3682 cost
= add_costs (cost
, cost0
);
3683 cost
= add_costs (cost
, cost1
);
3685 /* Bound the cost by target_spill_cost. The parts of complicated
3686 computations often are either loop invariant or at least can
3687 be shared between several iv uses, so letting this grow without
3688 limits would not give reasonable results. */
3689 if (cost
.cost
> (int) target_spill_cost
[speed
])
3690 cost
.cost
= target_spill_cost
[speed
];
3695 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3696 invariants the computation depends on. */
3699 force_var_cost (struct ivopts_data
*data
,
3700 tree expr
, bitmap
*depends_on
)
3704 fd_ivopts_data
= data
;
3705 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3708 return force_expr_to_var_cost (expr
, data
->speed
);
3711 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3712 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3713 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3714 invariants the computation depends on. */
3717 split_address_cost (struct ivopts_data
*data
,
3718 tree addr
, bool *symbol_present
, bool *var_present
,
3719 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3722 HOST_WIDE_INT bitsize
;
3723 HOST_WIDE_INT bitpos
;
3725 enum machine_mode mode
;
3726 int unsignedp
, volatilep
;
3728 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3729 &unsignedp
, &volatilep
, false);
3732 || bitpos
% BITS_PER_UNIT
!= 0
3733 || TREE_CODE (core
) != VAR_DECL
)
3735 *symbol_present
= false;
3736 *var_present
= true;
3737 fd_ivopts_data
= data
;
3738 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3739 return new_cost (target_spill_cost
[data
->speed
], 0);
3742 *offset
+= bitpos
/ BITS_PER_UNIT
;
3743 if (TREE_STATIC (core
)
3744 || DECL_EXTERNAL (core
))
3746 *symbol_present
= true;
3747 *var_present
= false;
3751 *symbol_present
= false;
3752 *var_present
= true;
3756 /* Estimates cost of expressing difference of addresses E1 - E2 as
3757 var + symbol + offset. The value of offset is added to OFFSET,
3758 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3759 part is missing. DEPENDS_ON is a set of the invariants the computation
3763 ptr_difference_cost (struct ivopts_data
*data
,
3764 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3765 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3767 HOST_WIDE_INT diff
= 0;
3768 aff_tree aff_e1
, aff_e2
;
3771 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3773 if (ptr_difference_const (e1
, e2
, &diff
))
3776 *symbol_present
= false;
3777 *var_present
= false;
3781 if (integer_zerop (e2
))
3782 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3783 symbol_present
, var_present
, offset
, depends_on
);
3785 *symbol_present
= false;
3786 *var_present
= true;
3788 type
= signed_type_for (TREE_TYPE (e1
));
3789 tree_to_aff_combination (e1
, type
, &aff_e1
);
3790 tree_to_aff_combination (e2
, type
, &aff_e2
);
3791 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3792 aff_combination_add (&aff_e1
, &aff_e2
);
3794 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3797 /* Estimates cost of expressing difference E1 - E2 as
3798 var + symbol + offset. The value of offset is added to OFFSET,
3799 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3800 part is missing. DEPENDS_ON is a set of the invariants the computation
3804 difference_cost (struct ivopts_data
*data
,
3805 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3806 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3808 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3809 unsigned HOST_WIDE_INT off1
, off2
;
3810 aff_tree aff_e1
, aff_e2
;
3813 e1
= strip_offset (e1
, &off1
);
3814 e2
= strip_offset (e2
, &off2
);
3815 *offset
+= off1
- off2
;
3820 if (TREE_CODE (e1
) == ADDR_EXPR
)
3821 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3822 offset
, depends_on
);
3823 *symbol_present
= false;
3825 if (operand_equal_p (e1
, e2
, 0))
3827 *var_present
= false;
3831 *var_present
= true;
3833 if (integer_zerop (e2
))
3834 return force_var_cost (data
, e1
, depends_on
);
3836 if (integer_zerop (e1
))
3838 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3839 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3843 type
= signed_type_for (TREE_TYPE (e1
));
3844 tree_to_aff_combination (e1
, type
, &aff_e1
);
3845 tree_to_aff_combination (e2
, type
, &aff_e2
);
3846 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3847 aff_combination_add (&aff_e1
, &aff_e2
);
3849 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3852 /* Returns true if AFF1 and AFF2 are identical. */
3855 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3859 if (aff1
->n
!= aff2
->n
)
3862 for (i
= 0; i
< aff1
->n
; i
++)
3864 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3867 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3873 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3876 get_expr_id (struct ivopts_data
*data
, tree expr
)
3878 struct iv_inv_expr_ent ent
;
3879 struct iv_inv_expr_ent
**slot
;
3882 ent
.hash
= iterative_hash_expr (expr
, 0);
3883 slot
= data
->inv_expr_tab
.find_slot (&ent
, INSERT
);
3887 *slot
= XNEW (struct iv_inv_expr_ent
);
3888 (*slot
)->expr
= expr
;
3889 (*slot
)->hash
= ent
.hash
;
3890 (*slot
)->id
= data
->inv_expr_id
++;
3894 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3895 requires a new compiler generated temporary. Returns -1 otherwise.
3896 ADDRESS_P is a flag indicating if the expression is for address
3900 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3901 tree cbase
, HOST_WIDE_INT ratio
,
3904 aff_tree ubase_aff
, cbase_aff
;
3912 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3913 && (TREE_CODE (cbase
) == INTEGER_CST
))
3916 /* Strips the constant part. */
3917 if (TREE_CODE (ubase
) == PLUS_EXPR
3918 || TREE_CODE (ubase
) == MINUS_EXPR
3919 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3921 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3922 ubase
= TREE_OPERAND (ubase
, 0);
3925 /* Strips the constant part. */
3926 if (TREE_CODE (cbase
) == PLUS_EXPR
3927 || TREE_CODE (cbase
) == MINUS_EXPR
3928 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
3930 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
3931 cbase
= TREE_OPERAND (cbase
, 0);
3936 if (((TREE_CODE (ubase
) == SSA_NAME
)
3937 || (TREE_CODE (ubase
) == ADDR_EXPR
3938 && is_gimple_min_invariant (ubase
)))
3939 && (TREE_CODE (cbase
) == INTEGER_CST
))
3942 if (((TREE_CODE (cbase
) == SSA_NAME
)
3943 || (TREE_CODE (cbase
) == ADDR_EXPR
3944 && is_gimple_min_invariant (cbase
)))
3945 && (TREE_CODE (ubase
) == INTEGER_CST
))
3951 if (operand_equal_p (ubase
, cbase
, 0))
3954 if (TREE_CODE (ubase
) == ADDR_EXPR
3955 && TREE_CODE (cbase
) == ADDR_EXPR
)
3959 usym
= TREE_OPERAND (ubase
, 0);
3960 csym
= TREE_OPERAND (cbase
, 0);
3961 if (TREE_CODE (usym
) == ARRAY_REF
)
3963 tree ind
= TREE_OPERAND (usym
, 1);
3964 if (TREE_CODE (ind
) == INTEGER_CST
3965 && host_integerp (ind
, 0)
3966 && TREE_INT_CST_LOW (ind
) == 0)
3967 usym
= TREE_OPERAND (usym
, 0);
3969 if (TREE_CODE (csym
) == ARRAY_REF
)
3971 tree ind
= TREE_OPERAND (csym
, 1);
3972 if (TREE_CODE (ind
) == INTEGER_CST
3973 && host_integerp (ind
, 0)
3974 && TREE_INT_CST_LOW (ind
) == 0)
3975 csym
= TREE_OPERAND (csym
, 0);
3977 if (operand_equal_p (usym
, csym
, 0))
3980 /* Now do more complex comparison */
3981 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
3982 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
3983 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
3987 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
3988 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
3990 aff_combination_scale (&cbase_aff
, double_int::from_shwi (-1 * ratio
));
3991 aff_combination_add (&ubase_aff
, &cbase_aff
);
3992 expr
= aff_combination_to_tree (&ubase_aff
);
3993 return get_expr_id (data
, expr
);
3998 /* Determines the cost of the computation by that USE is expressed
3999 from induction variable CAND. If ADDRESS_P is true, we just need
4000 to create an address from it, otherwise we want to get it into
4001 register. A set of invariants we depend on is stored in
4002 DEPENDS_ON. AT is the statement at that the value is computed.
4003 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4004 addressing is likely. */
4007 get_computation_cost_at (struct ivopts_data
*data
,
4008 struct iv_use
*use
, struct iv_cand
*cand
,
4009 bool address_p
, bitmap
*depends_on
, gimple at
,
4013 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4015 tree utype
= TREE_TYPE (ubase
), ctype
;
4016 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4017 HOST_WIDE_INT ratio
, aratio
;
4018 bool var_present
, symbol_present
, stmt_is_after_inc
;
4021 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4022 enum machine_mode mem_mode
= (address_p
4023 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4028 /* Only consider real candidates. */
4030 return infinite_cost
;
4032 cbase
= cand
->iv
->base
;
4033 cstep
= cand
->iv
->step
;
4034 ctype
= TREE_TYPE (cbase
);
4036 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4038 /* We do not have a precision to express the values of use. */
4039 return infinite_cost
;
4043 || (use
->iv
->base_object
4044 && cand
->iv
->base_object
4045 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4046 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4048 /* Do not try to express address of an object with computation based
4049 on address of a different object. This may cause problems in rtl
4050 level alias analysis (that does not expect this to be happening,
4051 as this is illegal in C), and would be unlikely to be useful
4053 if (use
->iv
->base_object
4054 && cand
->iv
->base_object
4055 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4056 return infinite_cost
;
4059 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4061 /* TODO -- add direct handling of this case. */
4065 /* CSTEPI is removed from the offset in case statement is after the
4066 increment. If the step is not constant, we use zero instead.
4067 This is a bit imprecise (there is the extra addition), but
4068 redundancy elimination is likely to transform the code so that
4069 it uses value of the variable before increment anyway,
4070 so it is not that much unrealistic. */
4071 if (cst_and_fits_in_hwi (cstep
))
4072 cstepi
= int_cst_value (cstep
);
4076 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4077 return infinite_cost
;
4079 if (rat
.fits_shwi ())
4080 ratio
= rat
.to_shwi ();
4082 return infinite_cost
;
4085 ctype
= TREE_TYPE (cbase
);
4087 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4089 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4090 or ratio == 1, it is better to handle this like
4092 ubase - ratio * cbase + ratio * var
4094 (also holds in the case ratio == -1, TODO. */
4096 if (cst_and_fits_in_hwi (cbase
))
4098 offset
= - ratio
* int_cst_value (cbase
);
4099 cost
= difference_cost (data
,
4100 ubase
, build_int_cst (utype
, 0),
4101 &symbol_present
, &var_present
, &offset
,
4103 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4105 else if (ratio
== 1)
4107 tree real_cbase
= cbase
;
4109 /* Check to see if any adjustment is needed. */
4110 if (cstepi
== 0 && stmt_is_after_inc
)
4112 aff_tree real_cbase_aff
;
4115 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4117 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4119 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4120 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4123 cost
= difference_cost (data
,
4125 &symbol_present
, &var_present
, &offset
,
4127 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4130 && !POINTER_TYPE_P (ctype
)
4131 && multiplier_allowed_in_address_p
4133 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4136 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4137 cost
= difference_cost (data
,
4139 &symbol_present
, &var_present
, &offset
,
4141 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4145 cost
= force_var_cost (data
, cbase
, depends_on
);
4146 cost
= add_costs (cost
,
4147 difference_cost (data
,
4148 ubase
, build_int_cst (utype
, 0),
4149 &symbol_present
, &var_present
,
4150 &offset
, depends_on
));
4151 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4152 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4158 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4159 /* Clear depends on. */
4160 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4161 bitmap_clear (*depends_on
);
4164 /* If we are after the increment, the value of the candidate is higher by
4166 if (stmt_is_after_inc
)
4167 offset
-= ratio
* cstepi
;
4169 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4170 (symbol/var1/const parts may be omitted). If we are looking for an
4171 address, find the cost of addressing this. */
4173 return add_costs (cost
,
4174 get_address_cost (symbol_present
, var_present
,
4175 offset
, ratio
, cstepi
,
4177 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4178 speed
, stmt_is_after_inc
,
4181 /* Otherwise estimate the costs for computing the expression. */
4182 if (!symbol_present
&& !var_present
&& !offset
)
4185 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4189 /* Symbol + offset should be compile-time computable so consider that they
4190 are added once to the variable, if present. */
4191 if (var_present
&& (symbol_present
|| offset
))
4192 cost
.cost
+= adjust_setup_cost (data
,
4193 add_cost (speed
, TYPE_MODE (ctype
)));
4195 /* Having offset does not affect runtime cost in case it is added to
4196 symbol, but it increases complexity. */
4200 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4202 aratio
= ratio
> 0 ? ratio
: -ratio
;
4204 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4209 *can_autoinc
= false;
4212 /* Just get the expression, expand it and measure the cost. */
4213 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4216 return infinite_cost
;
4219 comp
= build_simple_mem_ref (comp
);
4221 return new_cost (computation_cost (comp
, speed
), 0);
4225 /* Determines the cost of the computation by that USE is expressed
4226 from induction variable CAND. If ADDRESS_P is true, we just need
4227 to create an address from it, otherwise we want to get it into
4228 register. A set of invariants we depend on is stored in
4229 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4230 autoinc addressing is likely. */
4233 get_computation_cost (struct ivopts_data
*data
,
4234 struct iv_use
*use
, struct iv_cand
*cand
,
4235 bool address_p
, bitmap
*depends_on
,
4236 bool *can_autoinc
, int *inv_expr_id
)
4238 return get_computation_cost_at (data
,
4239 use
, cand
, address_p
, depends_on
, use
->stmt
,
4240 can_autoinc
, inv_expr_id
);
4243 /* Determines cost of basing replacement of USE on CAND in a generic
4247 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4248 struct iv_use
*use
, struct iv_cand
*cand
)
4252 int inv_expr_id
= -1;
4254 /* The simple case first -- if we need to express value of the preserved
4255 original biv, the cost is 0. This also prevents us from counting the
4256 cost of increment twice -- once at this use and once in the cost of
4258 if (cand
->pos
== IP_ORIGINAL
4259 && cand
->incremented_at
== use
->stmt
)
4261 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4266 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4267 NULL
, &inv_expr_id
);
4269 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4272 return !infinite_cost_p (cost
);
4275 /* Determines cost of basing replacement of USE on CAND in an address. */
4278 determine_use_iv_cost_address (struct ivopts_data
*data
,
4279 struct iv_use
*use
, struct iv_cand
*cand
)
4283 int inv_expr_id
= -1;
4284 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4285 &can_autoinc
, &inv_expr_id
);
4287 if (cand
->ainc_use
== use
)
4290 cost
.cost
-= cand
->cost_step
;
4291 /* If we generated the candidate solely for exploiting autoincrement
4292 opportunities, and it turns out it can't be used, set the cost to
4293 infinity to make sure we ignore it. */
4294 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4295 cost
= infinite_cost
;
4297 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4300 return !infinite_cost_p (cost
);
4303 /* Computes value of candidate CAND at position AT in iteration NITER, and
4304 stores it to VAL. */
4307 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4310 aff_tree step
, delta
, nit
;
4311 struct iv
*iv
= cand
->iv
;
4312 tree type
= TREE_TYPE (iv
->base
);
4313 tree steptype
= type
;
4314 if (POINTER_TYPE_P (type
))
4315 steptype
= sizetype
;
4317 tree_to_aff_combination (iv
->step
, steptype
, &step
);
4318 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4319 aff_combination_convert (&nit
, steptype
);
4320 aff_combination_mult (&nit
, &step
, &delta
);
4321 if (stmt_after_increment (loop
, cand
, at
))
4322 aff_combination_add (&delta
, &step
);
4324 tree_to_aff_combination (iv
->base
, type
, val
);
4325 aff_combination_add (val
, &delta
);
4328 /* Returns period of induction variable iv. */
4331 iv_period (struct iv
*iv
)
4333 tree step
= iv
->step
, period
, type
;
4336 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4338 type
= unsigned_type_for (TREE_TYPE (step
));
4339 /* Period of the iv is lcm (step, type_range)/step -1,
4340 i.e., N*type_range/step - 1. Since type range is power
4341 of two, N == (step >> num_of_ending_zeros_binary (step),
4342 so the final result is
4344 (type_range >> num_of_ending_zeros_binary (step)) - 1
4347 pow2div
= num_ending_zeros (step
);
4349 period
= build_low_bits_mask (type
,
4350 (TYPE_PRECISION (type
)
4351 - tree_low_cst (pow2div
, 1)));
4356 /* Returns the comparison operator used when eliminating the iv USE. */
4358 static enum tree_code
4359 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4361 struct loop
*loop
= data
->current_loop
;
4365 ex_bb
= gimple_bb (use
->stmt
);
4366 exit
= EDGE_SUCC (ex_bb
, 0);
4367 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4368 exit
= EDGE_SUCC (ex_bb
, 1);
4370 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4374 strip_wrap_conserving_type_conversions (tree exp
)
4376 while (tree_ssa_useless_type_conversion (exp
)
4377 && (nowrap_type_p (TREE_TYPE (exp
))
4378 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp
, 0)))))
4379 exp
= TREE_OPERAND (exp
, 0);
4383 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4384 check for an exact match. */
4387 expr_equal_p (tree e
, tree what
)
4390 enum tree_code code
;
4392 e
= strip_wrap_conserving_type_conversions (e
);
4393 what
= strip_wrap_conserving_type_conversions (what
);
4395 code
= TREE_CODE (what
);
4396 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4399 if (operand_equal_p (e
, what
, 0))
4402 if (TREE_CODE (e
) != SSA_NAME
)
4405 stmt
= SSA_NAME_DEF_STMT (e
);
4406 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4407 || gimple_assign_rhs_code (stmt
) != code
)
4410 switch (get_gimple_rhs_class (code
))
4412 case GIMPLE_BINARY_RHS
:
4413 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4417 case GIMPLE_UNARY_RHS
:
4418 case GIMPLE_SINGLE_RHS
:
4419 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4425 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4426 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4427 calculation is performed in non-wrapping type.
4429 TODO: More generally, we could test for the situation that
4430 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4431 This would require knowing the sign of OFFSET.
4433 Also, we only look for the first addition in the computation of BASE.
4434 More complex analysis would be better, but introducing it just for
4435 this optimization seems like an overkill. */
4438 difference_cannot_overflow_p (tree base
, tree offset
)
4440 enum tree_code code
;
4443 if (!nowrap_type_p (TREE_TYPE (base
)))
4446 base
= expand_simple_operations (base
);
4448 if (TREE_CODE (base
) == SSA_NAME
)
4450 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4452 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4455 code
= gimple_assign_rhs_code (stmt
);
4456 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4459 e1
= gimple_assign_rhs1 (stmt
);
4460 e2
= gimple_assign_rhs2 (stmt
);
4464 code
= TREE_CODE (base
);
4465 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4467 e1
= TREE_OPERAND (base
, 0);
4468 e2
= TREE_OPERAND (base
, 1);
4471 /* TODO: deeper inspection may be necessary to prove the equality. */
4475 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4476 case POINTER_PLUS_EXPR
:
4477 return expr_equal_p (e2
, offset
);
4484 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4485 comparison with CAND. NITER describes the number of iterations of
4486 the loops. If successful, the comparison in COMP_P is altered accordingly.
4488 We aim to handle the following situation:
4504 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4505 We aim to optimize this to
4513 while (p < p_0 - a + b);
4515 This preserves the correctness, since the pointer arithmetics does not
4516 overflow. More precisely:
4518 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4519 overflow in computing it or the values of p.
4520 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4521 overflow. To prove this, we use the fact that p_0 = base + a. */
4524 iv_elimination_compare_lt (struct ivopts_data
*data
,
4525 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4526 struct tree_niter_desc
*niter
)
4528 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4529 struct affine_tree_combination nit
, tmpa
, tmpb
;
4530 enum tree_code comp
;
4533 /* We need to know that the candidate induction variable does not overflow.
4534 While more complex analysis may be used to prove this, for now just
4535 check that the variable appears in the original program and that it
4536 is computed in a type that guarantees no overflows. */
4537 cand_type
= TREE_TYPE (cand
->iv
->base
);
4538 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4541 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4542 the calculation of the BOUND could overflow, making the comparison
4544 if (!data
->loop_single_exit_p
)
4547 /* We need to be able to decide whether candidate is increasing or decreasing
4548 in order to choose the right comparison operator. */
4549 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4551 step
= int_cst_value (cand
->iv
->step
);
4553 /* Check that the number of iterations matches the expected pattern:
4554 a + 1 > b ? 0 : b - a - 1. */
4555 mbz
= niter
->may_be_zero
;
4556 if (TREE_CODE (mbz
) == GT_EXPR
)
4558 /* Handle a + 1 > b. */
4559 tree op0
= TREE_OPERAND (mbz
, 0);
4560 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4562 a
= TREE_OPERAND (op0
, 0);
4563 b
= TREE_OPERAND (mbz
, 1);
4568 else if (TREE_CODE (mbz
) == LT_EXPR
)
4570 tree op1
= TREE_OPERAND (mbz
, 1);
4572 /* Handle b < a + 1. */
4573 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4575 a
= TREE_OPERAND (op1
, 0);
4576 b
= TREE_OPERAND (mbz
, 0);
4584 /* Expected number of iterations is B - A - 1. Check that it matches
4585 the actual number, i.e., that B - A - NITER = 1. */
4586 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4587 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4588 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4589 aff_combination_scale (&nit
, double_int_minus_one
);
4590 aff_combination_scale (&tmpa
, double_int_minus_one
);
4591 aff_combination_add (&tmpb
, &tmpa
);
4592 aff_combination_add (&tmpb
, &nit
);
4593 if (tmpb
.n
!= 0 || tmpb
.offset
!= double_int_one
)
4596 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4598 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4600 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4601 if (!difference_cannot_overflow_p (cand
->iv
->base
, offset
))
4604 /* Determine the new comparison operator. */
4605 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4606 if (*comp_p
== NE_EXPR
)
4608 else if (*comp_p
== EQ_EXPR
)
4609 *comp_p
= invert_tree_comparison (comp
, false);
4616 /* Check whether it is possible to express the condition in USE by comparison
4617 of candidate CAND. If so, store the value compared with to BOUND, and the
4618 comparison operator to COMP. */
4621 may_eliminate_iv (struct ivopts_data
*data
,
4622 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4623 enum tree_code
*comp
)
4628 struct loop
*loop
= data
->current_loop
;
4630 struct tree_niter_desc
*desc
= NULL
;
4632 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4635 /* For now works only for exits that dominate the loop latch.
4636 TODO: extend to other conditions inside loop body. */
4637 ex_bb
= gimple_bb (use
->stmt
);
4638 if (use
->stmt
!= last_stmt (ex_bb
)
4639 || gimple_code (use
->stmt
) != GIMPLE_COND
4640 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4643 exit
= EDGE_SUCC (ex_bb
, 0);
4644 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4645 exit
= EDGE_SUCC (ex_bb
, 1);
4646 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4649 desc
= niter_for_exit (data
, exit
);
4653 /* Determine whether we can use the variable to test the exit condition.
4654 This is the case iff the period of the induction variable is greater
4655 than the number of iterations for which the exit condition is true. */
4656 period
= iv_period (cand
->iv
);
4658 /* If the number of iterations is constant, compare against it directly. */
4659 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4661 /* See cand_value_at. */
4662 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4664 if (!tree_int_cst_lt (desc
->niter
, period
))
4669 if (tree_int_cst_lt (period
, desc
->niter
))
4674 /* If not, and if this is the only possible exit of the loop, see whether
4675 we can get a conservative estimate on the number of iterations of the
4676 entire loop and compare against that instead. */
4679 double_int period_value
, max_niter
;
4681 max_niter
= desc
->max
;
4682 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4683 max_niter
+= double_int_one
;
4684 period_value
= tree_to_double_int (period
);
4685 if (max_niter
.ugt (period_value
))
4687 /* See if we can take advantage of inferred loop bound information. */
4688 if (data
->loop_single_exit_p
)
4690 if (!max_loop_iterations (loop
, &max_niter
))
4692 /* The loop bound is already adjusted by adding 1. */
4693 if (max_niter
.ugt (period_value
))
4701 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4703 *bound
= aff_combination_to_tree (&bnd
);
4704 *comp
= iv_elimination_compare (data
, use
);
4706 /* It is unlikely that computing the number of iterations using division
4707 would be more profitable than keeping the original induction variable. */
4708 if (expression_expensive_p (*bound
))
4711 /* Sometimes, it is possible to handle the situation that the number of
4712 iterations may be zero unless additional assumtions by using <
4713 instead of != in the exit condition.
4715 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4716 base the exit condition on it. However, that is often too
4718 if (!integer_zerop (desc
->may_be_zero
))
4719 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4724 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4725 be copied, if is is used in the loop body and DATA->body_includes_call. */
4728 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4730 tree sbound
= bound
;
4731 STRIP_NOPS (sbound
);
4733 if (TREE_CODE (sbound
) == SSA_NAME
4734 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4735 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4736 && data
->body_includes_call
)
4737 return COSTS_N_INSNS (1);
4742 /* Determines cost of basing replacement of USE on CAND in a condition. */
4745 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4746 struct iv_use
*use
, struct iv_cand
*cand
)
4748 tree bound
= NULL_TREE
;
4750 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4751 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4753 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4754 tree
*control_var
, *bound_cst
;
4755 enum tree_code comp
= ERROR_MARK
;
4757 /* Only consider real candidates. */
4760 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4765 /* Try iv elimination. */
4766 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4768 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4769 if (elim_cost
.cost
== 0)
4770 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4771 else if (TREE_CODE (bound
) == INTEGER_CST
)
4773 /* If we replace a loop condition 'i < n' with 'p < base + n',
4774 depends_on_elim will have 'base' and 'n' set, which implies
4775 that both 'base' and 'n' will be live during the loop. More likely,
4776 'base + n' will be loop invariant, resulting in only one live value
4777 during the loop. So in that case we clear depends_on_elim and set
4778 elim_inv_expr_id instead. */
4779 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4781 elim_inv_expr_id
= get_expr_id (data
, bound
);
4782 bitmap_clear (depends_on_elim
);
4784 /* The bound is a loop invariant, so it will be only computed
4786 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4789 elim_cost
= infinite_cost
;
4791 /* Try expressing the original giv. If it is compared with an invariant,
4792 note that we cannot get rid of it. */
4793 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4797 /* When the condition is a comparison of the candidate IV against
4798 zero, prefer this IV.
4800 TODO: The constant that we're subtracting from the cost should
4801 be target-dependent. This information should be added to the
4802 target costs for each backend. */
4803 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4804 && integer_zerop (*bound_cst
)
4805 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4806 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4807 elim_cost
.cost
-= 1;
4809 express_cost
= get_computation_cost (data
, use
, cand
, false,
4810 &depends_on_express
, NULL
,
4811 &express_inv_expr_id
);
4812 fd_ivopts_data
= data
;
4813 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4815 /* Count the cost of the original bound as well. */
4816 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4817 if (bound_cost
.cost
== 0)
4818 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4819 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4820 bound_cost
.cost
= 0;
4821 express_cost
.cost
+= bound_cost
.cost
;
4823 /* Choose the better approach, preferring the eliminated IV. */
4824 if (compare_costs (elim_cost
, express_cost
) <= 0)
4827 depends_on
= depends_on_elim
;
4828 depends_on_elim
= NULL
;
4829 inv_expr_id
= elim_inv_expr_id
;
4833 cost
= express_cost
;
4834 depends_on
= depends_on_express
;
4835 depends_on_express
= NULL
;
4838 inv_expr_id
= express_inv_expr_id
;
4841 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4843 if (depends_on_elim
)
4844 BITMAP_FREE (depends_on_elim
);
4845 if (depends_on_express
)
4846 BITMAP_FREE (depends_on_express
);
4848 return !infinite_cost_p (cost
);
4851 /* Determines cost of basing replacement of USE on CAND. Returns false
4852 if USE cannot be based on CAND. */
4855 determine_use_iv_cost (struct ivopts_data
*data
,
4856 struct iv_use
*use
, struct iv_cand
*cand
)
4860 case USE_NONLINEAR_EXPR
:
4861 return determine_use_iv_cost_generic (data
, use
, cand
);
4864 return determine_use_iv_cost_address (data
, use
, cand
);
4867 return determine_use_iv_cost_condition (data
, use
, cand
);
4874 /* Return true if get_computation_cost indicates that autoincrement is
4875 a possibility for the pair of USE and CAND, false otherwise. */
4878 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4879 struct iv_cand
*cand
)
4885 if (use
->type
!= USE_ADDRESS
)
4888 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4889 &can_autoinc
, NULL
);
4891 BITMAP_FREE (depends_on
);
4893 return !infinite_cost_p (cost
) && can_autoinc
;
4896 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4897 use that allows autoincrement, and set their AINC_USE if possible. */
4900 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4904 for (i
= 0; i
< n_iv_cands (data
); i
++)
4906 struct iv_cand
*cand
= iv_cand (data
, i
);
4907 struct iv_use
*closest_before
= NULL
;
4908 struct iv_use
*closest_after
= NULL
;
4909 if (cand
->pos
!= IP_ORIGINAL
)
4912 for (j
= 0; j
< n_iv_uses (data
); j
++)
4914 struct iv_use
*use
= iv_use (data
, j
);
4915 unsigned uid
= gimple_uid (use
->stmt
);
4917 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
4920 if (uid
< gimple_uid (cand
->incremented_at
)
4921 && (closest_before
== NULL
4922 || uid
> gimple_uid (closest_before
->stmt
)))
4923 closest_before
= use
;
4925 if (uid
> gimple_uid (cand
->incremented_at
)
4926 && (closest_after
== NULL
4927 || uid
< gimple_uid (closest_after
->stmt
)))
4928 closest_after
= use
;
4931 if (closest_before
!= NULL
4932 && autoinc_possible_for_pair (data
, closest_before
, cand
))
4933 cand
->ainc_use
= closest_before
;
4934 else if (closest_after
!= NULL
4935 && autoinc_possible_for_pair (data
, closest_after
, cand
))
4936 cand
->ainc_use
= closest_after
;
4940 /* Finds the candidates for the induction variables. */
4943 find_iv_candidates (struct ivopts_data
*data
)
4945 /* Add commonly used ivs. */
4946 add_standard_iv_candidates (data
);
4948 /* Add old induction variables. */
4949 add_old_ivs_candidates (data
);
4951 /* Add induction variables derived from uses. */
4952 add_derived_ivs_candidates (data
);
4954 set_autoinc_for_original_candidates (data
);
4956 /* Record the important candidates. */
4957 record_important_candidates (data
);
4960 /* Determines costs of basing the use of the iv on an iv candidate. */
4963 determine_use_iv_costs (struct ivopts_data
*data
)
4967 struct iv_cand
*cand
;
4968 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4970 alloc_use_cost_map (data
);
4972 for (i
= 0; i
< n_iv_uses (data
); i
++)
4974 use
= iv_use (data
, i
);
4976 if (data
->consider_all_candidates
)
4978 for (j
= 0; j
< n_iv_cands (data
); j
++)
4980 cand
= iv_cand (data
, j
);
4981 determine_use_iv_cost (data
, use
, cand
);
4988 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4990 cand
= iv_cand (data
, j
);
4991 if (!determine_use_iv_cost (data
, use
, cand
))
4992 bitmap_set_bit (to_clear
, j
);
4995 /* Remove the candidates for that the cost is infinite from
4996 the list of related candidates. */
4997 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4998 bitmap_clear (to_clear
);
5002 BITMAP_FREE (to_clear
);
5004 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5006 fprintf (dump_file
, "Use-candidate costs:\n");
5008 for (i
= 0; i
< n_iv_uses (data
); i
++)
5010 use
= iv_use (data
, i
);
5012 fprintf (dump_file
, "Use %d:\n", i
);
5013 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5014 for (j
= 0; j
< use
->n_map_members
; j
++)
5016 if (!use
->cost_map
[j
].cand
5017 || infinite_cost_p (use
->cost_map
[j
].cost
))
5020 fprintf (dump_file
, " %d\t%d\t%d\t",
5021 use
->cost_map
[j
].cand
->id
,
5022 use
->cost_map
[j
].cost
.cost
,
5023 use
->cost_map
[j
].cost
.complexity
);
5024 if (use
->cost_map
[j
].depends_on
)
5025 bitmap_print (dump_file
,
5026 use
->cost_map
[j
].depends_on
, "","");
5027 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5028 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5029 fprintf (dump_file
, "\n");
5032 fprintf (dump_file
, "\n");
5034 fprintf (dump_file
, "\n");
5038 /* Determines cost of the candidate CAND. */
5041 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5043 comp_cost cost_base
;
5044 unsigned cost
, cost_step
;
5053 /* There are two costs associated with the candidate -- its increment
5054 and its initialization. The second is almost negligible for any loop
5055 that rolls enough, so we take it just very little into account. */
5057 base
= cand
->iv
->base
;
5058 cost_base
= force_var_cost (data
, base
, NULL
);
5059 /* It will be exceptional that the iv register happens to be initialized with
5060 the proper value at no cost. In general, there will at least be a regcopy
5062 if (cost_base
.cost
== 0)
5063 cost_base
.cost
= COSTS_N_INSNS (1);
5064 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5066 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5068 /* Prefer the original ivs unless we may gain something by replacing it.
5069 The reason is to make debugging simpler; so this is not relevant for
5070 artificial ivs created by other optimization passes. */
5071 if (cand
->pos
!= IP_ORIGINAL
5072 || !SSA_NAME_VAR (cand
->var_before
)
5073 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5076 /* Prefer not to insert statements into latch unless there are some
5077 already (so that we do not create unnecessary jumps). */
5078 if (cand
->pos
== IP_END
5079 && empty_block_p (ip_end_pos (data
->current_loop
)))
5083 cand
->cost_step
= cost_step
;
5086 /* Determines costs of computation of the candidates. */
5089 determine_iv_costs (struct ivopts_data
*data
)
5093 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5095 fprintf (dump_file
, "Candidate costs:\n");
5096 fprintf (dump_file
, " cand\tcost\n");
5099 for (i
= 0; i
< n_iv_cands (data
); i
++)
5101 struct iv_cand
*cand
= iv_cand (data
, i
);
5103 determine_iv_cost (data
, cand
);
5105 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5106 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5109 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5110 fprintf (dump_file
, "\n");
5113 /* Calculates cost for having SIZE induction variables. */
5116 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5118 /* We add size to the cost, so that we prefer eliminating ivs
5120 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5121 data
->body_includes_call
);
5124 /* For each size of the induction variable set determine the penalty. */
5127 determine_set_costs (struct ivopts_data
*data
)
5131 gimple_stmt_iterator psi
;
5133 struct loop
*loop
= data
->current_loop
;
5136 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5138 fprintf (dump_file
, "Global costs:\n");
5139 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5140 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5141 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5142 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5146 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5148 phi
= gsi_stmt (psi
);
5149 op
= PHI_RESULT (phi
);
5151 if (virtual_operand_p (op
))
5154 if (get_iv (data
, op
))
5160 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5162 struct version_info
*info
= ver_info (data
, j
);
5164 if (info
->inv_id
&& info
->has_nonlin_use
)
5168 data
->regs_used
= n
;
5169 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5170 fprintf (dump_file
, " regs_used %d\n", n
);
5172 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5174 fprintf (dump_file
, " cost for size:\n");
5175 fprintf (dump_file
, " ivs\tcost\n");
5176 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5177 fprintf (dump_file
, " %d\t%d\n", j
,
5178 ivopts_global_cost_for_size (data
, j
));
5179 fprintf (dump_file
, "\n");
5183 /* Returns true if A is a cheaper cost pair than B. */
5186 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5196 cmp
= compare_costs (a
->cost
, b
->cost
);
5203 /* In case the costs are the same, prefer the cheaper candidate. */
5204 if (a
->cand
->cost
< b
->cand
->cost
)
5211 /* Returns candidate by that USE is expressed in IVS. */
5213 static struct cost_pair
*
5214 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5216 return ivs
->cand_for_use
[use
->id
];
5219 /* Computes the cost field of IVS structure. */
5222 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5224 comp_cost cost
= ivs
->cand_use_cost
;
5226 cost
.cost
+= ivs
->cand_cost
;
5228 cost
.cost
+= ivopts_global_cost_for_size (data
,
5229 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5234 /* Remove invariants in set INVS to set IVS. */
5237 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5245 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5247 ivs
->n_invariant_uses
[iid
]--;
5248 if (ivs
->n_invariant_uses
[iid
] == 0)
5253 /* Set USE not to be expressed by any candidate in IVS. */
5256 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5259 unsigned uid
= use
->id
, cid
;
5260 struct cost_pair
*cp
;
5262 cp
= ivs
->cand_for_use
[uid
];
5268 ivs
->cand_for_use
[uid
] = NULL
;
5269 ivs
->n_cand_uses
[cid
]--;
5271 if (ivs
->n_cand_uses
[cid
] == 0)
5273 bitmap_clear_bit (ivs
->cands
, cid
);
5274 /* Do not count the pseudocandidates. */
5278 ivs
->cand_cost
-= cp
->cand
->cost
;
5280 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5283 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5285 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5287 if (cp
->inv_expr_id
!= -1)
5289 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5290 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5291 ivs
->num_used_inv_expr
--;
5293 iv_ca_recount_cost (data
, ivs
);
5296 /* Add invariants in set INVS to set IVS. */
5299 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5307 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5309 ivs
->n_invariant_uses
[iid
]++;
5310 if (ivs
->n_invariant_uses
[iid
] == 1)
5315 /* Set cost pair for USE in set IVS to CP. */
5318 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5319 struct iv_use
*use
, struct cost_pair
*cp
)
5321 unsigned uid
= use
->id
, cid
;
5323 if (ivs
->cand_for_use
[uid
] == cp
)
5326 if (ivs
->cand_for_use
[uid
])
5327 iv_ca_set_no_cp (data
, ivs
, use
);
5334 ivs
->cand_for_use
[uid
] = cp
;
5335 ivs
->n_cand_uses
[cid
]++;
5336 if (ivs
->n_cand_uses
[cid
] == 1)
5338 bitmap_set_bit (ivs
->cands
, cid
);
5339 /* Do not count the pseudocandidates. */
5343 ivs
->cand_cost
+= cp
->cand
->cost
;
5345 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5348 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5349 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5351 if (cp
->inv_expr_id
!= -1)
5353 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5354 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5355 ivs
->num_used_inv_expr
++;
5357 iv_ca_recount_cost (data
, ivs
);
5361 /* Extend set IVS by expressing USE by some of the candidates in it
5362 if possible. All important candidates will be considered
5363 if IMPORTANT_CANDIDATES is true. */
5366 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5367 struct iv_use
*use
, bool important_candidates
)
5369 struct cost_pair
*best_cp
= NULL
, *cp
;
5374 gcc_assert (ivs
->upto
>= use
->id
);
5376 if (ivs
->upto
== use
->id
)
5382 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
5383 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
5385 struct iv_cand
*cand
= iv_cand (data
, i
);
5387 cp
= get_use_iv_cost (data
, use
, cand
);
5389 if (cheaper_cost_pair (cp
, best_cp
))
5393 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5396 /* Get cost for assignment IVS. */
5399 iv_ca_cost (struct iv_ca
*ivs
)
5401 /* This was a conditional expression but it triggered a bug in
5404 return infinite_cost
;
5409 /* Returns true if all dependences of CP are among invariants in IVS. */
5412 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5417 if (!cp
->depends_on
)
5420 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5422 if (ivs
->n_invariant_uses
[i
] == 0)
5429 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5430 it before NEXT_CHANGE. */
5432 static struct iv_ca_delta
*
5433 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5434 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5436 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5439 change
->old_cp
= old_cp
;
5440 change
->new_cp
= new_cp
;
5441 change
->next_change
= next_change
;
5446 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5449 static struct iv_ca_delta
*
5450 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5452 struct iv_ca_delta
*last
;
5460 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5462 last
->next_change
= l2
;
5467 /* Reverse the list of changes DELTA, forming the inverse to it. */
5469 static struct iv_ca_delta
*
5470 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5472 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5473 struct cost_pair
*tmp
;
5475 for (act
= delta
; act
; act
= next
)
5477 next
= act
->next_change
;
5478 act
->next_change
= prev
;
5482 act
->old_cp
= act
->new_cp
;
5489 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5490 reverted instead. */
5493 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5494 struct iv_ca_delta
*delta
, bool forward
)
5496 struct cost_pair
*from
, *to
;
5497 struct iv_ca_delta
*act
;
5500 delta
= iv_ca_delta_reverse (delta
);
5502 for (act
= delta
; act
; act
= act
->next_change
)
5506 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5507 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5511 iv_ca_delta_reverse (delta
);
5514 /* Returns true if CAND is used in IVS. */
5517 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5519 return ivs
->n_cand_uses
[cand
->id
] > 0;
5522 /* Returns number of induction variable candidates in the set IVS. */
5525 iv_ca_n_cands (struct iv_ca
*ivs
)
5527 return ivs
->n_cands
;
5530 /* Free the list of changes DELTA. */
5533 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5535 struct iv_ca_delta
*act
, *next
;
5537 for (act
= *delta
; act
; act
= next
)
5539 next
= act
->next_change
;
5546 /* Allocates new iv candidates assignment. */
5548 static struct iv_ca
*
5549 iv_ca_new (struct ivopts_data
*data
)
5551 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5555 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5556 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5557 nw
->cands
= BITMAP_ALLOC (NULL
);
5560 nw
->cand_use_cost
= no_cost
;
5562 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5564 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5565 nw
->num_used_inv_expr
= 0;
5570 /* Free memory occupied by the set IVS. */
5573 iv_ca_free (struct iv_ca
**ivs
)
5575 free ((*ivs
)->cand_for_use
);
5576 free ((*ivs
)->n_cand_uses
);
5577 BITMAP_FREE ((*ivs
)->cands
);
5578 free ((*ivs
)->n_invariant_uses
);
5579 free ((*ivs
)->used_inv_expr
);
5584 /* Dumps IVS to FILE. */
5587 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5589 const char *pref
= " invariants ";
5591 comp_cost cost
= iv_ca_cost (ivs
);
5593 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5594 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5595 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5596 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5598 for (i
= 0; i
< ivs
->upto
; i
++)
5600 struct iv_use
*use
= iv_use (data
, i
);
5601 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5603 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5604 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5606 fprintf (file
, " use:%d --> ??\n", use
->id
);
5609 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5610 if (ivs
->n_invariant_uses
[i
])
5612 fprintf (file
, "%s%d", pref
, i
);
5615 fprintf (file
, "\n\n");
5618 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5619 new set, and store differences in DELTA. Number of induction variables
5620 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5621 the function will try to find a solution with mimimal iv candidates. */
5624 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5625 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5626 unsigned *n_ivs
, bool min_ncand
)
5631 struct cost_pair
*old_cp
, *new_cp
;
5634 for (i
= 0; i
< ivs
->upto
; i
++)
5636 use
= iv_use (data
, i
);
5637 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5640 && old_cp
->cand
== cand
)
5643 new_cp
= get_use_iv_cost (data
, use
, cand
);
5647 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5650 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5653 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5656 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5657 cost
= iv_ca_cost (ivs
);
5659 *n_ivs
= iv_ca_n_cands (ivs
);
5660 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5665 /* Try narrowing set IVS by removing CAND. Return the cost of
5666 the new set and store the differences in DELTA. */
5669 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5670 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
5674 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5676 struct iv_cand
*cnd
;
5680 for (i
= 0; i
< n_iv_uses (data
); i
++)
5682 use
= iv_use (data
, i
);
5684 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5685 if (old_cp
->cand
!= cand
)
5690 if (data
->consider_all_candidates
)
5692 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5697 cnd
= iv_cand (data
, ci
);
5699 cp
= get_use_iv_cost (data
, use
, cnd
);
5703 if (!iv_ca_has_deps (ivs
, cp
))
5706 if (!cheaper_cost_pair (cp
, new_cp
))
5714 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5719 cnd
= iv_cand (data
, ci
);
5721 cp
= get_use_iv_cost (data
, use
, cnd
);
5724 if (!iv_ca_has_deps (ivs
, cp
))
5727 if (!cheaper_cost_pair (cp
, new_cp
))
5736 iv_ca_delta_free (delta
);
5737 return infinite_cost
;
5740 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5743 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5744 cost
= iv_ca_cost (ivs
);
5745 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5750 /* Try optimizing the set of candidates IVS by removing candidates different
5751 from to EXCEPT_CAND from it. Return cost of the new set, and store
5752 differences in DELTA. */
5755 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5756 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5759 struct iv_ca_delta
*act_delta
, *best_delta
;
5761 comp_cost best_cost
, acost
;
5762 struct iv_cand
*cand
;
5765 best_cost
= iv_ca_cost (ivs
);
5767 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5769 cand
= iv_cand (data
, i
);
5771 if (cand
== except_cand
)
5774 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5776 if (compare_costs (acost
, best_cost
) < 0)
5779 iv_ca_delta_free (&best_delta
);
5780 best_delta
= act_delta
;
5783 iv_ca_delta_free (&act_delta
);
5792 /* Recurse to possibly remove other unnecessary ivs. */
5793 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5794 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5795 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5796 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5800 /* Tries to extend the sets IVS in the best possible way in order
5801 to express the USE. If ORIGINALP is true, prefer candidates from
5802 the original set of IVs, otherwise favor important candidates not
5803 based on any memory object. */
5806 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5807 struct iv_use
*use
, bool originalp
)
5809 comp_cost best_cost
, act_cost
;
5812 struct iv_cand
*cand
;
5813 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5814 struct cost_pair
*cp
;
5816 iv_ca_add_use (data
, ivs
, use
, false);
5817 best_cost
= iv_ca_cost (ivs
);
5819 cp
= iv_ca_cand_for_use (ivs
, use
);
5824 iv_ca_add_use (data
, ivs
, use
, true);
5825 best_cost
= iv_ca_cost (ivs
);
5826 cp
= iv_ca_cand_for_use (ivs
, use
);
5830 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5831 iv_ca_set_no_cp (data
, ivs
, use
);
5834 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5835 first try important candidates not based on any memory object. Only if
5836 this fails, try the specific ones. Rationale -- in loops with many
5837 variables the best choice often is to use just one generic biv. If we
5838 added here many ivs specific to the uses, the optimization algorithm later
5839 would be likely to get stuck in a local minimum, thus causing us to create
5840 too many ivs. The approach from few ivs to more seems more likely to be
5841 successful -- starting from few ivs, replacing an expensive use by a
5842 specific iv should always be a win. */
5843 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5845 cand
= iv_cand (data
, i
);
5847 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
5850 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
5853 if (iv_ca_cand_used_p (ivs
, cand
))
5856 cp
= get_use_iv_cost (data
, use
, cand
);
5860 iv_ca_set_cp (data
, ivs
, use
, cp
);
5861 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
5863 iv_ca_set_no_cp (data
, ivs
, use
);
5864 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5866 if (compare_costs (act_cost
, best_cost
) < 0)
5868 best_cost
= act_cost
;
5870 iv_ca_delta_free (&best_delta
);
5871 best_delta
= act_delta
;
5874 iv_ca_delta_free (&act_delta
);
5877 if (infinite_cost_p (best_cost
))
5879 for (i
= 0; i
< use
->n_map_members
; i
++)
5881 cp
= use
->cost_map
+ i
;
5886 /* Already tried this. */
5887 if (cand
->important
)
5889 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
5891 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
5895 if (iv_ca_cand_used_p (ivs
, cand
))
5899 iv_ca_set_cp (data
, ivs
, use
, cp
);
5900 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
5901 iv_ca_set_no_cp (data
, ivs
, use
);
5902 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5905 if (compare_costs (act_cost
, best_cost
) < 0)
5907 best_cost
= act_cost
;
5910 iv_ca_delta_free (&best_delta
);
5911 best_delta
= act_delta
;
5914 iv_ca_delta_free (&act_delta
);
5918 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5919 iv_ca_delta_free (&best_delta
);
5921 return !infinite_cost_p (best_cost
);
5924 /* Finds an initial assignment of candidates to uses. */
5926 static struct iv_ca
*
5927 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
5929 struct iv_ca
*ivs
= iv_ca_new (data
);
5932 for (i
= 0; i
< n_iv_uses (data
); i
++)
5933 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
5942 /* Tries to improve set of induction variables IVS. */
5945 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5948 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5949 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5950 struct iv_cand
*cand
;
5952 /* Try extending the set of induction variables by one. */
5953 for (i
= 0; i
< n_iv_cands (data
); i
++)
5955 cand
= iv_cand (data
, i
);
5957 if (iv_ca_cand_used_p (ivs
, cand
))
5960 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
5964 /* If we successfully added the candidate and the set is small enough,
5965 try optimizing it by removing other candidates. */
5966 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5968 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5969 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5970 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5971 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5974 if (compare_costs (acost
, best_cost
) < 0)
5977 iv_ca_delta_free (&best_delta
);
5978 best_delta
= act_delta
;
5981 iv_ca_delta_free (&act_delta
);
5986 /* Try removing the candidates from the set instead. */
5987 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5989 /* Nothing more we can do. */
5994 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5995 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5996 iv_ca_delta_free (&best_delta
);
6000 /* Attempts to find the optimal set of induction variables. We do simple
6001 greedy heuristic -- we try to replace at most one candidate in the selected
6002 solution and remove the unused ivs while this improves the cost. */
6004 static struct iv_ca
*
6005 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6009 /* Get the initial solution. */
6010 set
= get_initial_solution (data
, originalp
);
6013 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6014 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6018 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6020 fprintf (dump_file
, "Initial set of candidates:\n");
6021 iv_ca_dump (data
, dump_file
, set
);
6024 while (try_improve_iv_set (data
, set
))
6026 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6028 fprintf (dump_file
, "Improved to:\n");
6029 iv_ca_dump (data
, dump_file
, set
);
6036 static struct iv_ca
*
6037 find_optimal_iv_set (struct ivopts_data
*data
)
6040 struct iv_ca
*set
, *origset
;
6042 comp_cost cost
, origcost
;
6044 /* Determine the cost based on a strategy that starts with original IVs,
6045 and try again using a strategy that prefers candidates not based
6047 origset
= find_optimal_iv_set_1 (data
, true);
6048 set
= find_optimal_iv_set_1 (data
, false);
6050 if (!origset
&& !set
)
6053 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6054 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6056 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6058 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6059 origcost
.cost
, origcost
.complexity
);
6060 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6061 cost
.cost
, cost
.complexity
);
6064 /* Choose the one with the best cost. */
6065 if (compare_costs (origcost
, cost
) <= 0)
6072 iv_ca_free (&origset
);
6074 for (i
= 0; i
< n_iv_uses (data
); i
++)
6076 use
= iv_use (data
, i
);
6077 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6083 /* Creates a new induction variable corresponding to CAND. */
6086 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6088 gimple_stmt_iterator incr_pos
;
6098 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6102 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6110 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6114 /* Mark that the iv is preserved. */
6115 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6116 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6118 /* Rewrite the increment so that it uses var_before directly. */
6119 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6123 gimple_add_tmp_var (cand
->var_before
);
6125 base
= unshare_expr (cand
->iv
->base
);
6127 create_iv (base
, unshare_expr (cand
->iv
->step
),
6128 cand
->var_before
, data
->current_loop
,
6129 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6132 /* Creates new induction variables described in SET. */
6135 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6138 struct iv_cand
*cand
;
6141 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6143 cand
= iv_cand (data
, i
);
6144 create_new_iv (data
, cand
);
6147 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6149 fprintf (dump_file
, "\nSelected IV set: \n");
6150 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6152 cand
= iv_cand (data
, i
);
6153 dump_cand (dump_file
, cand
);
6155 fprintf (dump_file
, "\n");
6159 /* Rewrites USE (definition of iv used in a nonlinear expression)
6160 using candidate CAND. */
6163 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6164 struct iv_use
*use
, struct iv_cand
*cand
)
6169 gimple_stmt_iterator bsi
;
6171 /* An important special case -- if we are asked to express value of
6172 the original iv by itself, just exit; there is no need to
6173 introduce a new computation (that might also need casting the
6174 variable to unsigned and back). */
6175 if (cand
->pos
== IP_ORIGINAL
6176 && cand
->incremented_at
== use
->stmt
)
6178 enum tree_code stmt_code
;
6180 gcc_assert (is_gimple_assign (use
->stmt
));
6181 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6183 /* Check whether we may leave the computation unchanged.
6184 This is the case only if it does not rely on other
6185 computations in the loop -- otherwise, the computation
6186 we rely upon may be removed in remove_unused_ivs,
6187 thus leading to ICE. */
6188 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6189 if (stmt_code
== PLUS_EXPR
6190 || stmt_code
== MINUS_EXPR
6191 || stmt_code
== POINTER_PLUS_EXPR
)
6193 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6194 op
= gimple_assign_rhs2 (use
->stmt
);
6195 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6196 op
= gimple_assign_rhs1 (use
->stmt
);
6203 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6207 comp
= get_computation (data
->current_loop
, use
, cand
);
6208 gcc_assert (comp
!= NULL_TREE
);
6210 switch (gimple_code (use
->stmt
))
6213 tgt
= PHI_RESULT (use
->stmt
);
6215 /* If we should keep the biv, do not replace it. */
6216 if (name_info (data
, tgt
)->preserve_biv
)
6219 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6223 tgt
= gimple_assign_lhs (use
->stmt
);
6224 bsi
= gsi_for_stmt (use
->stmt
);
6231 if (!valid_gimple_rhs_p (comp
)
6232 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6233 /* We can't allow re-allocating the stmt as it might be pointed
6235 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6236 >= gimple_num_ops (gsi_stmt (bsi
)))))
6238 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6239 true, GSI_SAME_STMT
);
6240 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6242 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6243 /* As this isn't a plain copy we have to reset alignment
6245 if (SSA_NAME_PTR_INFO (comp
))
6246 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6250 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6252 ass
= gimple_build_assign (tgt
, comp
);
6253 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6255 bsi
= gsi_for_stmt (use
->stmt
);
6256 remove_phi_node (&bsi
, false);
6260 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6261 use
->stmt
= gsi_stmt (bsi
);
6265 /* Performs a peephole optimization to reorder the iv update statement with
6266 a mem ref to enable instruction combining in later phases. The mem ref uses
6267 the iv value before the update, so the reordering transformation requires
6268 adjustment of the offset. CAND is the selected IV_CAND.
6272 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6280 directly propagating t over to (1) will introduce overlapping live range
6281 thus increase register pressure. This peephole transform it into:
6285 t = MEM_REF (base, iv2, 8, 8);
6292 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6295 gimple iv_update
, stmt
;
6297 gimple_stmt_iterator gsi
, gsi_iv
;
6299 if (cand
->pos
!= IP_NORMAL
)
6302 var_after
= cand
->var_after
;
6303 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6305 bb
= gimple_bb (iv_update
);
6306 gsi
= gsi_last_nondebug_bb (bb
);
6307 stmt
= gsi_stmt (gsi
);
6309 /* Only handle conditional statement for now. */
6310 if (gimple_code (stmt
) != GIMPLE_COND
)
6313 gsi_prev_nondebug (&gsi
);
6314 stmt
= gsi_stmt (gsi
);
6315 if (stmt
!= iv_update
)
6318 gsi_prev_nondebug (&gsi
);
6319 if (gsi_end_p (gsi
))
6322 stmt
= gsi_stmt (gsi
);
6323 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6326 if (stmt
!= use
->stmt
)
6329 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6332 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6334 fprintf (dump_file
, "Reordering \n");
6335 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6336 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6337 fprintf (dump_file
, "\n");
6340 gsi
= gsi_for_stmt (use
->stmt
);
6341 gsi_iv
= gsi_for_stmt (iv_update
);
6342 gsi_move_before (&gsi_iv
, &gsi
);
6344 cand
->pos
= IP_BEFORE_USE
;
6345 cand
->incremented_at
= use
->stmt
;
6348 /* Rewrites USE (address that is an iv) using candidate CAND. */
6351 rewrite_use_address (struct ivopts_data
*data
,
6352 struct iv_use
*use
, struct iv_cand
*cand
)
6355 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6356 tree base_hint
= NULL_TREE
;
6360 adjust_iv_update_pos (cand
, use
);
6361 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6363 unshare_aff_combination (&aff
);
6365 /* To avoid undefined overflow problems, all IV candidates use unsigned
6366 integer types. The drawback is that this makes it impossible for
6367 create_mem_ref to distinguish an IV that is based on a memory object
6368 from one that represents simply an offset.
6370 To work around this problem, we pass a hint to create_mem_ref that
6371 indicates which variable (if any) in aff is an IV based on a memory
6372 object. Note that we only consider the candidate. If this is not
6373 based on an object, the base of the reference is in some subexpression
6374 of the use -- but these will use pointer types, so they are recognized
6375 by the create_mem_ref heuristics anyway. */
6376 if (cand
->iv
->base_object
)
6377 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6379 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6380 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6381 reference_alias_ptr_type (*use
->op_p
),
6382 iv
, base_hint
, data
->speed
);
6383 copy_ref_info (ref
, *use
->op_p
);
6387 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6391 rewrite_use_compare (struct ivopts_data
*data
,
6392 struct iv_use
*use
, struct iv_cand
*cand
)
6394 tree comp
, *var_p
, op
, bound
;
6395 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6396 enum tree_code compare
;
6397 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6403 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6404 tree var_type
= TREE_TYPE (var
);
6407 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6409 fprintf (dump_file
, "Replacing exit test: ");
6410 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6413 bound
= unshare_expr (fold_convert (var_type
, bound
));
6414 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6416 gsi_insert_seq_on_edge_immediate (
6417 loop_preheader_edge (data
->current_loop
),
6420 gimple_cond_set_lhs (use
->stmt
, var
);
6421 gimple_cond_set_code (use
->stmt
, compare
);
6422 gimple_cond_set_rhs (use
->stmt
, op
);
6426 /* The induction variable elimination failed; just express the original
6428 comp
= get_computation (data
->current_loop
, use
, cand
);
6429 gcc_assert (comp
!= NULL_TREE
);
6431 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6434 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6435 true, GSI_SAME_STMT
);
6438 /* Rewrites USE using candidate CAND. */
6441 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6445 case USE_NONLINEAR_EXPR
:
6446 rewrite_use_nonlinear_expr (data
, use
, cand
);
6450 rewrite_use_address (data
, use
, cand
);
6454 rewrite_use_compare (data
, use
, cand
);
6461 update_stmt (use
->stmt
);
6464 /* Rewrite the uses using the selected induction variables. */
6467 rewrite_uses (struct ivopts_data
*data
)
6470 struct iv_cand
*cand
;
6473 for (i
= 0; i
< n_iv_uses (data
); i
++)
6475 use
= iv_use (data
, i
);
6476 cand
= use
->selected
;
6479 rewrite_use (data
, use
, cand
);
6483 /* Removes the ivs that are not used after rewriting. */
6486 remove_unused_ivs (struct ivopts_data
*data
)
6490 bitmap toremove
= BITMAP_ALLOC (NULL
);
6492 /* Figure out an order in which to release SSA DEFs so that we don't
6493 release something that we'd have to propagate into a debug stmt
6495 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6497 struct version_info
*info
;
6499 info
= ver_info (data
, j
);
6501 && !integer_zerop (info
->iv
->step
)
6503 && !info
->iv
->have_use_for
6504 && !info
->preserve_biv
)
6506 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6508 tree def
= info
->iv
->ssa_name
;
6510 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6512 imm_use_iterator imm_iter
;
6513 use_operand_p use_p
;
6517 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6519 if (!gimple_debug_bind_p (stmt
))
6522 /* We just want to determine whether to do nothing
6523 (count == 0), to substitute the computed
6524 expression into a single use of the SSA DEF by
6525 itself (count == 1), or to use a debug temp
6526 because the SSA DEF is used multiple times or as
6527 part of a larger expression (count > 1). */
6529 if (gimple_debug_bind_get_value (stmt
) != def
)
6533 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6539 struct iv_use dummy_use
;
6540 struct iv_cand
*best_cand
= NULL
, *cand
;
6541 unsigned i
, best_pref
= 0, cand_pref
;
6543 memset (&dummy_use
, 0, sizeof (dummy_use
));
6544 dummy_use
.iv
= info
->iv
;
6545 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6547 cand
= iv_use (data
, i
)->selected
;
6548 if (cand
== best_cand
)
6550 cand_pref
= operand_equal_p (cand
->iv
->step
,
6554 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6555 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6558 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6560 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6563 best_pref
= cand_pref
;
6570 tree comp
= get_computation_at (data
->current_loop
,
6571 &dummy_use
, best_cand
,
6572 SSA_NAME_DEF_STMT (def
));
6578 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6579 DECL_ARTIFICIAL (vexpr
) = 1;
6580 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6581 if (SSA_NAME_VAR (def
))
6582 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6584 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6585 gimple def_temp
= gimple_build_debug_bind (vexpr
, comp
, NULL
);
6586 gimple_stmt_iterator gsi
;
6588 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6589 gsi
= gsi_after_labels (gimple_bb
6590 (SSA_NAME_DEF_STMT (def
)));
6592 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6594 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6598 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6600 if (!gimple_debug_bind_p (stmt
))
6603 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6604 SET_USE (use_p
, comp
);
6612 release_defs_bitset (toremove
);
6614 BITMAP_FREE (toremove
);
6617 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6618 for pointer_map_traverse. */
6621 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6622 void *data ATTRIBUTE_UNUSED
)
6624 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6630 /* Frees data allocated by the optimization of a single loop. */
/* NOTE(review): extraction dropped several original lines inside this
   function (opening brace, local declarations such as the loop counters and
   the bitmap iterator, a guard around data->niters, and the struct braces of
   the loop bodies).  The visible statements below are byte-identical to what
   the extraction kept.  */
6633 free_loop_data (struct ivopts_data
*data
)
/* Release the per-loop number-of-iterations cache: free each cached
   tree_niter_desc via the callback above, then destroy the map itself.  */
6641 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6642 pointer_map_destroy (data
->niters
);
6643 data
->niters
= NULL
;
/* Clear the per-SSA-version flags for every version marked relevant.  */
6646 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6648 struct version_info
*info
;
6650 info
= ver_info (data
, i
);
6653 info
->has_nonlin_use
= false;
6654 info
->preserve_biv
= false;
/* Forget which versions were relevant / important for this loop.  */
6657 bitmap_clear (data
->relevant
);
6658 bitmap_clear (data
->important_candidates
);
/* Free per-use data: the related-candidates bitmap, each cost-map entry's
   depends_on bitmap, and the cost map array itself.  */
6660 for (i
= 0; i
< n_iv_uses (data
); i
++)
6662 struct iv_use
*use
= iv_use (data
, i
);
6665 BITMAP_FREE (use
->related_cands
);
6666 for (j
= 0; j
< use
->n_map_members
; j
++)
6667 if (use
->cost_map
[j
].depends_on
)
6668 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6669 free (use
->cost_map
);
6672 data
->iv_uses
.truncate (0);
/* Free per-candidate data (the depends_on bitmap, when present).  */
6674 for (i
= 0; i
< n_iv_cands (data
); i
++)
6676 struct iv_cand
*cand
= iv_cand (data
, i
);
6679 if (cand
->depends_on
)
6680 BITMAP_FREE (cand
->depends_on
);
6683 data
->iv_candidates
.truncate (0);
/* Grow the version_info array if new SSA names appeared; doubling
   num_ssa_names leaves headroom for the next loop.  XCNEWVEC
   zero-initializes, so no explicit reset of the entries is needed.  */
6685 if (data
->version_info_size
< num_ssa_names
)
6687 data
->version_info_size
= 2 * num_ssa_names
;
6688 free (data
->version_info
);
6689 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6692 data
->max_inv_id
= 0;
/* Drop the DECL_RTLs we temporarily materialized for cost estimation.  */
6694 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6695 SET_DECL_RTL (obj
, NULL_RTX
);
6697 decl_rtl_to_reset
.truncate (0);
/* Reset the invariant-expression hash table and its id counter.  */
6699 data
->inv_expr_tab
.empty ();
6700 data
->inv_expr_id
= 0;
6703 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
/* NOTE(review): the comment above is truncated by the extraction (original
   lines 6704-6706 are missing), as is the `static void` return-type line.  */
6707 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
/* Release the last loop's data first, then tear down the pass-global
   structures that free_loop_data only clears but does not destroy.  */
6709 free_loop_data (data
);
6710 free (data
->version_info
);
6711 BITMAP_FREE (data
->relevant
);
6712 BITMAP_FREE (data
->important_candidates
);
/* Fully release the vectors (free_loop_data merely truncates them) and
   dispose of the invariant-expression hash table.  */
6714 decl_rtl_to_reset
.release ();
6715 data
->iv_uses
.release ();
6716 data
->iv_candidates
.release ();
6717 data
->inv_expr_tab
.dispose ();
6720 /* Returns true if the loop body BODY includes any function calls. */
/* NOTE(review): the extraction dropped the `static bool` line, the
   declaration of the counter I, and — presumably — the `return true;`
   following the is_gimple_call test and the trailing `return false;`.
   Confirm against the full source.  */
6723 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6725 gimple_stmt_iterator gsi
;
/* Scan every statement of every basic block in BODY.  */
6728 for (i
= 0; i
< num_nodes
; i
++)
6729 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6731 gimple stmt
= gsi_stmt (gsi
);
/* Cheap builtins (per is_inexpensive_builtin) are not counted as calls,
   since they do not affect the cost model the caller feeds this into.  */
6732 if (is_gimple_call (stmt
)
6733 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6739 /* Optimizes the LOOP. Returns true if anything changed. */
/* NOTE(review): extraction dropped the `static bool` line, the declaration
   of BODY, several braces, and the early-exit / `return changed;` lines
   (original numbering jumps at 6747, 6777-6780, 6782-6785, 6794-6799,
   6810-6815).  The visible statements are byte-identical.  */
6742 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6744 bool changed
= false;
6745 struct iv_ca
*iv_ca
;
/* EXIT is the single exit dominating the latch, or NULL if none.  */
6746 edge exit
= single_dom_exit (loop
);
/* free_loop_data must have cleared the previous loop's niter cache.  */
6749 gcc_assert (!data
->niters
);
6750 data
->current_loop
= loop
;
6751 data
->speed
= optimize_loop_for_speed_p (loop
);
/* Dump the loop being processed and, when known, its exit condition.  */
6753 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6755 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6759 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6760 exit
->src
->index
, exit
->dest
->index
);
6761 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6762 fprintf (dump_file
, "\n");
6765 fprintf (dump_file
, "\n");
/* Collect the loop's blocks, record whether the body contains calls (this
   feeds the register-pressure/cost model), and renumber stmt uids.  */
6768 body
= get_loop_body (loop
);
6769 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6770 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6773 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
6775 /* For each ssa name determines whether it behaves as an induction variable
/* Bail out early if no induction variables were found.  */
6777 if (!find_induction_variables (data
))
6780 /* Finds interesting uses (item 1). */
6781 find_interesting_uses (data
);
/* Give up when there are too many uses for the O(uses * cands) search.  */
6782 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6785 /* Finds candidates for the induction variables (item 2). */
6786 find_iv_candidates (data
);
6788 /* Calculates the costs (item 3, part 1). */
6789 determine_iv_costs (data
);
6790 determine_use_iv_costs (data
);
6791 determine_set_costs (data
);
6793 /* Find the optimal set of induction variables (item 3, part 2). */
6794 iv_ca
= find_optimal_iv_set (data
);
6799 /* Create the new induction variables (item 4, part 1). */
6800 create_new_ivs (data
, iv_ca
);
6801 iv_ca_free (&iv_ca
);
6803 /* Rewrite the uses (item 4, part 2). */
6804 rewrite_uses (data
);
6806 /* Remove the ivs that are unused after rewriting. */
6807 remove_unused_ivs (data
);
6809 /* We have changed the structure of induction variables; it might happen
6810 that definitions in the scev database refer to some of them that were
/* Per-loop cleanup; shared fall-through for both the early exits and the
   successful path.  */
6815 free_loop_data (data
);
6820 /* Main entry point. Optimizes induction variables in loops. */
6823 tree_ssa_iv_optimize (void)
6826 struct ivopts_data data
;
6829 tree_ssa_iv_optimize_init (&data
);
6831 /* Optimize the loops starting with the innermost ones. */
6832 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
6834 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6835 flow_loop_dump (loop
, dump_file
, NULL
, 1);
6837 tree_ssa_iv_optimize_loop (&data
, loop
);
6840 tree_ssa_iv_optimize_finalize (&data
);