gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2013 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "tm_p.h"
  70 #include "basic-block.h"
  71 #include "gimple-pretty-print.h"
  72 #include "tree-flow.h"
  73 #include "cfgloop.h"
  74 #include "tree-pass.h"
  75 #include "ggc.h"
  76 #include "insn-config.h"
  77 #include "pointer-set.h"
  78 #include "hashtab.h"
  79 #include "tree-chrec.h"
  80 #include "tree-scalar-evolution.h"
  81 #include "cfgloop.h"
  82 #include "params.h"
  83 #include "langhooks.h"
  84 #include "tree-affine.h"
  85 #include "target.h"
  86 #include "tree-inline.h"
  87 #include "tree-ssa-propagate.h"
  88 #include "expmed.h"
  89
  90 /* FIXME: Expressions are expanded to RTL in this pass to determine the
  91    cost of different addressing modes.  This should be moved to a TBD
  92    interface between the GIMPLE and RTL worlds.  */
  93 #include "expr.h"
  94 #include "recog.h"
  95
  96 /* The infinite cost.  */
  97 #define INFTY 10000000
  98
  99 #define AVG_LOOP_NITER(LOOP) 5
 100
 101 /* Returns the expected number of loop iterations for LOOP.
 102    The average trip count is computed from profile data if it
 103    exists. */
 104
 105 static inline HOST_WIDE_INT
 106 avg_loop_niter (struct loop *loop)
 107 {
 108   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 109   if (niter == -1)
 110     return AVG_LOOP_NITER (loop);
 111
 112   return niter;
 113 }
 114
 115 /* Representation of the induction variable.  */
 116 struct iv
 117 {
 118   tree base;            /* Initial value of the iv.  */
 119   tree base_object;     /* A memory object to that the induction variable points.  */
 120   tree step;            /* Step of the iv (constant only).  */
 121   tree ssa_name;        /* The ssa name with the value.  */
 122   bool biv_p;           /* Is it a biv?  */
 123   bool have_use_for;    /* Do we already have a use for it?  */
 124   unsigned use_id;      /* The identifier in the use if it is the case.  */
 125 };
 126
 127 /* Per-ssa version information (induction variable descriptions, etc.).  */
 128 struct version_info
 129 {
 130   tree name;            /* The ssa name.  */
 131   struct iv *iv;        /* Induction variable description.  */
 132   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 133                            an expression that is not an induction variable.  */
 134   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 135   unsigned inv_id;      /* Id of an invariant.  */
 136 };
 137
 138 /* Types of uses.  */
 139 enum use_type
 140 {
 141   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 142   USE_ADDRESS,          /* Use in an address.  */
 143   USE_COMPARE           /* Use is a compare.  */
 144 };
 145
 146 /* Cost of a computation.  */
 147 typedef struct
 148 {
 149   int cost;             /* The runtime cost.  */
 150   unsigned complexity;  /* The estimate of the complexity of the code for
 151                            the computation (in no concrete units --
 152                            complexity field should be larger for more
 153                            complex expressions and addressing modes).  */
 154 } comp_cost;
 155
 156 static const comp_cost no_cost = {0, 0};
 157 static const comp_cost infinite_cost = {INFTY, INFTY};
 158
 159 /* The candidate - cost pair.  */
 160 struct cost_pair
 161 {
 162   struct iv_cand *cand; /* The candidate.  */
 163   comp_cost cost;       /* The cost.  */
 164   bitmap depends_on;    /* The list of invariants that have to be
 165                            preserved.  */
 166   tree value;           /* For final value elimination, the expression for
 167                            the final value of the iv.  For iv elimination,
 168                            the new bound to compare with.  */
 169   enum tree_code comp;  /* For iv elimination, the comparison.  */
 170   int inv_expr_id;      /* Loop invariant expression id.  */
 171 };
 172
 173 /* Use.  */
 174 struct iv_use
 175 {
 176   unsigned id;          /* The id of the use.  */
 177   enum use_type type;   /* Type of the use.  */
 178   struct iv *iv;        /* The induction variable it is based on.  */
 179   gimple stmt;          /* Statement in that it occurs.  */
 180   tree *op_p;           /* The place where it occurs.  */
 181   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 182                            important ones.  */
 183
 184   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 185   struct cost_pair *cost_map;
 186                         /* The costs wrto the iv candidates.  */
 187
 188   struct iv_cand *selected;
 189                         /* The selected candidate.  */
 190 };
 191
 192 /* The position where the iv is computed.  */
 193 enum iv_position
 194 {
 195   IP_NORMAL,            /* At the end, just before the exit condition.  */
 196   IP_END,               /* At the end of the latch block.  */
 197   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 198   IP_AFTER_USE,         /* Immediately after a specific use.  */
 199   IP_ORIGINAL           /* The original biv.  */
 200 };
 201
 202 /* The induction variable candidate.  */
 203 struct iv_cand
 204 {
 205   unsigned id;          /* The number of the candidate.  */
 206   bool important;       /* Whether this is an "important" candidate, i.e. such
 207                            that it should be considered by all uses.  */
 208   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 209   gimple incremented_at;/* For original biv, the statement where it is
 210                            incremented.  */
 211   tree var_before;      /* The variable used for it before increment.  */
 212   tree var_after;       /* The variable used for it after increment.  */
 213   struct iv *iv;        /* The value of the candidate.  NULL for
 214                            "pseudocandidate" used to indicate the possibility
 215                            to replace the final value of an iv by direct
 216                            computation of the value.  */
 217   unsigned cost;        /* Cost of the candidate.  */
 218   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 219   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 220                               where it is incremented.  */
 221   bitmap depends_on;    /* The list of invariants that are used in step of the
 222                            biv.  */
 223 };
 224
 225 /* Loop invariant expression hashtable entry.  */
 226 struct iv_inv_expr_ent
 227 {
 228   tree expr;
 229   int id;
 230   hashval_t hash;
 231 };
 232
 233 /* The data used by the induction variable optimizations.  */
 234
 235 typedef struct iv_use *iv_use_p;
 236
 237 typedef struct iv_cand *iv_cand_p;
 238
 239 struct ivopts_data
 240 {
 241   /* The currently optimized loop.  */
 242   struct loop *current_loop;
 243
 244   /* Numbers of iterations for all exits of the current loop.  */
 245   struct pointer_map_t *niters;
 246
 247   /* Number of registers used in it.  */
 248   unsigned regs_used;
 249
 250   /* The size of version_info array allocated.  */
 251   unsigned version_info_size;
 252
 253   /* The array of information for the ssa names.  */
 254   struct version_info *version_info;
 255
 256   /* The hashtable of loop invariant expressions created
 257      by ivopt.  */
 258   htab_t inv_expr_tab;
 259
 260   /* Loop invariant expression id.  */
 261   int inv_expr_id;
 262
 263   /* The bitmap of indices in version_info whose value was changed.  */
 264   bitmap relevant;
 265
 266   /* The uses of induction variables.  */
 267   vec<iv_use_p> iv_uses;
 268
 269   /* The candidates.  */
 270   vec<iv_cand_p> iv_candidates;
 271
 272   /* A bitmap of important candidates.  */
 273   bitmap important_candidates;
 274
 275   /* The maximum invariant id.  */
 276   unsigned max_inv_id;
 277
 278   /* Whether to consider just related and important candidates when replacing a
 279      use.  */
 280   bool consider_all_candidates;
 281
 282   /* Are we optimizing for speed?  */
 283   bool speed;
 284
 285   /* Whether the loop body includes any function calls.  */
 286   bool body_includes_call;
 287
 288   /* Whether the loop body can only be exited via single exit.  */
 289   bool loop_single_exit_p;
 290 };
 291
 292 /* An assignment of iv candidates to uses.  */
 293
 294 struct iv_ca
 295 {
 296   /* The number of uses covered by the assignment.  */
 297   unsigned upto;
 298
 299   /* Number of uses that cannot be expressed by the candidates in the set.  */
 300   unsigned bad_uses;
 301
 302   /* Candidate assigned to a use, together with the related costs.  */
 303   struct cost_pair **cand_for_use;
 304
 305   /* Number of times each candidate is used.  */
 306   unsigned *n_cand_uses;
 307
 308   /* The candidates used.  */
 309   bitmap cands;
 310
 311   /* The number of candidates in the set.  */
 312   unsigned n_cands;
 313
 314   /* Total number of registers needed.  */
 315   unsigned n_regs;
 316
 317   /* Total cost of expressing uses.  */
 318   comp_cost cand_use_cost;
 319
 320   /* Total cost of candidates.  */
 321   unsigned cand_cost;
 322
 323   /* Number of times each invariant is used.  */
 324   unsigned *n_invariant_uses;
 325
 326   /* The array holding the number of uses of each loop
 327      invariant expressions created by ivopt.  */
 328   unsigned *used_inv_expr;
 329
 330   /* The number of created loop invariants.  */
 331   unsigned num_used_inv_expr;
 332
 333   /* Total cost of the assignment.  */
 334   comp_cost cost;
 335 };
 336
 337 /* Difference of two iv candidate assignments.  */
 338
 339 struct iv_ca_delta
 340 {
 341   /* Changed use.  */
 342   struct iv_use *use;
 343
 344   /* An old assignment (for rollback purposes).  */
 345   struct cost_pair *old_cp;
 346
 347   /* A new assignment.  */
 348   struct cost_pair *new_cp;
 349
 350   /* Next change in the list.  */
 351   struct iv_ca_delta *next_change;
 352 };
 353
 354 /* Bound on number of candidates below that all candidates are considered.  */
 355
 356 #define CONSIDER_ALL_CANDIDATES_BOUND \
 357   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 358
 359 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 360    optimizing such a loop would help, and it would take ages).  */
 361
 362 #define MAX_CONSIDERED_USES \
 363   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 364
 365 /* If there are at most this number of ivs in the set, try removing unnecessary
 366    ivs from the set always.  */
 367
 368 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 369   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 370
 371 /* The list of trees for that the decl_rtl field must be reset is stored
 372    here.  */
 373
 374 static vec<tree> decl_rtl_to_reset;
 375
 376 static comp_cost force_expr_to_var_cost (tree, bool);
 377
 378 /* Number of uses recorded in DATA.  */
 379
 380 static inline unsigned
 381 n_iv_uses (struct ivopts_data *data)
 382 {
 383   return data->iv_uses.length ();
 384 }
 385
 386 /* Ith use recorded in DATA.  */
 387
 388 static inline struct iv_use *
 389 iv_use (struct ivopts_data *data, unsigned i)
 390 {
 391   return data->iv_uses[i];
 392 }
 393
 394 /* Number of candidates recorded in DATA.  */
 395
 396 static inline unsigned
 397 n_iv_cands (struct ivopts_data *data)
 398 {
 399   return data->iv_candidates.length ();
 400 }
 401
 402 /* Ith candidate recorded in DATA.  */
 403
 404 static inline struct iv_cand *
 405 iv_cand (struct ivopts_data *data, unsigned i)
 406 {
 407   return data->iv_candidates[i];
 408 }
 409
 410 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 411
 412 edge
 413 single_dom_exit (struct loop *loop)
 414 {
 415   edge exit = single_exit (loop);
 416
 417   if (!exit)
 418     return NULL;
 419
 420   if (!just_once_each_iteration_p (loop, exit->src))
 421     return NULL;
 422
 423   return exit;
 424 }
 425
 426 /* Dumps information about the induction variable IV to FILE.  */
 427
 428 extern void dump_iv (FILE *, struct iv *);
 429 void
 430 dump_iv (FILE *file, struct iv *iv)
 431 {
 432   if (iv->ssa_name)
 433     {
 434       fprintf (file, "ssa name ");
 435       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 436       fprintf (file, "\n");
 437     }
 438
 439   fprintf (file, "  type ");
 440   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 441   fprintf (file, "\n");
 442
 443   if (iv->step)
 444     {
 445       fprintf (file, "  base ");
 446       print_generic_expr (file, iv->base, TDF_SLIM);
 447       fprintf (file, "\n");
 448
 449       fprintf (file, "  step ");
 450       print_generic_expr (file, iv->step, TDF_SLIM);
 451       fprintf (file, "\n");
 452     }
 453   else
 454     {
 455       fprintf (file, "  invariant ");
 456       print_generic_expr (file, iv->base, TDF_SLIM);
 457       fprintf (file, "\n");
 458     }
 459
 460   if (iv->base_object)
 461     {
 462       fprintf (file, "  base object ");
 463       print_generic_expr (file, iv->base_object, TDF_SLIM);
 464       fprintf (file, "\n");
 465     }
 466
 467   if (iv->biv_p)
 468     fprintf (file, "  is a biv\n");
 469 }
 470
 471 /* Dumps information about the USE to FILE.  */
 472
 473 extern void dump_use (FILE *, struct iv_use *);
 474 void
 475 dump_use (FILE *file, struct iv_use *use)
 476 {
 477   fprintf (file, "use %d\n", use->id);
 478
 479   switch (use->type)
 480     {
 481     case USE_NONLINEAR_EXPR:
 482       fprintf (file, "  generic\n");
 483       break;
 484
 485     case USE_ADDRESS:
 486       fprintf (file, "  address\n");
 487       break;
 488
 489     case USE_COMPARE:
 490       fprintf (file, "  compare\n");
 491       break;
 492
 493     default:
 494       gcc_unreachable ();
 495     }
 496
 497   fprintf (file, "  in statement ");
 498   print_gimple_stmt (file, use->stmt, 0, 0);
 499   fprintf (file, "\n");
 500
 501   fprintf (file, "  at position ");
 502   if (use->op_p)
 503     print_generic_expr (file, *use->op_p, TDF_SLIM);
 504   fprintf (file, "\n");
 505
 506   dump_iv (file, use->iv);
 507
 508   if (use->related_cands)
 509     {
 510       fprintf (file, "  related candidates ");
 511       dump_bitmap (file, use->related_cands);
 512     }
 513 }
 514
 515 /* Dumps information about the uses to FILE.  */
 516
 517 extern void dump_uses (FILE *, struct ivopts_data *);
 518 void
 519 dump_uses (FILE *file, struct ivopts_data *data)
 520 {
 521   unsigned i;
 522   struct iv_use *use;
 523
 524   for (i = 0; i < n_iv_uses (data); i++)
 525     {
 526       use = iv_use (data, i);
 527
 528       dump_use (file, use);
 529       fprintf (file, "\n");
 530     }
 531 }
 532
 533 /* Dumps information about induction variable candidate CAND to FILE.  */
 534
 535 extern void dump_cand (FILE *, struct iv_cand *);
 536 void
 537 dump_cand (FILE *file, struct iv_cand *cand)
 538 {
 539   struct iv *iv = cand->iv;
 540
 541   fprintf (file, "candidate %d%s\n",
 542            cand->id, cand->important ? " (important)" : "");
 543
 544   if (cand->depends_on)
 545     {
 546       fprintf (file, "  depends on ");
 547       dump_bitmap (file, cand->depends_on);
 548     }
 549
 550   if (!iv)
 551     {
 552       fprintf (file, "  final value replacement\n");
 553       return;
 554     }
 555
 556   if (cand->var_before)
 557     {
 558       fprintf (file, "  var_before ");
 559       print_generic_expr (file, cand->var_before, TDF_SLIM);
 560       fprintf (file, "\n");
 561     }
 562   if (cand->var_after)
 563     {
 564       fprintf (file, "  var_after ");
 565       print_generic_expr (file, cand->var_after, TDF_SLIM);
 566       fprintf (file, "\n");
 567     }
 568
 569   switch (cand->pos)
 570     {
 571     case IP_NORMAL:
 572       fprintf (file, "  incremented before exit test\n");
 573       break;
 574
 575     case IP_BEFORE_USE:
 576       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 577       break;
 578
 579     case IP_AFTER_USE:
 580       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 581       break;
 582
 583     case IP_END:
 584       fprintf (file, "  incremented at end\n");
 585       break;
 586
 587     case IP_ORIGINAL:
 588       fprintf (file, "  original biv\n");
 589       break;
 590     }
 591
 592   dump_iv (file, iv);
 593 }
 594
 595 /* Returns the info for ssa version VER.  */
 596
 597 static inline struct version_info *
 598 ver_info (struct ivopts_data *data, unsigned ver)
 599 {
 600   return data->version_info + ver;
 601 }
 602
 603 /* Returns the info for ssa name NAME.  */
 604
 605 static inline struct version_info *
 606 name_info (struct ivopts_data *data, tree name)
 607 {
 608   return ver_info (data, SSA_NAME_VERSION (name));
 609 }
 610
 611 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 612    emitted in LOOP.  */
 613
 614 static bool
 615 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 616 {
 617   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 618
 619   gcc_assert (bb);
 620
 621   if (sbb == loop->latch)
 622     return true;
 623
 624   if (sbb != bb)
 625     return false;
 626
 627   return stmt == last_stmt (bb);
 628 }
 629
 630 /* Returns true if STMT if after the place where the original induction
 631    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 632    if the positions are identical.  */
 633
 634 static bool
 635 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 636 {
 637   basic_block cand_bb = gimple_bb (cand->incremented_at);
 638   basic_block stmt_bb = gimple_bb (stmt);
 639
 640   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 641     return false;
 642
 643   if (stmt_bb != cand_bb)
 644     return true;
 645
 646   if (true_if_equal
 647       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 648     return true;
 649   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 650 }
 651
 652 /* Returns true if STMT if after the place where the induction variable
 653    CAND is incremented in LOOP.  */
 654
 655 static bool
 656 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 657 {
 658   switch (cand->pos)
 659     {
 660     case IP_END:
 661       return false;
 662
 663     case IP_NORMAL:
 664       return stmt_after_ip_normal_pos (loop, stmt);
 665
 666     case IP_ORIGINAL:
 667     case IP_AFTER_USE:
 668       return stmt_after_inc_pos (cand, stmt, false);
 669
 670     case IP_BEFORE_USE:
 671       return stmt_after_inc_pos (cand, stmt, true);
 672
 673     default:
 674       gcc_unreachable ();
 675     }
 676 }
 677
 678 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 679
 680 static bool
 681 abnormal_ssa_name_p (tree exp)
 682 {
 683   if (!exp)
 684     return false;
 685
 686   if (TREE_CODE (exp) != SSA_NAME)
 687     return false;
 688
 689   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 690 }
 691
 692 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 693    abnormal phi node.  Callback for for_each_index.  */
 694
 695 static bool
 696 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 697                                   void *data ATTRIBUTE_UNUSED)
 698 {
 699   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 700     {
 701       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 702         return false;
 703       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 704         return false;
 705     }
 706
 707   return !abnormal_ssa_name_p (*index);
 708 }
 709
 710 /* Returns true if EXPR contains a ssa name that occurs in an
 711    abnormal phi node.  */
 712
 713 bool
 714 contains_abnormal_ssa_name_p (tree expr)
 715 {
 716   enum tree_code code;
 717   enum tree_code_class codeclass;
 718
 719   if (!expr)
 720     return false;
 721
 722   code = TREE_CODE (expr);
 723   codeclass = TREE_CODE_CLASS (code);
 724
 725   if (code == SSA_NAME)
 726     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 727
 728   if (code == INTEGER_CST
 729       || is_gimple_min_invariant (expr))
 730     return false;
 731
 732   if (code == ADDR_EXPR)
 733     return !for_each_index (&TREE_OPERAND (expr, 0),
 734                             idx_contains_abnormal_ssa_name_p,
 735                             NULL);
 736
 737   if (code == COND_EXPR)
 738     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 739       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 740       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 741
 742   switch (codeclass)
 743     {
 744     case tcc_binary:
 745     case tcc_comparison:
 746       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 747         return true;
 748
 749       /* Fallthru.  */
 750     case tcc_unary:
 751       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 752         return true;
 753
 754       break;
 755
 756     default:
 757       gcc_unreachable ();
 758     }
 759
 760   return false;
 761 }
 762
 763 /*  Returns the structure describing number of iterations determined from
 764     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 765
 766 static struct tree_niter_desc *
 767 niter_for_exit (struct ivopts_data *data, edge exit)
 768 {
 769   struct tree_niter_desc *desc;
 770   void **slot;
 771
 772   if (!data->niters)
 773     {
 774       data->niters = pointer_map_create ();
 775       slot = NULL;
 776     }
 777   else
 778     slot = pointer_map_contains (data->niters, exit);
 779
 780   if (!slot)
 781     {
 782       /* Try to determine number of iterations.  We cannot safely work with ssa
 783          names that appear in phi nodes on abnormal edges, so that we do not
 784          create overlapping life ranges for them (PR 27283).  */
 785       desc = XNEW (struct tree_niter_desc);
 786       if (!number_of_iterations_exit (data->current_loop,
 787                                       exit, desc, true)
 788           || contains_abnormal_ssa_name_p (desc->niter))
 789         {
 790           XDELETE (desc);
 791           desc = NULL;
 792         }
 793       slot = pointer_map_insert (data->niters, exit);
 794       *slot = desc;
 795     }
 796   else
 797     desc = (struct tree_niter_desc *) *slot;
 798
 799   return desc;
 800 }
 801
 802 /* Returns the structure describing number of iterations determined from
 803    single dominating exit of DATA->current_loop, or NULL if something
 804    goes wrong.  */
 805
 806 static struct tree_niter_desc *
 807 niter_for_single_dom_exit (struct ivopts_data *data)
 808 {
 809   edge exit = single_dom_exit (data->current_loop);
 810
 811   if (!exit)
 812     return NULL;
 813
 814   return niter_for_exit (data, exit);
 815 }
 816
 817 /* Hash table equality function for expressions.  */
 818
 819 static int
 820 htab_inv_expr_eq (const void *ent1, const void *ent2)
 821 {
 822   const struct iv_inv_expr_ent *expr1 =
 823       (const struct iv_inv_expr_ent *)ent1;
 824   const struct iv_inv_expr_ent *expr2 =
 825       (const struct iv_inv_expr_ent *)ent2;
 826
 827   return expr1->hash == expr2->hash
 828          && operand_equal_p (expr1->expr, expr2->expr, 0);
 829 }
 830
 831 /* Hash function for loop invariant expressions.  */
 832
 833 static hashval_t
 834 htab_inv_expr_hash (const void *ent)
 835 {
 836   const struct iv_inv_expr_ent *expr =
 837       (const struct iv_inv_expr_ent *)ent;
 838   return expr->hash;
 839 }
 840
 841 /* Initializes data structures used by the iv optimization pass, stored
 842    in DATA.  */
 843
 844 static void
 845 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 846 {
 847   data->version_info_size = 2 * num_ssa_names;
 848   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 849   data->relevant = BITMAP_ALLOC (NULL);
 850   data->important_candidates = BITMAP_ALLOC (NULL);
 851   data->max_inv_id = 0;
 852   data->niters = NULL;
 853   data->iv_uses.create (20);
 854   data->iv_candidates.create (20);
 855   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
 856                                     htab_inv_expr_eq, free);
 857   data->inv_expr_id = 0;
 858   decl_rtl_to_reset.create (20);
 859 }
 860
 861 /* Returns a memory object to that EXPR points.  In case we are able to
 862    determine that it does not point to any such object, NULL is returned.  */
 863
 864 static tree
 865 determine_base_object (tree expr)
 866 {
 867   enum tree_code code = TREE_CODE (expr);
 868   tree base, obj;
 869
 870   /* If this is a pointer casted to any type, we need to determine
 871      the base object for the pointer; so handle conversions before
 872      throwing away non-pointer expressions.  */
 873   if (CONVERT_EXPR_P (expr))
 874     return determine_base_object (TREE_OPERAND (expr, 0));
 875
 876   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 877     return NULL_TREE;
 878
 879   switch (code)
 880     {
 881     case INTEGER_CST:
 882       return NULL_TREE;
 883
 884     case ADDR_EXPR:
 885       obj = TREE_OPERAND (expr, 0);
 886       base = get_base_address (obj);
 887
 888       if (!base)
 889         return expr;
 890
 891       if (TREE_CODE (base) == MEM_REF)
 892         return determine_base_object (TREE_OPERAND (base, 0));
 893
 894       return fold_convert (ptr_type_node,
 895                            build_fold_addr_expr (base));
 896
 897     case POINTER_PLUS_EXPR:
 898       return determine_base_object (TREE_OPERAND (expr, 0));
 899
 900     case PLUS_EXPR:
 901     case MINUS_EXPR:
 902       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 903       gcc_unreachable ();
 904
 905     default:
 906       return fold_convert (ptr_type_node, expr);
 907     }
 908 }
 909
 910 /* Allocates an induction variable with given initial value BASE and step STEP
 911    for loop LOOP.  */
 912
 913 static struct iv *
 914 alloc_iv (tree base, tree step)
 915 {
 916   struct iv *iv = XCNEW (struct iv);
 917   gcc_assert (step != NULL_TREE);
 918
 919   iv->base = base;
 920   iv->base_object = determine_base_object (base);
 921   iv->step = step;
 922   iv->biv_p = false;
 923   iv->have_use_for = false;
 924   iv->use_id = 0;
 925   iv->ssa_name = NULL_TREE;
 926
 927   return iv;
 928 }
 929
 930 /* Sets STEP and BASE for induction variable IV.  */
 931
 932 static void
 933 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 934 {
 935   struct version_info *info = name_info (data, iv);
 936
 937   gcc_assert (!info->iv);
 938
 939   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 940   info->iv = alloc_iv (base, step);
 941   info->iv->ssa_name = iv;
 942 }
 943
 944 /* Finds induction variable declaration for VAR.  */
 945
 946 static struct iv *
 947 get_iv (struct ivopts_data *data, tree var)
 948 {
 949   basic_block bb;
 950   tree type = TREE_TYPE (var);
 951
 952   if (!POINTER_TYPE_P (type)
 953       && !INTEGRAL_TYPE_P (type))
 954     return NULL;
 955
 956   if (!name_info (data, var)->iv)
 957     {
 958       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 959
 960       if (!bb
 961           || !flow_bb_inside_loop_p (data->current_loop, bb))
 962         set_iv (data, var, var, build_int_cst (type, 0));
 963     }
 964
 965   return name_info (data, var)->iv;
 966 }
 967
 968 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 969    not define a simple affine biv with nonzero step.  */
 970
 971 static tree
 972 determine_biv_step (gimple phi)
 973 {
 974   struct loop *loop = gimple_bb (phi)->loop_father;
 975   tree name = PHI_RESULT (phi);
 976   affine_iv iv;
 977
 978   if (virtual_operand_p (name))
 979     return NULL_TREE;
 980
 981   if (!simple_iv (loop, loop, name, &iv, true))
 982     return NULL_TREE;
 983
 984   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 985 }
 986
 987 /* Finds basic ivs.  */
 988
 989 static bool
 990 find_bivs (struct ivopts_data *data)
 991 {
 992   gimple phi;
 993   tree step, type, base;
 994   bool found = false;
 995   struct loop *loop = data->current_loop;
 996   gimple_stmt_iterator psi;
 997
 998   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
 999     {
1000       phi = gsi_stmt (psi);
1001
1002       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1003         continue;
1004
1005       step = determine_biv_step (phi);
1006       if (!step)
1007         continue;
1008
1009       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1010       base = expand_simple_operations (base);
1011       if (contains_abnormal_ssa_name_p (base)
1012           || contains_abnormal_ssa_name_p (step))
1013         continue;
1014
1015       type = TREE_TYPE (PHI_RESULT (phi));
1016       base = fold_convert (type, base);
1017       if (step)
1018         {
1019           if (POINTER_TYPE_P (type))
1020             step = convert_to_ptrofftype (step);
1021           else
1022             step = fold_convert (type, step);
1023         }
1024
1025       set_iv (data, PHI_RESULT (phi), base, step);
1026       found = true;
1027     }
1028
1029   return found;
1030 }
1031
1032 /* Marks basic ivs.  */
1033
1034 static void
1035 mark_bivs (struct ivopts_data *data)
1036 {
1037   gimple phi;
1038   tree var;
1039   struct iv *iv, *incr_iv;
1040   struct loop *loop = data->current_loop;
1041   basic_block incr_bb;
1042   gimple_stmt_iterator psi;
1043
1044   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1045     {
1046       phi = gsi_stmt (psi);
1047
1048       iv = get_iv (data, PHI_RESULT (phi));
1049       if (!iv)
1050         continue;
1051
1052       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1053       incr_iv = get_iv (data, var);
1054       if (!incr_iv)
1055         continue;
1056
1057       /* If the increment is in the subloop, ignore it.  */
1058       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1059       if (incr_bb->loop_father != data->current_loop
1060           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1061         continue;
1062
1063       iv->biv_p = true;
1064       incr_iv->biv_p = true;
1065     }
1066 }
1067
1068 /* Checks whether STMT defines a linear induction variable and stores its
1069    parameters to IV.  */
1070
1071 static bool
1072 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1073 {
1074   tree lhs;
1075   struct loop *loop = data->current_loop;
1076
1077   iv->base = NULL_TREE;
1078   iv->step = NULL_TREE;
1079
1080   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1081     return false;
1082
1083   lhs = gimple_assign_lhs (stmt);
1084   if (TREE_CODE (lhs) != SSA_NAME)
1085     return false;
1086
1087   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1088     return false;
1089   iv->base = expand_simple_operations (iv->base);
1090
1091   if (contains_abnormal_ssa_name_p (iv->base)
1092       || contains_abnormal_ssa_name_p (iv->step))
1093     return false;
1094
1095   /* If STMT could throw, then do not consider STMT as defining a GIV.
1096      While this will suppress optimizations, we can not safely delete this
1097      GIV and associated statements, even if it appears it is not used.  */
1098   if (stmt_could_throw_p (stmt))
1099     return false;
1100
1101   return true;
1102 }
1103
1104 /* Finds general ivs in statement STMT.  */
1105
1106 static void
1107 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1108 {
1109   affine_iv iv;
1110
1111   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1112     return;
1113
1114   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1115 }
1116
1117 /* Finds general ivs in basic block BB.  */
1118
1119 static void
1120 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1121 {
1122   gimple_stmt_iterator bsi;
1123
1124   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1125     find_givs_in_stmt (data, gsi_stmt (bsi));
1126 }
1127
1128 /* Finds general ivs.  */
1129
1130 static void
1131 find_givs (struct ivopts_data *data)
1132 {
1133   struct loop *loop = data->current_loop;
1134   basic_block *body = get_loop_body_in_dom_order (loop);
1135   unsigned i;
1136
1137   for (i = 0; i < loop->num_nodes; i++)
1138     find_givs_in_bb (data, body[i]);
1139   free (body);
1140 }
1141
1142 /* For each ssa name defined in LOOP determines whether it is an induction
1143    variable and if so, its initial value and step.  */
1144
1145 static bool
1146 find_induction_variables (struct ivopts_data *data)
1147 {
1148   unsigned i;
1149   bitmap_iterator bi;
1150
1151   if (!find_bivs (data))
1152     return false;
1153
1154   find_givs (data);
1155   mark_bivs (data);
1156
1157   if (dump_file && (dump_flags & TDF_DETAILS))
1158     {
1159       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1160
1161       if (niter)
1162         {
1163           fprintf (dump_file, "  number of iterations ");
1164           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1165           if (!integer_zerop (niter->may_be_zero))
1166             {
1167               fprintf (dump_file, "; zero if ");
1168               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1169             }
1170           fprintf (dump_file, "\n\n");
1171         };
1172
1173       fprintf (dump_file, "Induction variables:\n\n");
1174
1175       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1176         {
1177           if (ver_info (data, i)->iv)
1178             dump_iv (dump_file, ver_info (data, i)->iv);
1179         }
1180     }
1181
1182   return true;
1183 }
1184
1185 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1186
1187 static struct iv_use *
1188 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1189             gimple stmt, enum use_type use_type)
1190 {
1191   struct iv_use *use = XCNEW (struct iv_use);
1192
1193   use->id = n_iv_uses (data);
1194   use->type = use_type;
1195   use->iv = iv;
1196   use->stmt = stmt;
1197   use->op_p = use_p;
1198   use->related_cands = BITMAP_ALLOC (NULL);
1199
1200   /* To avoid showing ssa name in the dumps, if it was not reset by the
1201      caller.  */
1202   iv->ssa_name = NULL_TREE;
1203
1204   if (dump_file && (dump_flags & TDF_DETAILS))
1205     dump_use (dump_file, use);
1206
1207   data->iv_uses.safe_push (use);
1208
1209   return use;
1210 }
1211
1212 /* Checks whether OP is a loop-level invariant and if so, records it.
1213    NONLINEAR_USE is true if the invariant is used in a way we do not
1214    handle specially.  */
1215
1216 static void
1217 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1218 {
1219   basic_block bb;
1220   struct version_info *info;
1221
1222   if (TREE_CODE (op) != SSA_NAME
1223       || virtual_operand_p (op))
1224     return;
1225
1226   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1227   if (bb
1228       && flow_bb_inside_loop_p (data->current_loop, bb))
1229     return;
1230
1231   info = name_info (data, op);
1232   info->name = op;
1233   info->has_nonlin_use |= nonlinear_use;
1234   if (!info->inv_id)
1235     info->inv_id = ++data->max_inv_id;
1236   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1237 }
1238
1239 /* Checks whether the use OP is interesting and if so, records it.  */
1240
1241 static struct iv_use *
1242 find_interesting_uses_op (struct ivopts_data *data, tree op)
1243 {
1244   struct iv *iv;
1245   struct iv *civ;
1246   gimple stmt;
1247   struct iv_use *use;
1248
1249   if (TREE_CODE (op) != SSA_NAME)
1250     return NULL;
1251
1252   iv = get_iv (data, op);
1253   if (!iv)
1254     return NULL;
1255
1256   if (iv->have_use_for)
1257     {
1258       use = iv_use (data, iv->use_id);
1259
1260       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1261       return use;
1262     }
1263
1264   if (integer_zerop (iv->step))
1265     {
1266       record_invariant (data, op, true);
1267       return NULL;
1268     }
1269   iv->have_use_for = true;
1270
1271   civ = XNEW (struct iv);
1272   *civ = *iv;
1273
1274   stmt = SSA_NAME_DEF_STMT (op);
1275   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1276               || is_gimple_assign (stmt));
1277
1278   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1279   iv->use_id = use->id;
1280
1281   return use;
1282 }
1283
1284 /* Given a condition in statement STMT, checks whether it is a compare
1285    of an induction variable and an invariant.  If this is the case,
1286    CONTROL_VAR is set to location of the iv, BOUND to the location of
1287    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1288    induction variable descriptions, and true is returned.  If this is not
1289    the case, CONTROL_VAR and BOUND are set to the arguments of the
1290    condition and false is returned.  */
1291
1292 static bool
1293 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1294                        tree **control_var, tree **bound,
1295                        struct iv **iv_var, struct iv **iv_bound)
1296 {
1297   /* The objects returned when COND has constant operands.  */
1298   static struct iv const_iv;
1299   static tree zero;
1300   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1301   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1302   bool ret = false;
1303
1304   if (gimple_code (stmt) == GIMPLE_COND)
1305     {
1306       op0 = gimple_cond_lhs_ptr (stmt);
1307       op1 = gimple_cond_rhs_ptr (stmt);
1308     }
1309   else
1310     {
1311       op0 = gimple_assign_rhs1_ptr (stmt);
1312       op1 = gimple_assign_rhs2_ptr (stmt);
1313     }
1314
1315   zero = integer_zero_node;
1316   const_iv.step = integer_zero_node;
1317
1318   if (TREE_CODE (*op0) == SSA_NAME)
1319     iv0 = get_iv (data, *op0);
1320   if (TREE_CODE (*op1) == SSA_NAME)
1321     iv1 = get_iv (data, *op1);
1322
1323   /* Exactly one of the compared values must be an iv, and the other one must
1324      be an invariant.  */
1325   if (!iv0 || !iv1)
1326     goto end;
1327
1328   if (integer_zerop (iv0->step))
1329     {
1330       /* Control variable may be on the other side.  */
1331       tmp_op = op0; op0 = op1; op1 = tmp_op;
1332       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1333     }
1334   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1335
1336 end:
1337   if (control_var)
1338     *control_var = op0;;
1339   if (iv_var)
1340     *iv_var = iv0;;
1341   if (bound)
1342     *bound = op1;
1343   if (iv_bound)
1344     *iv_bound = iv1;
1345
1346   return ret;
1347 }
1348
1349 /* Checks whether the condition in STMT is interesting and if so,
1350    records it.  */
1351
1352 static void
1353 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1354 {
1355   tree *var_p, *bound_p;
1356   struct iv *var_iv, *civ;
1357
1358   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1359     {
1360       find_interesting_uses_op (data, *var_p);
1361       find_interesting_uses_op (data, *bound_p);
1362       return;
1363     }
1364
1365   civ = XNEW (struct iv);
1366   *civ = *var_iv;
1367   record_use (data, NULL, civ, stmt, USE_COMPARE);
1368 }
1369
1370 /* Returns true if expression EXPR is obviously invariant in LOOP,
1371    i.e. if all its operands are defined outside of the LOOP.  LOOP
1372    should not be the function body.  */
1373
1374 bool
1375 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1376 {
1377   basic_block def_bb;
1378   unsigned i, len;
1379
1380   gcc_assert (loop_depth (loop) > 0);
1381
1382   if (is_gimple_min_invariant (expr))
1383     return true;
1384
1385   if (TREE_CODE (expr) == SSA_NAME)
1386     {
1387       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1388       if (def_bb
1389           && flow_bb_inside_loop_p (loop, def_bb))
1390         return false;
1391
1392       return true;
1393     }
1394
1395   if (!EXPR_P (expr))
1396     return false;
1397
1398   len = TREE_OPERAND_LENGTH (expr);
1399   for (i = 0; i < len; i++)
1400     if (TREE_OPERAND (expr, i)
1401         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1402       return false;
1403
1404   return true;
1405 }
1406
1407 /* Returns true if statement STMT is obviously invariant in LOOP,
1408    i.e. if all its operands on the RHS are defined outside of the LOOP.
1409    LOOP should not be the function body.  */
1410
1411 bool
1412 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1413 {
1414   unsigned i;
1415   tree lhs;
1416
1417   gcc_assert (loop_depth (loop) > 0);
1418
1419   lhs = gimple_get_lhs (stmt);
1420   for (i = 0; i < gimple_num_ops (stmt); i++)
1421     {
1422       tree op = gimple_op (stmt, i);
1423       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1424         return false;
1425     }
1426
1427   return true;
1428 }
1429
1430 /* Cumulates the steps of indices into DATA and replaces their values with the
1431    initial ones.  Returns false when the value of the index cannot be determined.
1432    Callback for for_each_index.  */
1433
1434 struct ifs_ivopts_data
1435 {
1436   struct ivopts_data *ivopts_data;
1437   gimple stmt;
1438   tree step;
1439 };
1440
1441 static bool
1442 idx_find_step (tree base, tree *idx, void *data)
1443 {
1444   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1445   struct iv *iv;
1446   tree step, iv_base, iv_step, lbound, off;
1447   struct loop *loop = dta->ivopts_data->current_loop;
1448
1449   /* If base is a component ref, require that the offset of the reference
1450      be invariant.  */
1451   if (TREE_CODE (base) == COMPONENT_REF)
1452     {
1453       off = component_ref_field_offset (base);
1454       return expr_invariant_in_loop_p (loop, off);
1455     }
1456
1457   /* If base is array, first check whether we will be able to move the
1458      reference out of the loop (in order to take its address in strength
1459      reduction).  In order for this to work we need both lower bound
1460      and step to be loop invariants.  */
1461   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1462     {
1463       /* Moreover, for a range, the size needs to be invariant as well.  */
1464       if (TREE_CODE (base) == ARRAY_RANGE_REF
1465           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1466         return false;
1467
1468       step = array_ref_element_size (base);
1469       lbound = array_ref_low_bound (base);
1470
1471       if (!expr_invariant_in_loop_p (loop, step)
1472           || !expr_invariant_in_loop_p (loop, lbound))
1473         return false;
1474     }
1475
1476   if (TREE_CODE (*idx) != SSA_NAME)
1477     return true;
1478
1479   iv = get_iv (dta->ivopts_data, *idx);
1480   if (!iv)
1481     return false;
1482
1483   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1484           *&x[0], which is not folded and does not trigger the
1485           ARRAY_REF path below.  */
1486   *idx = iv->base;
1487
1488   if (integer_zerop (iv->step))
1489     return true;
1490
1491   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1492     {
1493       step = array_ref_element_size (base);
1494
1495       /* We only handle addresses whose step is an integer constant.  */
1496       if (TREE_CODE (step) != INTEGER_CST)
1497         return false;
1498     }
1499   else
1500     /* The step for pointer arithmetics already is 1 byte.  */
1501     step = size_one_node;
1502
1503   iv_base = iv->base;
1504   iv_step = iv->step;
1505   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1506                             sizetype, &iv_base, &iv_step, dta->stmt,
1507                             false))
1508     {
1509       /* The index might wrap.  */
1510       return false;
1511     }
1512
1513   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1514   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1515
1516   return true;
1517 }
1518
1519 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1520    object is passed to it in DATA.  */
1521
1522 static bool
1523 idx_record_use (tree base, tree *idx,
1524                 void *vdata)
1525 {
1526   struct ivopts_data *data = (struct ivopts_data *) vdata;
1527   find_interesting_uses_op (data, *idx);
1528   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1529     {
1530       find_interesting_uses_op (data, array_ref_element_size (base));
1531       find_interesting_uses_op (data, array_ref_low_bound (base));
1532     }
1533   return true;
1534 }
1535
1536 /* If we can prove that TOP = cst * BOT for some constant cst,
1537    store cst to MUL and return true.  Otherwise return false.
1538    The returned value is always sign-extended, regardless of the
1539    signedness of TOP and BOT.  */
1540
1541 static bool
1542 constant_multiple_of (tree top, tree bot, double_int *mul)
1543 {
1544   tree mby;
1545   enum tree_code code;
1546   double_int res, p0, p1;
1547   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1548
1549   STRIP_NOPS (top);
1550   STRIP_NOPS (bot);
1551
1552   if (operand_equal_p (top, bot, 0))
1553     {
1554       *mul = double_int_one;
1555       return true;
1556     }
1557
1558   code = TREE_CODE (top);
1559   switch (code)
1560     {
1561     case MULT_EXPR:
1562       mby = TREE_OPERAND (top, 1);
1563       if (TREE_CODE (mby) != INTEGER_CST)
1564         return false;
1565
1566       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1567         return false;
1568
1569       *mul = (res * tree_to_double_int (mby)).sext (precision);
1570       return true;
1571
1572     case PLUS_EXPR:
1573     case MINUS_EXPR:
1574       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1575           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1576         return false;
1577
1578       if (code == MINUS_EXPR)
1579         p1 = -p1;
1580       *mul = (p0 + p1).sext (precision);
1581       return true;
1582
1583     case INTEGER_CST:
1584       if (TREE_CODE (bot) != INTEGER_CST)
1585         return false;
1586
1587       p0 = tree_to_double_int (top).sext (precision);
1588       p1 = tree_to_double_int (bot).sext (precision);
1589       if (p1.is_zero ())
1590         return false;
1591       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1592       return res.is_zero ();
1593
1594     default:
1595       return false;
1596     }
1597 }
1598
1599 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1600
1601 static bool
1602 may_be_unaligned_p (tree ref, tree step)
1603 {
1604   tree base;
1605   tree base_type;
1606   HOST_WIDE_INT bitsize;
1607   HOST_WIDE_INT bitpos;
1608   tree toffset;
1609   enum machine_mode mode;
1610   int unsignedp, volatilep;
1611   unsigned base_align;
1612
1613   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1614      thus they are not misaligned.  */
1615   if (TREE_CODE (ref) == TARGET_MEM_REF)
1616     return false;
1617
1618   /* The test below is basically copy of what expr.c:normal_inner_ref
1619      does to check whether the object must be loaded by parts when
1620      STRICT_ALIGNMENT is true.  */
1621   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1622                               &unsignedp, &volatilep, true);
1623   base_type = TREE_TYPE (base);
1624   base_align = get_object_alignment (base);
1625   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1626
1627   if (mode != BLKmode)
1628     {
1629       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1630
1631       if (base_align < mode_align
1632           || (bitpos % mode_align) != 0
1633           || (bitpos % BITS_PER_UNIT) != 0)
1634         return true;
1635
1636       if (toffset
1637           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1638         return true;
1639
1640       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1641         return true;
1642     }
1643
1644   return false;
1645 }
1646
1647 /* Return true if EXPR may be non-addressable.   */
1648
1649 bool
1650 may_be_nonaddressable_p (tree expr)
1651 {
1652   switch (TREE_CODE (expr))
1653     {
1654     case TARGET_MEM_REF:
1655       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1656          target, thus they are always addressable.  */
1657       return false;
1658
1659     case COMPONENT_REF:
1660       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1661              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1662
1663     case VIEW_CONVERT_EXPR:
1664       /* This kind of view-conversions may wrap non-addressable objects
1665          and make them look addressable.  After some processing the
1666          non-addressability may be uncovered again, causing ADDR_EXPRs
1667          of inappropriate objects to be built.  */
1668       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1669           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1670         return true;
1671
1672       /* ... fall through ... */
1673
1674     case ARRAY_REF:
1675     case ARRAY_RANGE_REF:
1676       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1677
1678     CASE_CONVERT:
1679       return true;
1680
1681     default:
1682       break;
1683     }
1684
1685   return false;
1686 }
1687
1688 /* Finds addresses in *OP_P inside STMT.  */
1689
1690 static void
1691 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1692 {
1693   tree base = *op_p, step = size_zero_node;
1694   struct iv *civ;
1695   struct ifs_ivopts_data ifs_ivopts_data;
1696
1697   /* Do not play with volatile memory references.  A bit too conservative,
1698      perhaps, but safe.  */
1699   if (gimple_has_volatile_ops (stmt))
1700     goto fail;
1701
1702   /* Ignore bitfields for now.  Not really something terribly complicated
1703      to handle.  TODO.  */
1704   if (TREE_CODE (base) == BIT_FIELD_REF)
1705     goto fail;
1706
1707   base = unshare_expr (base);
1708
1709   if (TREE_CODE (base) == TARGET_MEM_REF)
1710     {
1711       tree type = build_pointer_type (TREE_TYPE (base));
1712       tree astep;
1713
1714       if (TMR_BASE (base)
1715           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1716         {
1717           civ = get_iv (data, TMR_BASE (base));
1718           if (!civ)
1719             goto fail;
1720
1721           TMR_BASE (base) = civ->base;
1722           step = civ->step;
1723         }
1724       if (TMR_INDEX2 (base)
1725           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1726         {
1727           civ = get_iv (data, TMR_INDEX2 (base));
1728           if (!civ)
1729             goto fail;
1730
1731           TMR_INDEX2 (base) = civ->base;
1732           step = civ->step;
1733         }
1734       if (TMR_INDEX (base)
1735           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1736         {
1737           civ = get_iv (data, TMR_INDEX (base));
1738           if (!civ)
1739             goto fail;
1740
1741           TMR_INDEX (base) = civ->base;
1742           astep = civ->step;
1743
1744           if (astep)
1745             {
1746               if (TMR_STEP (base))
1747                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1748
1749               step = fold_build2 (PLUS_EXPR, type, step, astep);
1750             }
1751         }
1752
1753       if (integer_zerop (step))
1754         goto fail;
1755       base = tree_mem_ref_addr (type, base);
1756     }
1757   else
1758     {
1759       ifs_ivopts_data.ivopts_data = data;
1760       ifs_ivopts_data.stmt = stmt;
1761       ifs_ivopts_data.step = size_zero_node;
1762       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1763           || integer_zerop (ifs_ivopts_data.step))
1764         goto fail;
1765       step = ifs_ivopts_data.step;
1766
1767       /* Check that the base expression is addressable.  This needs
1768          to be done after substituting bases of IVs into it.  */
1769       if (may_be_nonaddressable_p (base))
1770         goto fail;
1771
1772       /* Moreover, on strict alignment platforms, check that it is
1773          sufficiently aligned.  */
1774       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1775         goto fail;
1776
1777       base = build_fold_addr_expr (base);
1778
1779       /* Substituting bases of IVs into the base expression might
1780          have caused folding opportunities.  */
1781       if (TREE_CODE (base) == ADDR_EXPR)
1782         {
1783           tree *ref = &TREE_OPERAND (base, 0);
1784           while (handled_component_p (*ref))
1785             ref = &TREE_OPERAND (*ref, 0);
1786           if (TREE_CODE (*ref) == MEM_REF)
1787             {
1788               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1789                                       TREE_OPERAND (*ref, 0),
1790                                       TREE_OPERAND (*ref, 1));
1791               if (tem)
1792                 *ref = tem;
1793             }
1794         }
1795     }
1796
1797   civ = alloc_iv (base, step);
1798   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1799   return;
1800
1801 fail:
1802   for_each_index (op_p, idx_record_use, data);
1803 }
1804
1805 /* Finds and records invariants used in STMT.  */
1806
1807 static void
1808 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1809 {
1810   ssa_op_iter iter;
1811   use_operand_p use_p;
1812   tree op;
1813
1814   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1815     {
1816       op = USE_FROM_PTR (use_p);
1817       record_invariant (data, op, false);
1818     }
1819 }
1820
1821 /* Finds interesting uses of induction variables in the statement STMT.  */
1822
1823 static void
1824 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1825 {
1826   struct iv *iv;
1827   tree op, *lhs, *rhs;
1828   ssa_op_iter iter;
1829   use_operand_p use_p;
1830   enum tree_code code;
1831
1832   find_invariants_stmt (data, stmt);
1833
1834   if (gimple_code (stmt) == GIMPLE_COND)
1835     {
1836       find_interesting_uses_cond (data, stmt);
1837       return;
1838     }
1839
1840   if (is_gimple_assign (stmt))
1841     {
1842       lhs = gimple_assign_lhs_ptr (stmt);
1843       rhs = gimple_assign_rhs1_ptr (stmt);
1844
1845       if (TREE_CODE (*lhs) == SSA_NAME)
1846         {
1847           /* If the statement defines an induction variable, the uses are not
1848              interesting by themselves.  */
1849
1850           iv = get_iv (data, *lhs);
1851
1852           if (iv && !integer_zerop (iv->step))
1853             return;
1854         }
1855
1856       code = gimple_assign_rhs_code (stmt);
1857       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1858           && (REFERENCE_CLASS_P (*rhs)
1859               || is_gimple_val (*rhs)))
1860         {
1861           if (REFERENCE_CLASS_P (*rhs))
1862             find_interesting_uses_address (data, stmt, rhs);
1863           else
1864             find_interesting_uses_op (data, *rhs);
1865
1866           if (REFERENCE_CLASS_P (*lhs))
1867             find_interesting_uses_address (data, stmt, lhs);
1868           return;
1869         }
1870       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1871         {
1872           find_interesting_uses_cond (data, stmt);
1873           return;
1874         }
1875
1876       /* TODO -- we should also handle address uses of type
1877
1878          memory = call (whatever);
1879
1880          and
1881
1882          call (memory).  */
1883     }
1884
1885   if (gimple_code (stmt) == GIMPLE_PHI
1886       && gimple_bb (stmt) == data->current_loop->header)
1887     {
1888       iv = get_iv (data, PHI_RESULT (stmt));
1889
1890       if (iv && !integer_zerop (iv->step))
1891         return;
1892     }
1893
1894   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1895     {
1896       op = USE_FROM_PTR (use_p);
1897
1898       if (TREE_CODE (op) != SSA_NAME)
1899         continue;
1900
1901       iv = get_iv (data, op);
1902       if (!iv)
1903         continue;
1904
1905       find_interesting_uses_op (data, op);
1906     }
1907 }
1908
1909 /* Finds interesting uses of induction variables outside of loops
1910    on loop exit edge EXIT.  */
1911
1912 static void
1913 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1914 {
1915   gimple phi;
1916   gimple_stmt_iterator psi;
1917   tree def;
1918
1919   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1920     {
1921       phi = gsi_stmt (psi);
1922       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1923       if (!virtual_operand_p (def))
1924         find_interesting_uses_op (data, def);
1925     }
1926 }
1927
1928 /* Finds uses of the induction variables that are interesting.  */
1929
1930 static void
1931 find_interesting_uses (struct ivopts_data *data)
1932 {
1933   basic_block bb;
1934   gimple_stmt_iterator bsi;
1935   basic_block *body = get_loop_body (data->current_loop);
1936   unsigned i;
1937   struct version_info *info;
1938   edge e;
1939
1940   if (dump_file && (dump_flags & TDF_DETAILS))
1941     fprintf (dump_file, "Uses:\n\n");
1942
1943   for (i = 0; i < data->current_loop->num_nodes; i++)
1944     {
1945       edge_iterator ei;
1946       bb = body[i];
1947
1948       FOR_EACH_EDGE (e, ei, bb->succs)
1949         if (e->dest != EXIT_BLOCK_PTR
1950             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1951           find_interesting_uses_outside (data, e);
1952
1953       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1954         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1955       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1956         if (!is_gimple_debug (gsi_stmt (bsi)))
1957           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1958     }
1959
1960   if (dump_file && (dump_flags & TDF_DETAILS))
1961     {
1962       bitmap_iterator bi;
1963
1964       fprintf (dump_file, "\n");
1965
1966       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1967         {
1968           info = ver_info (data, i);
1969           if (info->inv_id)
1970             {
1971               fprintf (dump_file, "  ");
1972               print_generic_expr (dump_file, info->name, TDF_SLIM);
1973               fprintf (dump_file, " is invariant (%d)%s\n",
1974                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1975             }
1976         }
1977
1978       fprintf (dump_file, "\n");
1979     }
1980
1981   free (body);
1982 }
1983
1984 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1985    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1986    we are at the top-level of the processed address.  */
1987
1988 static tree
1989 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1990                 unsigned HOST_WIDE_INT *offset)
1991 {
1992   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1993   enum tree_code code;
1994   tree type, orig_type = TREE_TYPE (expr);
1995   unsigned HOST_WIDE_INT off0, off1, st;
1996   tree orig_expr = expr;
1997
1998   STRIP_NOPS (expr);
1999
2000   type = TREE_TYPE (expr);
2001   code = TREE_CODE (expr);
2002   *offset = 0;
2003
2004   switch (code)
2005     {
2006     case INTEGER_CST:
2007       if (!cst_and_fits_in_hwi (expr)
2008           || integer_zerop (expr))
2009         return orig_expr;
2010
2011       *offset = int_cst_value (expr);
2012       return build_int_cst (orig_type, 0);
2013
2014     case POINTER_PLUS_EXPR:
2015     case PLUS_EXPR:
2016     case MINUS_EXPR:
2017       op0 = TREE_OPERAND (expr, 0);
2018       op1 = TREE_OPERAND (expr, 1);
2019
2020       op0 = strip_offset_1 (op0, false, false, &off0);
2021       op1 = strip_offset_1 (op1, false, false, &off1);
2022
2023       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2024       if (op0 == TREE_OPERAND (expr, 0)
2025           && op1 == TREE_OPERAND (expr, 1))
2026         return orig_expr;
2027
2028       if (integer_zerop (op1))
2029         expr = op0;
2030       else if (integer_zerop (op0))
2031         {
2032           if (code == MINUS_EXPR)
2033             expr = fold_build1 (NEGATE_EXPR, type, op1);
2034           else
2035             expr = op1;
2036         }
2037       else
2038         expr = fold_build2 (code, type, op0, op1);
2039
2040       return fold_convert (orig_type, expr);
2041
2042     case MULT_EXPR:
2043       op1 = TREE_OPERAND (expr, 1);
2044       if (!cst_and_fits_in_hwi (op1))
2045         return orig_expr;
2046
2047       op0 = TREE_OPERAND (expr, 0);
2048       op0 = strip_offset_1 (op0, false, false, &off0);
2049       if (op0 == TREE_OPERAND (expr, 0))
2050         return orig_expr;
2051
2052       *offset = off0 * int_cst_value (op1);
2053       if (integer_zerop (op0))
2054         expr = op0;
2055       else
2056         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2057
2058       return fold_convert (orig_type, expr);
2059
2060     case ARRAY_REF:
2061     case ARRAY_RANGE_REF:
2062       if (!inside_addr)
2063         return orig_expr;
2064
2065       step = array_ref_element_size (expr);
2066       if (!cst_and_fits_in_hwi (step))
2067         break;
2068
2069       st = int_cst_value (step);
2070       op1 = TREE_OPERAND (expr, 1);
2071       op1 = strip_offset_1 (op1, false, false, &off1);
2072       *offset = off1 * st;
2073
2074       if (top_compref
2075           && integer_zerop (op1))
2076         {
2077           /* Strip the component reference completely.  */
2078           op0 = TREE_OPERAND (expr, 0);
2079           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2080           *offset += off0;
2081           return op0;
2082         }
2083       break;
2084
2085     case COMPONENT_REF:
2086       if (!inside_addr)
2087         return orig_expr;
2088
2089       tmp = component_ref_field_offset (expr);
2090       if (top_compref
2091           && cst_and_fits_in_hwi (tmp))
2092         {
2093           /* Strip the component reference completely.  */
2094           op0 = TREE_OPERAND (expr, 0);
2095           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2096           *offset = off0 + int_cst_value (tmp);
2097           return op0;
2098         }
2099       break;
2100
2101     case ADDR_EXPR:
2102       op0 = TREE_OPERAND (expr, 0);
2103       op0 = strip_offset_1 (op0, true, true, &off0);
2104       *offset += off0;
2105
2106       if (op0 == TREE_OPERAND (expr, 0))
2107         return orig_expr;
2108
2109       expr = build_fold_addr_expr (op0);
2110       return fold_convert (orig_type, expr);
2111
2112     case MEM_REF:
2113       /* ???  Offset operand?  */
2114       inside_addr = false;
2115       break;
2116
2117     default:
2118       return orig_expr;
2119     }
2120
2121   /* Default handling of expressions for that we want to recurse into
2122      the first operand.  */
2123   op0 = TREE_OPERAND (expr, 0);
2124   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2125   *offset += off0;
2126
2127   if (op0 == TREE_OPERAND (expr, 0)
2128       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2129     return orig_expr;
2130
2131   expr = copy_node (expr);
2132   TREE_OPERAND (expr, 0) = op0;
2133   if (op1)
2134     TREE_OPERAND (expr, 1) = op1;
2135
2136   /* Inside address, we might strip the top level component references,
2137      thus changing type of the expression.  Handling of ADDR_EXPR
2138      will fix that.  */
2139   expr = fold_convert (orig_type, expr);
2140
2141   return expr;
2142 }
2143
2144 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2145
2146 static tree
2147 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2148 {
2149   return strip_offset_1 (expr, false, false, offset);
2150 }
2151
2152 /* Returns variant of TYPE that can be used as base for different uses.
2153    We return unsigned type with the same precision, which avoids problems
2154    with overflows.  */
2155
2156 static tree
2157 generic_type_for (tree type)
2158 {
2159   if (POINTER_TYPE_P (type))
2160     return unsigned_type_for (type);
2161
2162   if (TYPE_UNSIGNED (type))
2163     return type;
2164
2165   return unsigned_type_for (type);
2166 }
2167
2168 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2169    the bitmap to that we should store it.  */
2170
2171 static struct ivopts_data *fd_ivopts_data;
2172 static tree
2173 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2174 {
2175   bitmap *depends_on = (bitmap *) data;
2176   struct version_info *info;
2177
2178   if (TREE_CODE (*expr_p) != SSA_NAME)
2179     return NULL_TREE;
2180   info = name_info (fd_ivopts_data, *expr_p);
2181
2182   if (!info->inv_id || info->has_nonlin_use)
2183     return NULL_TREE;
2184
2185   if (!*depends_on)
2186     *depends_on = BITMAP_ALLOC (NULL);
2187   bitmap_set_bit (*depends_on, info->inv_id);
2188
2189   return NULL_TREE;
2190 }
2191
2192 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2193    position to POS.  If USE is not NULL, the candidate is set as related to
2194    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2195    replacement of the final value of the iv by a direct computation.  */
2196
2197 static struct iv_cand *
2198 add_candidate_1 (struct ivopts_data *data,
2199                  tree base, tree step, bool important, enum iv_position pos,
2200                  struct iv_use *use, gimple incremented_at)
2201 {
2202   unsigned i;
2203   struct iv_cand *cand = NULL;
2204   tree type, orig_type;
2205
2206   /* For non-original variables, make sure their values are computed in a type
2207      that does not invoke undefined behavior on overflows (since in general,
2208      we cannot prove that these induction variables are non-wrapping).  */
2209   if (pos != IP_ORIGINAL)
2210     {
2211       orig_type = TREE_TYPE (base);
2212       type = generic_type_for (orig_type);
2213       if (type != orig_type)
2214         {
2215           base = fold_convert (type, base);
2216           step = fold_convert (type, step);
2217         }
2218     }
2219
2220   for (i = 0; i < n_iv_cands (data); i++)
2221     {
2222       cand = iv_cand (data, i);
2223
2224       if (cand->pos != pos)
2225         continue;
2226
2227       if (cand->incremented_at != incremented_at
2228           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2229               && cand->ainc_use != use))
2230         continue;
2231
2232       if (!cand->iv)
2233         {
2234           if (!base && !step)
2235             break;
2236
2237           continue;
2238         }
2239
2240       if (!base && !step)
2241         continue;
2242
2243       if (operand_equal_p (base, cand->iv->base, 0)
2244           && operand_equal_p (step, cand->iv->step, 0)
2245           && (TYPE_PRECISION (TREE_TYPE (base))
2246               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2247         break;
2248     }
2249
2250   if (i == n_iv_cands (data))
2251     {
2252       cand = XCNEW (struct iv_cand);
2253       cand->id = i;
2254
2255       if (!base && !step)
2256         cand->iv = NULL;
2257       else
2258         cand->iv = alloc_iv (base, step);
2259
2260       cand->pos = pos;
2261       if (pos != IP_ORIGINAL && cand->iv)
2262         {
2263           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2264           cand->var_after = cand->var_before;
2265         }
2266       cand->important = important;
2267       cand->incremented_at = incremented_at;
2268       data->iv_candidates.safe_push (cand);
2269
2270       if (step
2271           && TREE_CODE (step) != INTEGER_CST)
2272         {
2273           fd_ivopts_data = data;
2274           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2275         }
2276
2277       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2278         cand->ainc_use = use;
2279       else
2280         cand->ainc_use = NULL;
2281
2282       if (dump_file && (dump_flags & TDF_DETAILS))
2283         dump_cand (dump_file, cand);
2284     }
2285
2286   if (important && !cand->important)
2287     {
2288       cand->important = true;
2289       if (dump_file && (dump_flags & TDF_DETAILS))
2290         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2291     }
2292
2293   if (use)
2294     {
2295       bitmap_set_bit (use->related_cands, i);
2296       if (dump_file && (dump_flags & TDF_DETAILS))
2297         fprintf (dump_file, "Candidate %d is related to use %d\n",
2298                  cand->id, use->id);
2299     }
2300
2301   return cand;
2302 }
2303
2304 /* Returns true if incrementing the induction variable at the end of the LOOP
2305    is allowed.
2306
2307    The purpose is to avoid splitting latch edge with a biv increment, thus
2308    creating a jump, possibly confusing other optimization passes and leaving
2309    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2310    is not available (so we do not have a better alternative), or if the latch
2311    edge is already nonempty.  */
2312
2313 static bool
2314 allow_ip_end_pos_p (struct loop *loop)
2315 {
2316   if (!ip_normal_pos (loop))
2317     return true;
2318
2319   if (!empty_block_p (ip_end_pos (loop)))
2320     return true;
2321
2322   return false;
2323 }
2324
2325 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2326    Important field is set to IMPORTANT.  */
2327
2328 static void
2329 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2330                         bool important, struct iv_use *use)
2331 {
2332   basic_block use_bb = gimple_bb (use->stmt);
2333   enum machine_mode mem_mode;
2334   unsigned HOST_WIDE_INT cstepi;
2335
2336   /* If we insert the increment in any position other than the standard
2337      ones, we must ensure that it is incremented once per iteration.
2338      It must not be in an inner nested loop, or one side of an if
2339      statement.  */
2340   if (use_bb->loop_father != data->current_loop
2341       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2342       || stmt_could_throw_p (use->stmt)
2343       || !cst_and_fits_in_hwi (step))
2344     return;
2345
2346   cstepi = int_cst_value (step);
2347
2348   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2349   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2350         || USE_STORE_PRE_INCREMENT (mem_mode))
2351        && GET_MODE_SIZE (mem_mode) == cstepi)
2352       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2353            || USE_STORE_PRE_DECREMENT (mem_mode))
2354           && GET_MODE_SIZE (mem_mode) == -cstepi))
2355     {
2356       enum tree_code code = MINUS_EXPR;
2357       tree new_base;
2358       tree new_step = step;
2359
2360       if (POINTER_TYPE_P (TREE_TYPE (base)))
2361         {
2362           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2363           code = POINTER_PLUS_EXPR;
2364         }
2365       else
2366         new_step = fold_convert (TREE_TYPE (base), new_step);
2367       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2368       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2369                        use->stmt);
2370     }
2371   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2372         || USE_STORE_POST_INCREMENT (mem_mode))
2373        && GET_MODE_SIZE (mem_mode) == cstepi)
2374       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2375            || USE_STORE_POST_DECREMENT (mem_mode))
2376           && GET_MODE_SIZE (mem_mode) == -cstepi))
2377     {
2378       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2379                        use->stmt);
2380     }
2381 }
2382
2383 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2384    position to POS.  If USE is not NULL, the candidate is set as related to
2385    it.  The candidate computation is scheduled on all available positions.  */
2386
2387 static void
2388 add_candidate (struct ivopts_data *data,
2389                tree base, tree step, bool important, struct iv_use *use)
2390 {
2391   if (ip_normal_pos (data->current_loop))
2392     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2393   if (ip_end_pos (data->current_loop)
2394       && allow_ip_end_pos_p (data->current_loop))
2395     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2396
2397   if (use != NULL && use->type == USE_ADDRESS)
2398     add_autoinc_candidates (data, base, step, important, use);
2399 }
2400
2401 /* Adds standard iv candidates.  */
2402
2403 static void
2404 add_standard_iv_candidates (struct ivopts_data *data)
2405 {
2406   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2407
2408   /* The same for a double-integer type if it is still fast enough.  */
2409   if (TYPE_PRECISION
2410         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2411       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2412     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2413                    build_int_cst (long_integer_type_node, 1), true, NULL);
2414
2415   /* The same for a double-integer type if it is still fast enough.  */
2416   if (TYPE_PRECISION
2417         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2418       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2419     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2420                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2421 }
2422
2423
2424 /* Adds candidates bases on the old induction variable IV.  */
2425
2426 static void
2427 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2428 {
2429   gimple phi;
2430   tree def;
2431   struct iv_cand *cand;
2432
2433   add_candidate (data, iv->base, iv->step, true, NULL);
2434
2435   /* The same, but with initial value zero.  */
2436   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2437     add_candidate (data, size_int (0), iv->step, true, NULL);
2438   else
2439     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2440                    iv->step, true, NULL);
2441
2442   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2443   if (gimple_code (phi) == GIMPLE_PHI)
2444     {
2445       /* Additionally record the possibility of leaving the original iv
2446          untouched.  */
2447       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2448       cand = add_candidate_1 (data,
2449                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2450                               SSA_NAME_DEF_STMT (def));
2451       cand->var_before = iv->ssa_name;
2452       cand->var_after = def;
2453     }
2454 }
2455
2456 /* Adds candidates based on the old induction variables.  */
2457
2458 static void
2459 add_old_ivs_candidates (struct ivopts_data *data)
2460 {
2461   unsigned i;
2462   struct iv *iv;
2463   bitmap_iterator bi;
2464
2465   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2466     {
2467       iv = ver_info (data, i)->iv;
2468       if (iv && iv->biv_p && !integer_zerop (iv->step))
2469         add_old_iv_candidates (data, iv);
2470     }
2471 }
2472
2473 /* Adds candidates based on the value of the induction variable IV and USE.  */
2474
2475 static void
2476 add_iv_value_candidates (struct ivopts_data *data,
2477                          struct iv *iv, struct iv_use *use)
2478 {
2479   unsigned HOST_WIDE_INT offset;
2480   tree base;
2481   tree basetype;
2482
2483   add_candidate (data, iv->base, iv->step, false, use);
2484
2485   /* The same, but with initial value zero.  Make such variable important,
2486      since it is generic enough so that possibly many uses may be based
2487      on it.  */
2488   basetype = TREE_TYPE (iv->base);
2489   if (POINTER_TYPE_P (basetype))
2490     basetype = sizetype;
2491   add_candidate (data, build_int_cst (basetype, 0),
2492                  iv->step, true, use);
2493
2494   /* Third, try removing the constant offset.  Make sure to even
2495      add a candidate for &a[0] vs. (T *)&a.  */
2496   base = strip_offset (iv->base, &offset);
2497   if (offset
2498       || base != iv->base)
2499     add_candidate (data, base, iv->step, false, use);
2500 }
2501
2502 /* Adds candidates based on the uses.  */
2503
2504 static void
2505 add_derived_ivs_candidates (struct ivopts_data *data)
2506 {
2507   unsigned i;
2508
2509   for (i = 0; i < n_iv_uses (data); i++)
2510     {
2511       struct iv_use *use = iv_use (data, i);
2512
2513       if (!use)
2514         continue;
2515
2516       switch (use->type)
2517         {
2518         case USE_NONLINEAR_EXPR:
2519         case USE_COMPARE:
2520         case USE_ADDRESS:
2521           /* Just add the ivs based on the value of the iv used here.  */
2522           add_iv_value_candidates (data, use->iv, use);
2523           break;
2524
2525         default:
2526           gcc_unreachable ();
2527         }
2528     }
2529 }
2530
2531 /* Record important candidates and add them to related_cands bitmaps
2532    if needed.  */
2533
2534 static void
2535 record_important_candidates (struct ivopts_data *data)
2536 {
2537   unsigned i;
2538   struct iv_use *use;
2539
2540   for (i = 0; i < n_iv_cands (data); i++)
2541     {
2542       struct iv_cand *cand = iv_cand (data, i);
2543
2544       if (cand->important)
2545         bitmap_set_bit (data->important_candidates, i);
2546     }
2547
2548   data->consider_all_candidates = (n_iv_cands (data)
2549                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2550
2551   if (data->consider_all_candidates)
2552     {
2553       /* We will not need "related_cands" bitmaps in this case,
2554          so release them to decrease peak memory consumption.  */
2555       for (i = 0; i < n_iv_uses (data); i++)
2556         {
2557           use = iv_use (data, i);
2558           BITMAP_FREE (use->related_cands);
2559         }
2560     }
2561   else
2562     {
2563       /* Add important candidates to the related_cands bitmaps.  */
2564       for (i = 0; i < n_iv_uses (data); i++)
2565         bitmap_ior_into (iv_use (data, i)->related_cands,
2566                          data->important_candidates);
2567     }
2568 }
2569
2570 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2571    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2572    we allocate a simple list to every use.  */
2573
2574 static void
2575 alloc_use_cost_map (struct ivopts_data *data)
2576 {
2577   unsigned i, size, s, j;
2578
2579   for (i = 0; i < n_iv_uses (data); i++)
2580     {
2581       struct iv_use *use = iv_use (data, i);
2582       bitmap_iterator bi;
2583
2584       if (data->consider_all_candidates)
2585         size = n_iv_cands (data);
2586       else
2587         {
2588           s = 0;
2589           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2590             {
2591               s++;
2592             }
2593
2594           /* Round up to the power of two, so that moduling by it is fast.  */
2595           for (size = 1; size < s; size <<= 1)
2596             continue;
2597         }
2598
2599       use->n_map_members = size;
2600       use->cost_map = XCNEWVEC (struct cost_pair, size);
2601     }
2602 }
2603
2604 /* Returns description of computation cost of expression whose runtime
2605    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2606
2607 static comp_cost
2608 new_cost (unsigned runtime, unsigned complexity)
2609 {
2610   comp_cost cost;
2611
2612   cost.cost = runtime;
2613   cost.complexity = complexity;
2614
2615   return cost;
2616 }
2617
2618 /* Adds costs COST1 and COST2.  */
2619
2620 static comp_cost
2621 add_costs (comp_cost cost1, comp_cost cost2)
2622 {
2623   cost1.cost += cost2.cost;
2624   cost1.complexity += cost2.complexity;
2625
2626   return cost1;
2627 }
2628 /* Subtracts costs COST1 and COST2.  */
2629
2630 static comp_cost
2631 sub_costs (comp_cost cost1, comp_cost cost2)
2632 {
2633   cost1.cost -= cost2.cost;
2634   cost1.complexity -= cost2.complexity;
2635
2636   return cost1;
2637 }
2638
2639 /* Returns a negative number if COST1 < COST2, a positive number if
2640    COST1 > COST2, and 0 if COST1 = COST2.  */
2641
2642 static int
2643 compare_costs (comp_cost cost1, comp_cost cost2)
2644 {
2645   if (cost1.cost == cost2.cost)
2646     return cost1.complexity - cost2.complexity;
2647
2648   return cost1.cost - cost2.cost;
2649 }
2650
2651 /* Returns true if COST is infinite.  */
2652
2653 static bool
2654 infinite_cost_p (comp_cost cost)
2655 {
2656   return cost.cost == INFTY;
2657 }
2658
2659 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2660    on invariants DEPENDS_ON and that the value used in expressing it
2661    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2662
2663 static void
2664 set_use_iv_cost (struct ivopts_data *data,
2665                  struct iv_use *use, struct iv_cand *cand,
2666                  comp_cost cost, bitmap depends_on, tree value,
2667                  enum tree_code comp, int inv_expr_id)
2668 {
2669   unsigned i, s;
2670
2671   if (infinite_cost_p (cost))
2672     {
2673       BITMAP_FREE (depends_on);
2674       return;
2675     }
2676
2677   if (data->consider_all_candidates)
2678     {
2679       use->cost_map[cand->id].cand = cand;
2680       use->cost_map[cand->id].cost = cost;
2681       use->cost_map[cand->id].depends_on = depends_on;
2682       use->cost_map[cand->id].value = value;
2683       use->cost_map[cand->id].comp = comp;
2684       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2685       return;
2686     }
2687
2688   /* n_map_members is a power of two, so this computes modulo.  */
2689   s = cand->id & (use->n_map_members - 1);
2690   for (i = s; i < use->n_map_members; i++)
2691     if (!use->cost_map[i].cand)
2692       goto found;
2693   for (i = 0; i < s; i++)
2694     if (!use->cost_map[i].cand)
2695       goto found;
2696
2697   gcc_unreachable ();
2698
2699 found:
2700   use->cost_map[i].cand = cand;
2701   use->cost_map[i].cost = cost;
2702   use->cost_map[i].depends_on = depends_on;
2703   use->cost_map[i].value = value;
2704   use->cost_map[i].comp = comp;
2705   use->cost_map[i].inv_expr_id = inv_expr_id;
2706 }
2707
2708 /* Gets cost of (USE, CANDIDATE) pair.  */
2709
2710 static struct cost_pair *
2711 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2712                  struct iv_cand *cand)
2713 {
2714   unsigned i, s;
2715   struct cost_pair *ret;
2716
2717   if (!cand)
2718     return NULL;
2719
2720   if (data->consider_all_candidates)
2721     {
2722       ret = use->cost_map + cand->id;
2723       if (!ret->cand)
2724         return NULL;
2725
2726       return ret;
2727     }
2728
2729   /* n_map_members is a power of two, so this computes modulo.  */
2730   s = cand->id & (use->n_map_members - 1);
2731   for (i = s; i < use->n_map_members; i++)
2732     if (use->cost_map[i].cand == cand)
2733       return use->cost_map + i;
2734
2735   for (i = 0; i < s; i++)
2736     if (use->cost_map[i].cand == cand)
2737       return use->cost_map + i;
2738
2739   return NULL;
2740 }
2741
2742 /* Returns estimate on cost of computing SEQ.  */
2743
2744 static unsigned
2745 seq_cost (rtx seq, bool speed)
2746 {
2747   unsigned cost = 0;
2748   rtx set;
2749
2750   for (; seq; seq = NEXT_INSN (seq))
2751     {
2752       set = single_set (seq);
2753       if (set)
2754         cost += set_src_cost (SET_SRC (set), speed);
2755       else
2756         cost++;
2757     }
2758
2759   return cost;
2760 }
2761
2762 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2763 static rtx
2764 produce_memory_decl_rtl (tree obj, int *regno)
2765 {
2766   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2767   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2768   rtx x;
2769
2770   gcc_assert (obj);
2771   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2772     {
2773       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2774       x = gen_rtx_SYMBOL_REF (address_mode, name);
2775       SET_SYMBOL_REF_DECL (x, obj);
2776       x = gen_rtx_MEM (DECL_MODE (obj), x);
2777       set_mem_addr_space (x, as);
2778       targetm.encode_section_info (obj, x, true);
2779     }
2780   else
2781     {
2782       x = gen_raw_REG (address_mode, (*regno)++);
2783       x = gen_rtx_MEM (DECL_MODE (obj), x);
2784       set_mem_addr_space (x, as);
2785     }
2786
2787   return x;
2788 }
2789
2790 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2791    walk_tree.  DATA contains the actual fake register number.  */
2792
2793 static tree
2794 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2795 {
2796   tree obj = NULL_TREE;
2797   rtx x = NULL_RTX;
2798   int *regno = (int *) data;
2799
2800   switch (TREE_CODE (*expr_p))
2801     {
2802     case ADDR_EXPR:
2803       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2804            handled_component_p (*expr_p);
2805            expr_p = &TREE_OPERAND (*expr_p, 0))
2806         continue;
2807       obj = *expr_p;
2808       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2809         x = produce_memory_decl_rtl (obj, regno);
2810       break;
2811
2812     case SSA_NAME:
2813       *ws = 0;
2814       obj = SSA_NAME_VAR (*expr_p);
2815       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2816       if (!obj)
2817         return NULL_TREE;
2818       if (!DECL_RTL_SET_P (obj))
2819         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2820       break;
2821
2822     case VAR_DECL:
2823     case PARM_DECL:
2824     case RESULT_DECL:
2825       *ws = 0;
2826       obj = *expr_p;
2827
2828       if (DECL_RTL_SET_P (obj))
2829         break;
2830
2831       if (DECL_MODE (obj) == BLKmode)
2832         x = produce_memory_decl_rtl (obj, regno);
2833       else
2834         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2835
2836       break;
2837
2838     default:
2839       break;
2840     }
2841
2842   if (x)
2843     {
2844       decl_rtl_to_reset.safe_push (obj);
2845       SET_DECL_RTL (obj, x);
2846     }
2847
2848   return NULL_TREE;
2849 }
2850
2851 /* Determines cost of the computation of EXPR.  */
2852
2853 static unsigned
2854 computation_cost (tree expr, bool speed)
2855 {
2856   rtx seq, rslt;
2857   tree type = TREE_TYPE (expr);
2858   unsigned cost;
2859   /* Avoid using hard regs in ways which may be unsupported.  */
2860   int regno = LAST_VIRTUAL_REGISTER + 1;
2861   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2862   enum node_frequency real_frequency = node->frequency;
2863
2864   node->frequency = NODE_FREQUENCY_NORMAL;
2865   crtl->maybe_hot_insn_p = speed;
2866   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2867   start_sequence ();
2868   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2869   seq = get_insns ();
2870   end_sequence ();
2871   default_rtl_profile ();
2872   node->frequency = real_frequency;
2873
2874   cost = seq_cost (seq, speed);
2875   if (MEM_P (rslt))
2876     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2877                           TYPE_ADDR_SPACE (type), speed);
2878   else if (!REG_P (rslt))
2879     cost += set_src_cost (rslt, speed);
2880
2881   return cost;
2882 }
2883
2884 /* Returns variable containing the value of candidate CAND at statement AT.  */
2885
2886 static tree
2887 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2888 {
2889   if (stmt_after_increment (loop, cand, stmt))
2890     return cand->var_after;
2891   else
2892     return cand->var_before;
2893 }
2894
2895 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2896    same precision that is at least as wide as the precision of TYPE, stores
2897    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2898    type of A and B.  */
2899
2900 static tree
2901 determine_common_wider_type (tree *a, tree *b)
2902 {
2903   tree wider_type = NULL;
2904   tree suba, subb;
2905   tree atype = TREE_TYPE (*a);
2906
2907   if (CONVERT_EXPR_P (*a))
2908     {
2909       suba = TREE_OPERAND (*a, 0);
2910       wider_type = TREE_TYPE (suba);
2911       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2912         return atype;
2913     }
2914   else
2915     return atype;
2916
2917   if (CONVERT_EXPR_P (*b))
2918     {
2919       subb = TREE_OPERAND (*b, 0);
2920       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2921         return atype;
2922     }
2923   else
2924     return atype;
2925
2926   *a = suba;
2927   *b = subb;
2928   return wider_type;
2929 }
2930
2931 /* Determines the expression by that USE is expressed from induction variable
2932    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2933    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2934
2935 static bool
2936 get_computation_aff (struct loop *loop,
2937                      struct iv_use *use, struct iv_cand *cand, gimple at,
2938                      struct affine_tree_combination *aff)
2939 {
2940   tree ubase = use->iv->base;
2941   tree ustep = use->iv->step;
2942   tree cbase = cand->iv->base;
2943   tree cstep = cand->iv->step, cstep_common;
2944   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2945   tree common_type, var;
2946   tree uutype;
2947   aff_tree cbase_aff, var_aff;
2948   double_int rat;
2949
2950   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2951     {
2952       /* We do not have a precision to express the values of use.  */
2953       return false;
2954     }
2955
2956   var = var_at_stmt (loop, cand, at);
2957   uutype = unsigned_type_for (utype);
2958
2959   /* If the conversion is not noop, perform it.  */
2960   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2961     {
2962       cstep = fold_convert (uutype, cstep);
2963       cbase = fold_convert (uutype, cbase);
2964       var = fold_convert (uutype, var);
2965     }
2966
2967   if (!constant_multiple_of (ustep, cstep, &rat))
2968     return false;
2969
2970   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2971      type, we achieve better folding by computing their difference in this
2972      wider type, and cast the result to UUTYPE.  We do not need to worry about
2973      overflows, as all the arithmetics will in the end be performed in UUTYPE
2974      anyway.  */
2975   common_type = determine_common_wider_type (&ubase, &cbase);
2976
2977   /* use = ubase - ratio * cbase + ratio * var.  */
2978   tree_to_aff_combination (ubase, common_type, aff);
2979   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2980   tree_to_aff_combination (var, uutype, &var_aff);
2981
2982   /* We need to shift the value if we are after the increment.  */
2983   if (stmt_after_increment (loop, cand, at))
2984     {
2985       aff_tree cstep_aff;
2986
2987       if (common_type != uutype)
2988         cstep_common = fold_convert (common_type, cstep);
2989       else
2990         cstep_common = cstep;
2991
2992       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2993       aff_combination_add (&cbase_aff, &cstep_aff);
2994     }
2995
2996   aff_combination_scale (&cbase_aff, -rat);
2997   aff_combination_add (aff, &cbase_aff);
2998   if (common_type != uutype)
2999     aff_combination_convert (aff, uutype);
3000
3001   aff_combination_scale (&var_aff, rat);
3002   aff_combination_add (aff, &var_aff);
3003
3004   return true;
3005 }
3006
3007 /* Return the type of USE.  */
3008
3009 static tree
3010 get_use_type (struct iv_use *use)
3011 {
3012   tree base_type = TREE_TYPE (use->iv->base);
3013   tree type;
3014
3015   if (use->type == USE_ADDRESS)
3016     {
3017       /* The base_type may be a void pointer.  Create a pointer type based on
3018          the mem_ref instead.  */
3019       type = build_pointer_type (TREE_TYPE (*use->op_p));
3020       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3021                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3022     }
3023   else
3024     type = base_type;
3025
3026   return type;
3027 }
3028
3029 /* Determines the expression by that USE is expressed from induction variable
3030    CAND at statement AT in LOOP.  The computation is unshared.  */
3031
3032 static tree
3033 get_computation_at (struct loop *loop,
3034                     struct iv_use *use, struct iv_cand *cand, gimple at)
3035 {
3036   aff_tree aff;
3037   tree type = get_use_type (use);
3038
3039   if (!get_computation_aff (loop, use, cand, at, &aff))
3040     return NULL_TREE;
3041   unshare_aff_combination (&aff);
3042   return fold_convert (type, aff_combination_to_tree (&aff));
3043 }
3044
3045 /* Determines the expression by that USE is expressed from induction variable
3046    CAND in LOOP.  The computation is unshared.  */
3047
3048 static tree
3049 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3050 {
3051   return get_computation_at (loop, use, cand, use->stmt);
3052 }
3053
3054 /* Adjust the cost COST for being in loop setup rather than loop body.
3055    If we're optimizing for space, the loop setup overhead is constant;
3056    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3057 static unsigned
3058 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3059 {
3060   if (cost == INFTY)
3061     return cost;
3062   else if (optimize_loop_for_speed_p (data->current_loop))
3063     return cost / avg_loop_niter (data->current_loop);
3064   else
3065     return cost;
3066 }
3067
3068 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3069    validity for a memory reference accessing memory of mode MODE in
3070    address space AS.  */
3071
3072
3073 bool
3074 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3075                                  addr_space_t as)
3076 {
3077 #define MAX_RATIO 128
3078   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3079   static vec<sbitmap> valid_mult_list;
3080   sbitmap valid_mult;
3081
3082   if (data_index >= valid_mult_list.length ())
3083     valid_mult_list.safe_grow_cleared (data_index + 1);
3084
3085   valid_mult = valid_mult_list[data_index];
3086   if (!valid_mult)
3087     {
3088       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3089       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3090       rtx addr;
3091       HOST_WIDE_INT i;
3092
3093       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3094       bitmap_clear (valid_mult);
3095       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3096       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3097         {
3098           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3099           if (memory_address_addr_space_p (mode, addr, as))
3100             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3101         }
3102
3103       if (dump_file && (dump_flags & TDF_DETAILS))
3104         {
3105           fprintf (dump_file, "  allowed multipliers:");
3106           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3107             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3108               fprintf (dump_file, " %d", (int) i);
3109           fprintf (dump_file, "\n");
3110           fprintf (dump_file, "\n");
3111         }
3112
3113       valid_mult_list[data_index] = valid_mult;
3114     }
3115
3116   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3117     return false;
3118
3119   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3120 }
3121
3122 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3123    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3124    variable is omitted.  Compute the cost for a memory reference that accesses
3125    a memory location of mode MEM_MODE in address space AS.
3126
3127    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3128    size of MEM_MODE / RATIO) is available.  To make this determination, we
3129    look at the size of the increment to be made, which is given in CSTEP.
3130    CSTEP may be zero if the step is unknown.
3131    STMT_AFTER_INC is true iff the statement we're looking at is after the
3132    increment of the original biv.
3133
3134    TODO -- there must be some better way.  This all is quite crude.  */
3135
3136 typedef struct address_cost_data_s
3137 {
3138   HOST_WIDE_INT min_offset, max_offset;
3139   unsigned costs[2][2][2][2];
3140 } *address_cost_data;
3141
3142
3143 static comp_cost
3144 get_address_cost (bool symbol_present, bool var_present,
3145                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3146                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3147                   addr_space_t as, bool speed,
3148                   bool stmt_after_inc, bool *may_autoinc)
3149 {
3150   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3151   static vec<address_cost_data> address_cost_data_list;
3152   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3153   address_cost_data data;
3154   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3155   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3156   unsigned cost, acost, complexity;
3157   bool offset_p, ratio_p, autoinc;
3158   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3159   unsigned HOST_WIDE_INT mask;
3160   unsigned bits;
3161
3162   if (data_index >= address_cost_data_list.length ())
3163     address_cost_data_list.safe_grow_cleared (data_index + 1);
3164
3165   data = address_cost_data_list[data_index];
3166   if (!data)
3167     {
3168       HOST_WIDE_INT i;
3169       HOST_WIDE_INT rat, off = 0;
3170       int old_cse_not_expected, width;
3171       unsigned sym_p, var_p, off_p, rat_p, add_c;
3172       rtx seq, addr, base;
3173       rtx reg0, reg1;
3174
3175       data = (address_cost_data) xcalloc (1, sizeof (*data));
3176
3177       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3178
3179       width = GET_MODE_BITSIZE (address_mode) - 1;
3180       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3181         width = HOST_BITS_PER_WIDE_INT - 1;
3182       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3183
3184       for (i = width; i >= 0; i--)
3185         {
3186           off = -((unsigned HOST_WIDE_INT) 1 << i);
3187           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3188           if (memory_address_addr_space_p (mem_mode, addr, as))
3189             break;
3190         }
3191       data->min_offset = (i == -1? 0 : off);
3192
3193       for (i = width; i >= 0; i--)
3194         {
3195           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3196           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3197           if (memory_address_addr_space_p (mem_mode, addr, as))
3198             break;
3199         }
3200       if (i == -1)
3201         off = 0;
3202       data->max_offset = off;
3203
3204       if (dump_file && (dump_flags & TDF_DETAILS))
3205         {
3206           fprintf (dump_file, "get_address_cost:\n");
3207           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3208                    GET_MODE_NAME (mem_mode),
3209                    data->min_offset);
3210           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3211                    GET_MODE_NAME (mem_mode),
3212                    data->max_offset);
3213         }
3214
3215       rat = 1;
3216       for (i = 2; i <= MAX_RATIO; i++)
3217         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3218           {
3219             rat = i;
3220             break;
3221           }
3222
3223       /* Compute the cost of various addressing modes.  */
3224       acost = 0;
3225       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3226       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3227
3228       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3229           || USE_STORE_PRE_DECREMENT (mem_mode))
3230         {
3231           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3232           has_predec[mem_mode]
3233             = memory_address_addr_space_p (mem_mode, addr, as);
3234         }
3235       if (USE_LOAD_POST_DECREMENT (mem_mode)
3236           || USE_STORE_POST_DECREMENT (mem_mode))
3237         {
3238           addr = gen_rtx_POST_DEC (address_mode, reg0);
3239           has_postdec[mem_mode]
3240             = memory_address_addr_space_p (mem_mode, addr, as);
3241         }
3242       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3243           || USE_STORE_PRE_DECREMENT (mem_mode))
3244         {
3245           addr = gen_rtx_PRE_INC (address_mode, reg0);
3246           has_preinc[mem_mode]
3247             = memory_address_addr_space_p (mem_mode, addr, as);
3248         }
3249       if (USE_LOAD_POST_INCREMENT (mem_mode)
3250           || USE_STORE_POST_INCREMENT (mem_mode))
3251         {
3252           addr = gen_rtx_POST_INC (address_mode, reg0);
3253           has_postinc[mem_mode]
3254             = memory_address_addr_space_p (mem_mode, addr, as);
3255         }
3256       for (i = 0; i < 16; i++)
3257         {
3258           sym_p = i & 1;
3259           var_p = (i >> 1) & 1;
3260           off_p = (i >> 2) & 1;
3261           rat_p = (i >> 3) & 1;
3262
3263           addr = reg0;
3264           if (rat_p)
3265             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3266                                    gen_int_mode (rat, address_mode));
3267
3268           if (var_p)
3269             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3270
3271           if (sym_p)
3272             {
3273               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3274               /* ??? We can run into trouble with some backends by presenting
3275                  it with symbols which haven't been properly passed through
3276                  targetm.encode_section_info.  By setting the local bit, we
3277                  enhance the probability of things working.  */
3278               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3279
3280               if (off_p)
3281                 base = gen_rtx_fmt_e (CONST, address_mode,
3282                                       gen_rtx_fmt_ee
3283                                         (PLUS, address_mode, base,
3284                                          gen_int_mode (off, address_mode)));
3285             }
3286           else if (off_p)
3287             base = gen_int_mode (off, address_mode);
3288           else
3289             base = NULL_RTX;
3290
3291           if (base)
3292             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3293
3294           start_sequence ();
3295           /* To avoid splitting addressing modes, pretend that no cse will
3296              follow.  */
3297           old_cse_not_expected = cse_not_expected;
3298           cse_not_expected = true;
3299           addr = memory_address_addr_space (mem_mode, addr, as);
3300           cse_not_expected = old_cse_not_expected;
3301           seq = get_insns ();
3302           end_sequence ();
3303
3304           acost = seq_cost (seq, speed);
3305           acost += address_cost (addr, mem_mode, as, speed);
3306
3307           if (!acost)
3308             acost = 1;
3309           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3310         }
3311
3312       /* On some targets, it is quite expensive to load symbol to a register,
3313          which makes addresses that contain symbols look much more expensive.
3314          However, the symbol will have to be loaded in any case before the
3315          loop (and quite likely we have it in register already), so it does not
3316          make much sense to penalize them too heavily.  So make some final
3317          tweaks for the SYMBOL_PRESENT modes:
3318
3319          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3320          var is cheaper, use this mode with small penalty.
3321          If VAR_PRESENT is true, try whether the mode with
3322          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3323          if this is the case, use it.  */
3324       add_c = add_cost (speed, address_mode);
3325       for (i = 0; i < 8; i++)
3326         {
3327           var_p = i & 1;
3328           off_p = (i >> 1) & 1;
3329           rat_p = (i >> 2) & 1;
3330
3331           acost = data->costs[0][1][off_p][rat_p] + 1;
3332           if (var_p)
3333             acost += add_c;
3334
3335           if (acost < data->costs[1][var_p][off_p][rat_p])
3336             data->costs[1][var_p][off_p][rat_p] = acost;
3337         }
3338
3339       if (dump_file && (dump_flags & TDF_DETAILS))
3340         {
3341           fprintf (dump_file, "Address costs:\n");
3342
3343           for (i = 0; i < 16; i++)
3344             {
3345               sym_p = i & 1;
3346               var_p = (i >> 1) & 1;
3347               off_p = (i >> 2) & 1;
3348               rat_p = (i >> 3) & 1;
3349
3350               fprintf (dump_file, "  ");
3351               if (sym_p)
3352                 fprintf (dump_file, "sym + ");
3353               if (var_p)
3354                 fprintf (dump_file, "var + ");
3355               if (off_p)
3356                 fprintf (dump_file, "cst + ");
3357               if (rat_p)
3358                 fprintf (dump_file, "rat * ");
3359
3360               acost = data->costs[sym_p][var_p][off_p][rat_p];
3361               fprintf (dump_file, "index costs %d\n", acost);
3362             }
3363           if (has_predec[mem_mode] || has_postdec[mem_mode]
3364               || has_preinc[mem_mode] || has_postinc[mem_mode])
3365             fprintf (dump_file, "  May include autoinc/dec\n");
3366           fprintf (dump_file, "\n");
3367         }
3368
3369       address_cost_data_list[data_index] = data;
3370     }
3371
3372   bits = GET_MODE_BITSIZE (address_mode);
3373   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3374   offset &= mask;
3375   if ((offset >> (bits - 1) & 1))
3376     offset |= ~mask;
3377   s_offset = offset;
3378
3379   autoinc = false;
3380   msize = GET_MODE_SIZE (mem_mode);
3381   autoinc_offset = offset;
3382   if (stmt_after_inc)
3383     autoinc_offset += ratio * cstep;
3384   if (symbol_present || var_present || ratio != 1)
3385     autoinc = false;
3386   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3387                && msize == cstep)
3388            || (has_postdec[mem_mode] && autoinc_offset == 0
3389                && msize == -cstep)
3390            || (has_preinc[mem_mode] && autoinc_offset == msize
3391                && msize == cstep)
3392            || (has_predec[mem_mode] && autoinc_offset == -msize
3393                && msize == -cstep))
3394     autoinc = true;
3395
3396   cost = 0;
3397   offset_p = (s_offset != 0
3398               && data->min_offset <= s_offset
3399               && s_offset <= data->max_offset);
3400   ratio_p = (ratio != 1
3401              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3402
3403   if (ratio != 1 && !ratio_p)
3404     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3405
3406   if (s_offset && !offset_p && !symbol_present)
3407     cost += add_cost (speed, address_mode);
3408
3409   if (may_autoinc)
3410     *may_autoinc = autoinc;
3411   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3412   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3413   return new_cost (cost + acost, complexity);
3414 }
3415
3416  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3417     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3418     calculating the operands of EXPR.  Returns true if successful, and returns
3419     the cost in COST.  */
3420
3421 static bool
3422 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3423                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3424 {
3425   comp_cost res;
3426   tree op1 = TREE_OPERAND (expr, 1);
3427   tree cst = TREE_OPERAND (mult, 1);
3428   tree multop = TREE_OPERAND (mult, 0);
3429   int m = exact_log2 (int_cst_value (cst));
3430   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3431   int sa_cost;
3432
3433   if (!(m >= 0 && m < maxm))
3434     return false;
3435
3436   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3437              ? shiftadd_cost (speed, mode, m)
3438              : (mult == op1
3439                 ? shiftsub1_cost (speed, mode, m)
3440                 : shiftsub0_cost (speed, mode, m)));
3441   res = new_cost (sa_cost, 0);
3442   res = add_costs (res, mult == op1 ? cost0 : cost1);
3443
3444   STRIP_NOPS (multop);
3445   if (!is_gimple_val (multop))
3446     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3447
3448   *cost = res;
3449   return true;
3450 }
3451
3452 /* Estimates cost of forcing expression EXPR into a variable.  */
3453
3454 static comp_cost
3455 force_expr_to_var_cost (tree expr, bool speed)
3456 {
3457   static bool costs_initialized = false;
3458   static unsigned integer_cost [2];
3459   static unsigned symbol_cost [2];
3460   static unsigned address_cost [2];
3461   tree op0, op1;
3462   comp_cost cost0, cost1, cost;
3463   enum machine_mode mode;
3464
3465   if (!costs_initialized)
3466     {
3467       tree type = build_pointer_type (integer_type_node);
3468       tree var, addr;
3469       rtx x;
3470       int i;
3471
3472       var = create_tmp_var_raw (integer_type_node, "test_var");
3473       TREE_STATIC (var) = 1;
3474       x = produce_memory_decl_rtl (var, NULL);
3475       SET_DECL_RTL (var, x);
3476
3477       addr = build1 (ADDR_EXPR, type, var);
3478
3479
3480       for (i = 0; i < 2; i++)
3481         {
3482           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3483                                                              2000), i);
3484
3485           symbol_cost[i] = computation_cost (addr, i) + 1;
3486
3487           address_cost[i]
3488             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3489           if (dump_file && (dump_flags & TDF_DETAILS))
3490             {
3491               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3492               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3493               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3494               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3495               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3496               fprintf (dump_file, "\n");
3497             }
3498         }
3499
3500       costs_initialized = true;
3501     }
3502
3503   STRIP_NOPS (expr);
3504
3505   if (SSA_VAR_P (expr))
3506     return no_cost;
3507
3508   if (is_gimple_min_invariant (expr))
3509     {
3510       if (TREE_CODE (expr) == INTEGER_CST)
3511         return new_cost (integer_cost [speed], 0);
3512
3513       if (TREE_CODE (expr) == ADDR_EXPR)
3514         {
3515           tree obj = TREE_OPERAND (expr, 0);
3516
3517           if (TREE_CODE (obj) == VAR_DECL
3518               || TREE_CODE (obj) == PARM_DECL
3519               || TREE_CODE (obj) == RESULT_DECL)
3520             return new_cost (symbol_cost [speed], 0);
3521         }
3522
3523       return new_cost (address_cost [speed], 0);
3524     }
3525
3526   switch (TREE_CODE (expr))
3527     {
3528     case POINTER_PLUS_EXPR:
3529     case PLUS_EXPR:
3530     case MINUS_EXPR:
3531     case MULT_EXPR:
3532       op0 = TREE_OPERAND (expr, 0);
3533       op1 = TREE_OPERAND (expr, 1);
3534       STRIP_NOPS (op0);
3535       STRIP_NOPS (op1);
3536
3537       if (is_gimple_val (op0))
3538         cost0 = no_cost;
3539       else
3540         cost0 = force_expr_to_var_cost (op0, speed);
3541
3542       if (is_gimple_val (op1))
3543         cost1 = no_cost;
3544       else
3545         cost1 = force_expr_to_var_cost (op1, speed);
3546
3547       break;
3548
3549     case NEGATE_EXPR:
3550       op0 = TREE_OPERAND (expr, 0);
3551       STRIP_NOPS (op0);
3552       op1 = NULL_TREE;
3553
3554       if (is_gimple_val (op0))
3555         cost0 = no_cost;
3556       else
3557         cost0 = force_expr_to_var_cost (op0, speed);
3558
3559       cost1 = no_cost;
3560       break;
3561
3562     default:
3563       /* Just an arbitrary value, FIXME.  */
3564       return new_cost (target_spill_cost[speed], 0);
3565     }
3566
3567   mode = TYPE_MODE (TREE_TYPE (expr));
3568   switch (TREE_CODE (expr))
3569     {
3570     case POINTER_PLUS_EXPR:
3571     case PLUS_EXPR:
3572     case MINUS_EXPR:
3573     case NEGATE_EXPR:
3574       cost = new_cost (add_cost (speed, mode), 0);
3575       if (TREE_CODE (expr) != NEGATE_EXPR)
3576         {
3577           tree mult = NULL_TREE;
3578           comp_cost sa_cost;
3579           if (TREE_CODE (op1) == MULT_EXPR)
3580             mult = op1;
3581           else if (TREE_CODE (op0) == MULT_EXPR)
3582             mult = op0;
3583
3584           if (mult != NULL_TREE
3585               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3586               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3587                                     speed, &sa_cost))
3588             return sa_cost;
3589         }
3590       break;
3591
3592     case MULT_EXPR:
3593       if (cst_and_fits_in_hwi (op0))
3594         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3595                                              mode, speed), 0);
3596       else if (cst_and_fits_in_hwi (op1))
3597         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3598                                              mode, speed), 0);
3599       else
3600         return new_cost (target_spill_cost [speed], 0);
3601       break;
3602
3603     default:
3604       gcc_unreachable ();
3605     }
3606
3607   cost = add_costs (cost, cost0);
3608   cost = add_costs (cost, cost1);
3609
3610   /* Bound the cost by target_spill_cost.  The parts of complicated
3611      computations often are either loop invariant or at least can
3612      be shared between several iv uses, so letting this grow without
3613      limits would not give reasonable results.  */
3614   if (cost.cost > (int) target_spill_cost [speed])
3615     cost.cost = target_spill_cost [speed];
3616
3617   return cost;
3618 }
3619
3620 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3621    invariants the computation depends on.  */
3622
3623 static comp_cost
3624 force_var_cost (struct ivopts_data *data,
3625                 tree expr, bitmap *depends_on)
3626 {
3627   if (depends_on)
3628     {
3629       fd_ivopts_data = data;
3630       walk_tree (&expr, find_depends, depends_on, NULL);
3631     }
3632
3633   return force_expr_to_var_cost (expr, data->speed);
3634 }
3635
3636 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3637    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3638    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3639    invariants the computation depends on.  */
3640
3641 static comp_cost
3642 split_address_cost (struct ivopts_data *data,
3643                     tree addr, bool *symbol_present, bool *var_present,
3644                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3645 {
3646   tree core;
3647   HOST_WIDE_INT bitsize;
3648   HOST_WIDE_INT bitpos;
3649   tree toffset;
3650   enum machine_mode mode;
3651   int unsignedp, volatilep;
3652
3653   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3654                               &unsignedp, &volatilep, false);
3655
3656   if (toffset != 0
3657       || bitpos % BITS_PER_UNIT != 0
3658       || TREE_CODE (core) != VAR_DECL)
3659     {
3660       *symbol_present = false;
3661       *var_present = true;
3662       fd_ivopts_data = data;
3663       walk_tree (&addr, find_depends, depends_on, NULL);
3664       return new_cost (target_spill_cost[data->speed], 0);
3665     }
3666
3667   *offset += bitpos / BITS_PER_UNIT;
3668   if (TREE_STATIC (core)
3669       || DECL_EXTERNAL (core))
3670     {
3671       *symbol_present = true;
3672       *var_present = false;
3673       return no_cost;
3674     }
3675
3676   *symbol_present = false;
3677   *var_present = true;
3678   return no_cost;
3679 }
3680
3681 /* Estimates cost of expressing difference of addresses E1 - E2 as
3682    var + symbol + offset.  The value of offset is added to OFFSET,
3683    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3684    part is missing.  DEPENDS_ON is a set of the invariants the computation
3685    depends on.  */
3686
3687 static comp_cost
3688 ptr_difference_cost (struct ivopts_data *data,
3689                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3690                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3691 {
3692   HOST_WIDE_INT diff = 0;
3693   aff_tree aff_e1, aff_e2;
3694   tree type;
3695
3696   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3697
3698   if (ptr_difference_const (e1, e2, &diff))
3699     {
3700       *offset += diff;
3701       *symbol_present = false;
3702       *var_present = false;
3703       return no_cost;
3704     }
3705
3706   if (integer_zerop (e2))
3707     return split_address_cost (data, TREE_OPERAND (e1, 0),
3708                                symbol_present, var_present, offset, depends_on);
3709
3710   *symbol_present = false;
3711   *var_present = true;
3712
3713   type = signed_type_for (TREE_TYPE (e1));
3714   tree_to_aff_combination (e1, type, &aff_e1);
3715   tree_to_aff_combination (e2, type, &aff_e2);
3716   aff_combination_scale (&aff_e2, double_int_minus_one);
3717   aff_combination_add (&aff_e1, &aff_e2);
3718
3719   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3720 }
3721
3722 /* Estimates cost of expressing difference E1 - E2 as
3723    var + symbol + offset.  The value of offset is added to OFFSET,
3724    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3725    part is missing.  DEPENDS_ON is a set of the invariants the computation
3726    depends on.  */
3727
3728 static comp_cost
3729 difference_cost (struct ivopts_data *data,
3730                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3731                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3732 {
3733   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3734   unsigned HOST_WIDE_INT off1, off2;
3735   aff_tree aff_e1, aff_e2;
3736   tree type;
3737
3738   e1 = strip_offset (e1, &off1);
3739   e2 = strip_offset (e2, &off2);
3740   *offset += off1 - off2;
3741
3742   STRIP_NOPS (e1);
3743   STRIP_NOPS (e2);
3744
3745   if (TREE_CODE (e1) == ADDR_EXPR)
3746     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3747                                 offset, depends_on);
3748   *symbol_present = false;
3749
3750   if (operand_equal_p (e1, e2, 0))
3751     {
3752       *var_present = false;
3753       return no_cost;
3754     }
3755
3756   *var_present = true;
3757
3758   if (integer_zerop (e2))
3759     return force_var_cost (data, e1, depends_on);
3760
3761   if (integer_zerop (e1))
3762     {
3763       comp_cost cost = force_var_cost (data, e2, depends_on);
3764       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3765       return cost;
3766     }
3767
3768   type = signed_type_for (TREE_TYPE (e1));
3769   tree_to_aff_combination (e1, type, &aff_e1);
3770   tree_to_aff_combination (e2, type, &aff_e2);
3771   aff_combination_scale (&aff_e2, double_int_minus_one);
3772   aff_combination_add (&aff_e1, &aff_e2);
3773
3774   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3775 }
3776
3777 /* Returns true if AFF1 and AFF2 are identical.  */
3778
3779 static bool
3780 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3781 {
3782   unsigned i;
3783
3784   if (aff1->n != aff2->n)
3785     return false;
3786
3787   for (i = 0; i < aff1->n; i++)
3788     {
3789       if (aff1->elts[i].coef != aff2->elts[i].coef)
3790         return false;
3791
3792       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3793         return false;
3794     }
3795   return true;
3796 }
3797
3798 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3799
3800 static int
3801 get_expr_id (struct ivopts_data *data, tree expr)
3802 {
3803   struct iv_inv_expr_ent ent;
3804   struct iv_inv_expr_ent **slot;
3805
3806   ent.expr = expr;
3807   ent.hash = iterative_hash_expr (expr, 0);
3808   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3809                                                      &ent, INSERT);
3810   if (*slot)
3811     return (*slot)->id;
3812
3813   *slot = XNEW (struct iv_inv_expr_ent);
3814   (*slot)->expr = expr;
3815   (*slot)->hash = ent.hash;
3816   (*slot)->id = data->inv_expr_id++;
3817   return (*slot)->id;
3818 }
3819
3820 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3821    requires a new compiler generated temporary.  Returns -1 otherwise.
3822    ADDRESS_P is a flag indicating if the expression is for address
3823    computation.  */
3824
3825 static int
3826 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3827                             tree cbase, HOST_WIDE_INT ratio,
3828                             bool address_p)
3829 {
3830   aff_tree ubase_aff, cbase_aff;
3831   tree expr, ub, cb;
3832
3833   STRIP_NOPS (ubase);
3834   STRIP_NOPS (cbase);
3835   ub = ubase;
3836   cb = cbase;
3837
3838   if ((TREE_CODE (ubase) == INTEGER_CST)
3839       && (TREE_CODE (cbase) == INTEGER_CST))
3840     return -1;
3841
3842   /* Strips the constant part. */
3843   if (TREE_CODE (ubase) == PLUS_EXPR
3844       || TREE_CODE (ubase) == MINUS_EXPR
3845       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3846     {
3847       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3848         ubase = TREE_OPERAND (ubase, 0);
3849     }
3850
3851   /* Strips the constant part. */
3852   if (TREE_CODE (cbase) == PLUS_EXPR
3853       || TREE_CODE (cbase) == MINUS_EXPR
3854       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3855     {
3856       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3857         cbase = TREE_OPERAND (cbase, 0);
3858     }
3859
3860   if (address_p)
3861     {
3862       if (((TREE_CODE (ubase) == SSA_NAME)
3863            || (TREE_CODE (ubase) == ADDR_EXPR
3864                && is_gimple_min_invariant (ubase)))
3865           && (TREE_CODE (cbase) == INTEGER_CST))
3866         return -1;
3867
3868       if (((TREE_CODE (cbase) == SSA_NAME)
3869            || (TREE_CODE (cbase) == ADDR_EXPR
3870                && is_gimple_min_invariant (cbase)))
3871           && (TREE_CODE (ubase) == INTEGER_CST))
3872         return -1;
3873     }
3874
3875   if (ratio == 1)
3876     {
3877       if(operand_equal_p (ubase, cbase, 0))
3878         return -1;
3879
3880       if (TREE_CODE (ubase) == ADDR_EXPR
3881           && TREE_CODE (cbase) == ADDR_EXPR)
3882         {
3883           tree usym, csym;
3884
3885           usym = TREE_OPERAND (ubase, 0);
3886           csym = TREE_OPERAND (cbase, 0);
3887           if (TREE_CODE (usym) == ARRAY_REF)
3888             {
3889               tree ind = TREE_OPERAND (usym, 1);
3890               if (TREE_CODE (ind) == INTEGER_CST
3891                   && host_integerp (ind, 0)
3892                   && TREE_INT_CST_LOW (ind) == 0)
3893                 usym = TREE_OPERAND (usym, 0);
3894             }
3895           if (TREE_CODE (csym) == ARRAY_REF)
3896             {
3897               tree ind = TREE_OPERAND (csym, 1);
3898               if (TREE_CODE (ind) == INTEGER_CST
3899                   && host_integerp (ind, 0)
3900                   && TREE_INT_CST_LOW (ind) == 0)
3901                 csym = TREE_OPERAND (csym, 0);
3902             }
3903           if (operand_equal_p (usym, csym, 0))
3904             return -1;
3905         }
3906       /* Now do more complex comparison  */
3907       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3908       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3909       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3910         return -1;
3911     }
3912
3913   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3914   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3915
3916   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3917   aff_combination_add (&ubase_aff, &cbase_aff);
3918   expr = aff_combination_to_tree (&ubase_aff);
3919   return get_expr_id (data, expr);
3920 }
3921
3922
3923
3924 /* Determines the cost of the computation by that USE is expressed
3925    from induction variable CAND.  If ADDRESS_P is true, we just need
3926    to create an address from it, otherwise we want to get it into
3927    register.  A set of invariants we depend on is stored in
3928    DEPENDS_ON.  AT is the statement at that the value is computed.
3929    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3930    addressing is likely.  */
3931
3932 static comp_cost
3933 get_computation_cost_at (struct ivopts_data *data,
3934                          struct iv_use *use, struct iv_cand *cand,
3935                          bool address_p, bitmap *depends_on, gimple at,
3936                          bool *can_autoinc,
3937                          int *inv_expr_id)
3938 {
3939   tree ubase = use->iv->base, ustep = use->iv->step;
3940   tree cbase, cstep;
3941   tree utype = TREE_TYPE (ubase), ctype;
3942   unsigned HOST_WIDE_INT cstepi, offset = 0;
3943   HOST_WIDE_INT ratio, aratio;
3944   bool var_present, symbol_present, stmt_is_after_inc;
3945   comp_cost cost;
3946   double_int rat;
3947   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3948   enum machine_mode mem_mode = (address_p
3949                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3950                                 : VOIDmode);
3951
3952   *depends_on = NULL;
3953
3954   /* Only consider real candidates.  */
3955   if (!cand->iv)
3956     return infinite_cost;
3957
3958   cbase = cand->iv->base;
3959   cstep = cand->iv->step;
3960   ctype = TREE_TYPE (cbase);
3961
3962   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3963     {
3964       /* We do not have a precision to express the values of use.  */
3965       return infinite_cost;
3966     }
3967
3968   if (address_p
3969       || (use->iv->base_object
3970           && cand->iv->base_object
3971           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3972           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3973     {
3974       /* Do not try to express address of an object with computation based
3975          on address of a different object.  This may cause problems in rtl
3976          level alias analysis (that does not expect this to be happening,
3977          as this is illegal in C), and would be unlikely to be useful
3978          anyway.  */
3979       if (use->iv->base_object
3980           && cand->iv->base_object
3981           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3982         return infinite_cost;
3983     }
3984
3985   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3986     {
3987       /* TODO -- add direct handling of this case.  */
3988       goto fallback;
3989     }
3990
3991   /* CSTEPI is removed from the offset in case statement is after the
3992      increment.  If the step is not constant, we use zero instead.
3993      This is a bit imprecise (there is the extra addition), but
3994      redundancy elimination is likely to transform the code so that
3995      it uses value of the variable before increment anyway,
3996      so it is not that much unrealistic.  */
3997   if (cst_and_fits_in_hwi (cstep))
3998     cstepi = int_cst_value (cstep);
3999   else
4000     cstepi = 0;
4001
4002   if (!constant_multiple_of (ustep, cstep, &rat))
4003     return infinite_cost;
4004
4005   if (rat.fits_shwi ())
4006     ratio = rat.to_shwi ();
4007   else
4008     return infinite_cost;
4009
4010   STRIP_NOPS (cbase);
4011   ctype = TREE_TYPE (cbase);
4012
4013   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4014
4015   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4016      or ratio == 1, it is better to handle this like
4017
4018      ubase - ratio * cbase + ratio * var
4019
4020      (also holds in the case ratio == -1, TODO.  */
4021
4022   if (cst_and_fits_in_hwi (cbase))
4023     {
4024       offset = - ratio * int_cst_value (cbase);
4025       cost = difference_cost (data,
4026                               ubase, build_int_cst (utype, 0),
4027                               &symbol_present, &var_present, &offset,
4028                               depends_on);
4029       cost.cost /= avg_loop_niter (data->current_loop);
4030     }
4031   else if (ratio == 1)
4032     {
4033       tree real_cbase = cbase;
4034
4035       /* Check to see if any adjustment is needed.  */
4036       if (cstepi == 0 && stmt_is_after_inc)
4037         {
4038           aff_tree real_cbase_aff;
4039           aff_tree cstep_aff;
4040
4041           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4042                                    &real_cbase_aff);
4043           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4044
4045           aff_combination_add (&real_cbase_aff, &cstep_aff);
4046           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4047         }
4048
4049       cost = difference_cost (data,
4050                               ubase, real_cbase,
4051                               &symbol_present, &var_present, &offset,
4052                               depends_on);
4053       cost.cost /= avg_loop_niter (data->current_loop);
4054     }
4055   else if (address_p
4056            && !POINTER_TYPE_P (ctype)
4057            && multiplier_allowed_in_address_p
4058                 (ratio, mem_mode,
4059                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4060     {
4061       cbase
4062         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4063       cost = difference_cost (data,
4064                               ubase, cbase,
4065                               &symbol_present, &var_present, &offset,
4066                               depends_on);
4067       cost.cost /= avg_loop_niter (data->current_loop);
4068     }
4069   else
4070     {
4071       cost = force_var_cost (data, cbase, depends_on);
4072       cost = add_costs (cost,
4073                         difference_cost (data,
4074                                          ubase, build_int_cst (utype, 0),
4075                                          &symbol_present, &var_present,
4076                                          &offset, depends_on));
4077       cost.cost /= avg_loop_niter (data->current_loop);
4078       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4079     }
4080
4081   if (inv_expr_id)
4082     {
4083       *inv_expr_id =
4084           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4085       /* Clear depends on.  */
4086       if (*inv_expr_id != -1 && depends_on && *depends_on)
4087         bitmap_clear (*depends_on);
4088     }
4089
4090   /* If we are after the increment, the value of the candidate is higher by
4091      one iteration.  */
4092   if (stmt_is_after_inc)
4093     offset -= ratio * cstepi;
4094
4095   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4096      (symbol/var1/const parts may be omitted).  If we are looking for an
4097      address, find the cost of addressing this.  */
4098   if (address_p)
4099     return add_costs (cost,
4100                       get_address_cost (symbol_present, var_present,
4101                                         offset, ratio, cstepi,
4102                                         mem_mode,
4103                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4104                                         speed, stmt_is_after_inc,
4105                                         can_autoinc));
4106
4107   /* Otherwise estimate the costs for computing the expression.  */
4108   if (!symbol_present && !var_present && !offset)
4109     {
4110       if (ratio != 1)
4111         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4112       return cost;
4113     }
4114
4115   /* Symbol + offset should be compile-time computable so consider that they
4116       are added once to the variable, if present.  */
4117   if (var_present && (symbol_present || offset))
4118     cost.cost += adjust_setup_cost (data,
4119                                     add_cost (speed, TYPE_MODE (ctype)));
4120
4121   /* Having offset does not affect runtime cost in case it is added to
4122      symbol, but it increases complexity.  */
4123   if (offset)
4124     cost.complexity++;
4125
4126   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4127
4128   aratio = ratio > 0 ? ratio : -ratio;
4129   if (aratio != 1)
4130     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4131   return cost;
4132
4133 fallback:
4134   if (can_autoinc)
4135     *can_autoinc = false;
4136
4137   {
4138     /* Just get the expression, expand it and measure the cost.  */
4139     tree comp = get_computation_at (data->current_loop, use, cand, at);
4140
4141     if (!comp)
4142       return infinite_cost;
4143
4144     if (address_p)
4145       comp = build_simple_mem_ref (comp);
4146
4147     return new_cost (computation_cost (comp, speed), 0);
4148   }
4149 }
4150
4151 /* Determines the cost of the computation by that USE is expressed
4152    from induction variable CAND.  If ADDRESS_P is true, we just need
4153    to create an address from it, otherwise we want to get it into
4154    register.  A set of invariants we depend on is stored in
4155    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4156    autoinc addressing is likely.  */
4157
4158 static comp_cost
4159 get_computation_cost (struct ivopts_data *data,
4160                       struct iv_use *use, struct iv_cand *cand,
4161                       bool address_p, bitmap *depends_on,
4162                       bool *can_autoinc, int *inv_expr_id)
4163 {
4164   return get_computation_cost_at (data,
4165                                   use, cand, address_p, depends_on, use->stmt,
4166                                   can_autoinc, inv_expr_id);
4167 }
4168
4169 /* Determines cost of basing replacement of USE on CAND in a generic
4170    expression.  */
4171
4172 static bool
4173 determine_use_iv_cost_generic (struct ivopts_data *data,
4174                                struct iv_use *use, struct iv_cand *cand)
4175 {
4176   bitmap depends_on;
4177   comp_cost cost;
4178   int inv_expr_id = -1;
4179
4180   /* The simple case first -- if we need to express value of the preserved
4181      original biv, the cost is 0.  This also prevents us from counting the
4182      cost of increment twice -- once at this use and once in the cost of
4183      the candidate.  */
4184   if (cand->pos == IP_ORIGINAL
4185       && cand->incremented_at == use->stmt)
4186     {
4187       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4188                        ERROR_MARK, -1);
4189       return true;
4190     }
4191
4192   cost = get_computation_cost (data, use, cand, false, &depends_on,
4193                                NULL, &inv_expr_id);
4194
4195   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4196                    inv_expr_id);
4197
4198   return !infinite_cost_p (cost);
4199 }
4200
4201 /* Determines cost of basing replacement of USE on CAND in an address.  */
4202
4203 static bool
4204 determine_use_iv_cost_address (struct ivopts_data *data,
4205                                struct iv_use *use, struct iv_cand *cand)
4206 {
4207   bitmap depends_on;
4208   bool can_autoinc;
4209   int inv_expr_id = -1;
4210   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4211                                          &can_autoinc, &inv_expr_id);
4212
4213   if (cand->ainc_use == use)
4214     {
4215       if (can_autoinc)
4216         cost.cost -= cand->cost_step;
4217       /* If we generated the candidate solely for exploiting autoincrement
4218          opportunities, and it turns out it can't be used, set the cost to
4219          infinity to make sure we ignore it.  */
4220       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4221         cost = infinite_cost;
4222     }
4223   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4224                    inv_expr_id);
4225
4226   return !infinite_cost_p (cost);
4227 }
4228
4229 /* Computes value of candidate CAND at position AT in iteration NITER, and
4230    stores it to VAL.  */
4231
4232 static void
4233 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4234                aff_tree *val)
4235 {
4236   aff_tree step, delta, nit;
4237   struct iv *iv = cand->iv;
4238   tree type = TREE_TYPE (iv->base);
4239   tree steptype = type;
4240   if (POINTER_TYPE_P (type))
4241     steptype = sizetype;
4242
4243   tree_to_aff_combination (iv->step, steptype, &step);
4244   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4245   aff_combination_convert (&nit, steptype);
4246   aff_combination_mult (&nit, &step, &delta);
4247   if (stmt_after_increment (loop, cand, at))
4248     aff_combination_add (&delta, &step);
4249
4250   tree_to_aff_combination (iv->base, type, val);
4251   aff_combination_add (val, &delta);
4252 }
4253
4254 /* Returns period of induction variable iv.  */
4255
4256 static tree
4257 iv_period (struct iv *iv)
4258 {
4259   tree step = iv->step, period, type;
4260   tree pow2div;
4261
4262   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4263
4264   type = unsigned_type_for (TREE_TYPE (step));
4265   /* Period of the iv is lcm (step, type_range)/step -1,
4266      i.e., N*type_range/step - 1. Since type range is power
4267      of two, N == (step >> num_of_ending_zeros_binary (step),
4268      so the final result is
4269
4270        (type_range >> num_of_ending_zeros_binary (step)) - 1
4271
4272   */
4273   pow2div = num_ending_zeros (step);
4274
4275   period = build_low_bits_mask (type,
4276                                 (TYPE_PRECISION (type)
4277                                  - tree_low_cst (pow2div, 1)));
4278
4279   return period;
4280 }
4281
4282 /* Returns the comparison operator used when eliminating the iv USE.  */
4283
4284 static enum tree_code
4285 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4286 {
4287   struct loop *loop = data->current_loop;
4288   basic_block ex_bb;
4289   edge exit;
4290
4291   ex_bb = gimple_bb (use->stmt);
4292   exit = EDGE_SUCC (ex_bb, 0);
4293   if (flow_bb_inside_loop_p (loop, exit->dest))
4294     exit = EDGE_SUCC (ex_bb, 1);
4295
4296   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4297 }
4298
4299 static tree
4300 strip_wrap_conserving_type_conversions (tree exp)
4301 {
4302   while (tree_ssa_useless_type_conversion (exp)
4303          && (nowrap_type_p (TREE_TYPE (exp))
4304              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4305     exp = TREE_OPERAND (exp, 0);
4306   return exp;
4307 }
4308
4309 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4310    check for an exact match.  */
4311
4312 static bool
4313 expr_equal_p (tree e, tree what)
4314 {
4315   gimple stmt;
4316   enum tree_code code;
4317
4318   e = strip_wrap_conserving_type_conversions (e);
4319   what = strip_wrap_conserving_type_conversions (what);
4320
4321   code = TREE_CODE (what);
4322   if (TREE_TYPE (e) != TREE_TYPE (what))
4323     return false;
4324
4325   if (operand_equal_p (e, what, 0))
4326     return true;
4327
4328   if (TREE_CODE (e) != SSA_NAME)
4329     return false;
4330
4331   stmt = SSA_NAME_DEF_STMT (e);
4332   if (gimple_code (stmt) != GIMPLE_ASSIGN
4333       || gimple_assign_rhs_code (stmt) != code)
4334     return false;
4335
4336   switch (get_gimple_rhs_class (code))
4337     {
4338     case GIMPLE_BINARY_RHS:
4339       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4340         return false;
4341       /* Fallthru.  */
4342
4343     case GIMPLE_UNARY_RHS:
4344     case GIMPLE_SINGLE_RHS:
4345       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4346     default:
4347       return false;
4348     }
4349 }
4350
4351 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4352    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4353    calculation is performed in non-wrapping type.
4354
4355    TODO: More generally, we could test for the situation that
4356          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4357          This would require knowing the sign of OFFSET.
4358
4359          Also, we only look for the first addition in the computation of BASE.
4360          More complex analysis would be better, but introducing it just for
4361          this optimization seems like an overkill.  */
4362
4363 static bool
4364 difference_cannot_overflow_p (tree base, tree offset)
4365 {
4366   enum tree_code code;
4367   tree e1, e2;
4368
4369   if (!nowrap_type_p (TREE_TYPE (base)))
4370     return false;
4371
4372   base = expand_simple_operations (base);
4373
4374   if (TREE_CODE (base) == SSA_NAME)
4375     {
4376       gimple stmt = SSA_NAME_DEF_STMT (base);
4377
4378       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4379         return false;
4380
4381       code = gimple_assign_rhs_code (stmt);
4382       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4383         return false;
4384
4385       e1 = gimple_assign_rhs1 (stmt);
4386       e2 = gimple_assign_rhs2 (stmt);
4387     }
4388   else
4389     {
4390       code = TREE_CODE (base);
4391       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4392         return false;
4393       e1 = TREE_OPERAND (base, 0);
4394       e2 = TREE_OPERAND (base, 1);
4395     }
4396
4397   /* TODO: deeper inspection may be necessary to prove the equality.  */
4398   switch (code)
4399     {
4400     case PLUS_EXPR:
4401       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4402     case POINTER_PLUS_EXPR:
4403       return expr_equal_p (e2, offset);
4404
4405     default:
4406       return false;
4407     }
4408 }
4409
4410 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4411    comparison with CAND.  NITER describes the number of iterations of
4412    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4413
4414    We aim to handle the following situation:
4415
4416    sometype *base, *p;
4417    int a, b, i;
4418
4419    i = a;
4420    p = p_0 = base + a;
4421
4422    do
4423      {
4424        bla (*p);
4425        p++;
4426        i++;
4427      }
4428    while (i < b);
4429
4430    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4431    We aim to optimize this to
4432
4433    p = p_0 = base + a;
4434    do
4435      {
4436        bla (*p);
4437        p++;
4438      }
4439    while (p < p_0 - a + b);
4440
4441    This preserves the correctness, since the pointer arithmetics does not
4442    overflow.  More precisely:
4443
4444    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4445       overflow in computing it or the values of p.
4446    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4447       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4448
4449 static bool
4450 iv_elimination_compare_lt (struct ivopts_data *data,
4451                            struct iv_cand *cand, enum tree_code *comp_p,
4452                            struct tree_niter_desc *niter)
4453 {
4454   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4455   struct affine_tree_combination nit, tmpa, tmpb;
4456   enum tree_code comp;
4457   HOST_WIDE_INT step;
4458
4459   /* We need to know that the candidate induction variable does not overflow.
4460      While more complex analysis may be used to prove this, for now just
4461      check that the variable appears in the original program and that it
4462      is computed in a type that guarantees no overflows.  */
4463   cand_type = TREE_TYPE (cand->iv->base);
4464   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4465     return false;
4466
4467   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4468      the calculation of the BOUND could overflow, making the comparison
4469      invalid.  */
4470   if (!data->loop_single_exit_p)
4471     return false;
4472
4473   /* We need to be able to decide whether candidate is increasing or decreasing
4474      in order to choose the right comparison operator.  */
4475   if (!cst_and_fits_in_hwi (cand->iv->step))
4476     return false;
4477   step = int_cst_value (cand->iv->step);
4478
4479   /* Check that the number of iterations matches the expected pattern:
4480      a + 1 > b ? 0 : b - a - 1.  */
4481   mbz = niter->may_be_zero;
4482   if (TREE_CODE (mbz) == GT_EXPR)
4483     {
4484       /* Handle a + 1 > b.  */
4485       tree op0 = TREE_OPERAND (mbz, 0);
4486       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4487         {
4488           a = TREE_OPERAND (op0, 0);
4489           b = TREE_OPERAND (mbz, 1);
4490         }
4491       else
4492         return false;
4493     }
4494   else if (TREE_CODE (mbz) == LT_EXPR)
4495     {
4496       tree op1 = TREE_OPERAND (mbz, 1);
4497
4498       /* Handle b < a + 1.  */
4499       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4500         {
4501           a = TREE_OPERAND (op1, 0);
4502           b = TREE_OPERAND (mbz, 0);
4503         }
4504       else
4505         return false;
4506     }
4507   else
4508     return false;
4509
4510   /* Expected number of iterations is B - A - 1.  Check that it matches
4511      the actual number, i.e., that B - A - NITER = 1.  */
4512   tree_to_aff_combination (niter->niter, nit_type, &nit);
4513   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4514   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4515   aff_combination_scale (&nit, double_int_minus_one);
4516   aff_combination_scale (&tmpa, double_int_minus_one);
4517   aff_combination_add (&tmpb, &tmpa);
4518   aff_combination_add (&tmpb, &nit);
4519   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4520     return false;
4521
4522   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4523      overflow.  */
4524   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4525                         cand->iv->step,
4526                         fold_convert (TREE_TYPE (cand->iv->step), a));
4527   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4528     return false;
4529
4530   /* Determine the new comparison operator.  */
4531   comp = step < 0 ? GT_EXPR : LT_EXPR;
4532   if (*comp_p == NE_EXPR)
4533     *comp_p = comp;
4534   else if (*comp_p == EQ_EXPR)
4535     *comp_p = invert_tree_comparison (comp, false);
4536   else
4537     gcc_unreachable ();
4538
4539   return true;
4540 }
4541
4542 /* Check whether it is possible to express the condition in USE by comparison
4543    of candidate CAND.  If so, store the value compared with to BOUND, and the
4544    comparison operator to COMP.  */
4545
4546 static bool
4547 may_eliminate_iv (struct ivopts_data *data,
4548                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4549                   enum tree_code *comp)
4550 {
4551   basic_block ex_bb;
4552   edge exit;
4553   tree period;
4554   struct loop *loop = data->current_loop;
4555   aff_tree bnd;
4556   struct tree_niter_desc *desc = NULL;
4557
4558   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4559     return false;
4560
4561   /* For now works only for exits that dominate the loop latch.
4562      TODO: extend to other conditions inside loop body.  */
4563   ex_bb = gimple_bb (use->stmt);
4564   if (use->stmt != last_stmt (ex_bb)
4565       || gimple_code (use->stmt) != GIMPLE_COND
4566       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4567     return false;
4568
4569   exit = EDGE_SUCC (ex_bb, 0);
4570   if (flow_bb_inside_loop_p (loop, exit->dest))
4571     exit = EDGE_SUCC (ex_bb, 1);
4572   if (flow_bb_inside_loop_p (loop, exit->dest))
4573     return false;
4574
4575   desc = niter_for_exit (data, exit);
4576   if (!desc)
4577     return false;
4578
4579   /* Determine whether we can use the variable to test the exit condition.
4580      This is the case iff the period of the induction variable is greater
4581      than the number of iterations for which the exit condition is true.  */
4582   period = iv_period (cand->iv);
4583
4584   /* If the number of iterations is constant, compare against it directly.  */
4585   if (TREE_CODE (desc->niter) == INTEGER_CST)
4586     {
4587       /* See cand_value_at.  */
4588       if (stmt_after_increment (loop, cand, use->stmt))
4589         {
4590           if (!tree_int_cst_lt (desc->niter, period))
4591             return false;
4592         }
4593       else
4594         {
4595           if (tree_int_cst_lt (period, desc->niter))
4596             return false;
4597         }
4598     }
4599
4600   /* If not, and if this is the only possible exit of the loop, see whether
4601      we can get a conservative estimate on the number of iterations of the
4602      entire loop and compare against that instead.  */
4603   else
4604     {
4605       double_int period_value, max_niter;
4606
4607       max_niter = desc->max;
4608       if (stmt_after_increment (loop, cand, use->stmt))
4609         max_niter += double_int_one;
4610       period_value = tree_to_double_int (period);
4611       if (max_niter.ugt (period_value))
4612         {
4613           /* See if we can take advantage of inferred loop bound information.  */
4614           if (data->loop_single_exit_p)
4615             {
4616               if (!max_loop_iterations (loop, &max_niter))
4617                 return false;
4618               /* The loop bound is already adjusted by adding 1.  */
4619               if (max_niter.ugt (period_value))
4620                 return false;
4621             }
4622           else
4623             return false;
4624         }
4625     }
4626
4627   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4628
4629   *bound = aff_combination_to_tree (&bnd);
4630   *comp = iv_elimination_compare (data, use);
4631
4632   /* It is unlikely that computing the number of iterations using division
4633      would be more profitable than keeping the original induction variable.  */
4634   if (expression_expensive_p (*bound))
4635     return false;
4636
4637   /* Sometimes, it is possible to handle the situation that the number of
4638      iterations may be zero unless additional assumtions by using <
4639      instead of != in the exit condition.
4640
4641      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4642            base the exit condition on it.  However, that is often too
4643            expensive.  */
4644   if (!integer_zerop (desc->may_be_zero))
4645     return iv_elimination_compare_lt (data, cand, comp, desc);
4646
4647   return true;
4648 }
4649
4650  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4651     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4652
4653 static int
4654 parm_decl_cost (struct ivopts_data *data, tree bound)
4655 {
4656   tree sbound = bound;
4657   STRIP_NOPS (sbound);
4658
4659   if (TREE_CODE (sbound) == SSA_NAME
4660       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4661       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4662       && data->body_includes_call)
4663     return COSTS_N_INSNS (1);
4664
4665   return 0;
4666 }
4667
4668 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4669
4670 static bool
4671 determine_use_iv_cost_condition (struct ivopts_data *data,
4672                                  struct iv_use *use, struct iv_cand *cand)
4673 {
4674   tree bound = NULL_TREE;
4675   struct iv *cmp_iv;
4676   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4677   comp_cost elim_cost, express_cost, cost, bound_cost;
4678   bool ok;
4679   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4680   tree *control_var, *bound_cst;
4681   enum tree_code comp = ERROR_MARK;
4682
4683   /* Only consider real candidates.  */
4684   if (!cand->iv)
4685     {
4686       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4687                        ERROR_MARK, -1);
4688       return false;
4689     }
4690
4691   /* Try iv elimination.  */
4692   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4693     {
4694       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4695       if (elim_cost.cost == 0)
4696         elim_cost.cost = parm_decl_cost (data, bound);
4697       else if (TREE_CODE (bound) == INTEGER_CST)
4698         elim_cost.cost = 0;
4699       /* If we replace a loop condition 'i < n' with 'p < base + n',
4700          depends_on_elim will have 'base' and 'n' set, which implies
4701          that both 'base' and 'n' will be live during the loop.  More likely,
4702          'base + n' will be loop invariant, resulting in only one live value
4703          during the loop.  So in that case we clear depends_on_elim and set
4704         elim_inv_expr_id instead.  */
4705       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4706         {
4707           elim_inv_expr_id = get_expr_id (data, bound);
4708           bitmap_clear (depends_on_elim);
4709         }
4710       /* The bound is a loop invariant, so it will be only computed
4711          once.  */
4712       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4713     }
4714   else
4715     elim_cost = infinite_cost;
4716
4717   /* Try expressing the original giv.  If it is compared with an invariant,
4718      note that we cannot get rid of it.  */
4719   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4720                               NULL, &cmp_iv);
4721   gcc_assert (ok);
4722
4723   /* When the condition is a comparison of the candidate IV against
4724      zero, prefer this IV.
4725
4726      TODO: The constant that we're subtracting from the cost should
4727      be target-dependent.  This information should be added to the
4728      target costs for each backend.  */
4729   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4730       && integer_zerop (*bound_cst)
4731       && (operand_equal_p (*control_var, cand->var_after, 0)
4732           || operand_equal_p (*control_var, cand->var_before, 0)))
4733     elim_cost.cost -= 1;
4734
4735   express_cost = get_computation_cost (data, use, cand, false,
4736                                        &depends_on_express, NULL,
4737                                        &express_inv_expr_id);
4738   fd_ivopts_data = data;
4739   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4740
4741   /* Count the cost of the original bound as well.  */
4742   bound_cost = force_var_cost (data, *bound_cst, NULL);
4743   if (bound_cost.cost == 0)
4744     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4745   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4746     bound_cost.cost = 0;
4747   express_cost.cost += bound_cost.cost;
4748
4749   /* Choose the better approach, preferring the eliminated IV. */
4750   if (compare_costs (elim_cost, express_cost) <= 0)
4751     {
4752       cost = elim_cost;
4753       depends_on = depends_on_elim;
4754       depends_on_elim = NULL;
4755       inv_expr_id = elim_inv_expr_id;
4756     }
4757   else
4758     {
4759       cost = express_cost;
4760       depends_on = depends_on_express;
4761       depends_on_express = NULL;
4762       bound = NULL_TREE;
4763       comp = ERROR_MARK;
4764       inv_expr_id = express_inv_expr_id;
4765     }
4766
4767   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4768
4769   if (depends_on_elim)
4770     BITMAP_FREE (depends_on_elim);
4771   if (depends_on_express)
4772     BITMAP_FREE (depends_on_express);
4773
4774   return !infinite_cost_p (cost);
4775 }
4776
4777 /* Determines cost of basing replacement of USE on CAND.  Returns false
4778    if USE cannot be based on CAND.  */
4779
4780 static bool
4781 determine_use_iv_cost (struct ivopts_data *data,
4782                        struct iv_use *use, struct iv_cand *cand)
4783 {
4784   switch (use->type)
4785     {
4786     case USE_NONLINEAR_EXPR:
4787       return determine_use_iv_cost_generic (data, use, cand);
4788
4789     case USE_ADDRESS:
4790       return determine_use_iv_cost_address (data, use, cand);
4791
4792     case USE_COMPARE:
4793       return determine_use_iv_cost_condition (data, use, cand);
4794
4795     default:
4796       gcc_unreachable ();
4797     }
4798 }
4799
4800 /* Return true if get_computation_cost indicates that autoincrement is
4801    a possibility for the pair of USE and CAND, false otherwise.  */
4802
4803 static bool
4804 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4805                            struct iv_cand *cand)
4806 {
4807   bitmap depends_on;
4808   bool can_autoinc;
4809   comp_cost cost;
4810
4811   if (use->type != USE_ADDRESS)
4812     return false;
4813
4814   cost = get_computation_cost (data, use, cand, true, &depends_on,
4815                                &can_autoinc, NULL);
4816
4817   BITMAP_FREE (depends_on);
4818
4819   return !infinite_cost_p (cost) && can_autoinc;
4820 }
4821
4822 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4823    use that allows autoincrement, and set their AINC_USE if possible.  */
4824
4825 static void
4826 set_autoinc_for_original_candidates (struct ivopts_data *data)
4827 {
4828   unsigned i, j;
4829
4830   for (i = 0; i < n_iv_cands (data); i++)
4831     {
4832       struct iv_cand *cand = iv_cand (data, i);
4833       struct iv_use *closest = NULL;
4834       if (cand->pos != IP_ORIGINAL)
4835         continue;
4836       for (j = 0; j < n_iv_uses (data); j++)
4837         {
4838           struct iv_use *use = iv_use (data, j);
4839           unsigned uid = gimple_uid (use->stmt);
4840           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4841               || uid > gimple_uid (cand->incremented_at))
4842             continue;
4843           if (closest == NULL || uid > gimple_uid (closest->stmt))
4844             closest = use;
4845         }
4846       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4847         continue;
4848       cand->ainc_use = closest;
4849     }
4850 }
4851
4852 /* Finds the candidates for the induction variables.  */
4853
4854 static void
4855 find_iv_candidates (struct ivopts_data *data)
4856 {
4857   /* Add commonly used ivs.  */
4858   add_standard_iv_candidates (data);
4859
4860   /* Add old induction variables.  */
4861   add_old_ivs_candidates (data);
4862
4863   /* Add induction variables derived from uses.  */
4864   add_derived_ivs_candidates (data);
4865
4866   set_autoinc_for_original_candidates (data);
4867
4868   /* Record the important candidates.  */
4869   record_important_candidates (data);
4870 }
4871
4872 /* Determines costs of basing the use of the iv on an iv candidate.  */
4873
4874 static void
4875 determine_use_iv_costs (struct ivopts_data *data)
4876 {
4877   unsigned i, j;
4878   struct iv_use *use;
4879   struct iv_cand *cand;
4880   bitmap to_clear = BITMAP_ALLOC (NULL);
4881
4882   alloc_use_cost_map (data);
4883
4884   for (i = 0; i < n_iv_uses (data); i++)
4885     {
4886       use = iv_use (data, i);
4887
4888       if (data->consider_all_candidates)
4889         {
4890           for (j = 0; j < n_iv_cands (data); j++)
4891             {
4892               cand = iv_cand (data, j);
4893               determine_use_iv_cost (data, use, cand);
4894             }
4895         }
4896       else
4897         {
4898           bitmap_iterator bi;
4899
4900           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4901             {
4902               cand = iv_cand (data, j);
4903               if (!determine_use_iv_cost (data, use, cand))
4904                 bitmap_set_bit (to_clear, j);
4905             }
4906
4907           /* Remove the candidates for that the cost is infinite from
4908              the list of related candidates.  */
4909           bitmap_and_compl_into (use->related_cands, to_clear);
4910           bitmap_clear (to_clear);
4911         }
4912     }
4913
4914   BITMAP_FREE (to_clear);
4915
4916   if (dump_file && (dump_flags & TDF_DETAILS))
4917     {
4918       fprintf (dump_file, "Use-candidate costs:\n");
4919
4920       for (i = 0; i < n_iv_uses (data); i++)
4921         {
4922           use = iv_use (data, i);
4923
4924           fprintf (dump_file, "Use %d:\n", i);
4925           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4926           for (j = 0; j < use->n_map_members; j++)
4927             {
4928               if (!use->cost_map[j].cand
4929                   || infinite_cost_p (use->cost_map[j].cost))
4930                 continue;
4931
4932               fprintf (dump_file, "  %d\t%d\t%d\t",
4933                        use->cost_map[j].cand->id,
4934                        use->cost_map[j].cost.cost,
4935                        use->cost_map[j].cost.complexity);
4936               if (use->cost_map[j].depends_on)
4937                 bitmap_print (dump_file,
4938                               use->cost_map[j].depends_on, "","");
4939               if (use->cost_map[j].inv_expr_id != -1)
4940                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4941               fprintf (dump_file, "\n");
4942             }
4943
4944           fprintf (dump_file, "\n");
4945         }
4946       fprintf (dump_file, "\n");
4947     }
4948 }
4949
4950 /* Determines cost of the candidate CAND.  */
4951
4952 static void
4953 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4954 {
4955   comp_cost cost_base;
4956   unsigned cost, cost_step;
4957   tree base;
4958
4959   if (!cand->iv)
4960     {
4961       cand->cost = 0;
4962       return;
4963     }
4964
4965   /* There are two costs associated with the candidate -- its increment
4966      and its initialization.  The second is almost negligible for any loop
4967      that rolls enough, so we take it just very little into account.  */
4968
4969   base = cand->iv->base;
4970   cost_base = force_var_cost (data, base, NULL);
4971   /* It will be exceptional that the iv register happens to be initialized with
4972      the proper value at no cost.  In general, there will at least be a regcopy
4973      or a const set.  */
4974   if (cost_base.cost == 0)
4975     cost_base.cost = COSTS_N_INSNS (1);
4976   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4977
4978   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4979
4980   /* Prefer the original ivs unless we may gain something by replacing it.
4981      The reason is to make debugging simpler; so this is not relevant for
4982      artificial ivs created by other optimization passes.  */
4983   if (cand->pos != IP_ORIGINAL
4984       || !SSA_NAME_VAR (cand->var_before)
4985       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4986     cost++;
4987
4988   /* Prefer not to insert statements into latch unless there are some
4989      already (so that we do not create unnecessary jumps).  */
4990   if (cand->pos == IP_END
4991       && empty_block_p (ip_end_pos (data->current_loop)))
4992     cost++;
4993
4994   cand->cost = cost;
4995   cand->cost_step = cost_step;
4996 }
4997
4998 /* Determines costs of computation of the candidates.  */
4999
5000 static void
5001 determine_iv_costs (struct ivopts_data *data)
5002 {
5003   unsigned i;
5004
5005   if (dump_file && (dump_flags & TDF_DETAILS))
5006     {
5007       fprintf (dump_file, "Candidate costs:\n");
5008       fprintf (dump_file, "  cand\tcost\n");
5009     }
5010
5011   for (i = 0; i < n_iv_cands (data); i++)
5012     {
5013       struct iv_cand *cand = iv_cand (data, i);
5014
5015       determine_iv_cost (data, cand);
5016
5017       if (dump_file && (dump_flags & TDF_DETAILS))
5018         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5019     }
5020
5021   if (dump_file && (dump_flags & TDF_DETAILS))
5022     fprintf (dump_file, "\n");
5023 }
5024
5025 /* Calculates cost for having SIZE induction variables.  */
5026
5027 static unsigned
5028 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5029 {
5030   /* We add size to the cost, so that we prefer eliminating ivs
5031      if possible.  */
5032   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5033                                             data->body_includes_call);
5034 }
5035
5036 /* For each size of the induction variable set determine the penalty.  */
5037
5038 static void
5039 determine_set_costs (struct ivopts_data *data)
5040 {
5041   unsigned j, n;
5042   gimple phi;
5043   gimple_stmt_iterator psi;
5044   tree op;
5045   struct loop *loop = data->current_loop;
5046   bitmap_iterator bi;
5047
5048   if (dump_file && (dump_flags & TDF_DETAILS))
5049     {
5050       fprintf (dump_file, "Global costs:\n");
5051       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5052       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5053       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5054       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5055     }
5056
5057   n = 0;
5058   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5059     {
5060       phi = gsi_stmt (psi);
5061       op = PHI_RESULT (phi);
5062
5063       if (virtual_operand_p (op))
5064         continue;
5065
5066       if (get_iv (data, op))
5067         continue;
5068
5069       n++;
5070     }
5071
5072   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5073     {
5074       struct version_info *info = ver_info (data, j);
5075
5076       if (info->inv_id && info->has_nonlin_use)
5077         n++;
5078     }
5079
5080   data->regs_used = n;
5081   if (dump_file && (dump_flags & TDF_DETAILS))
5082     fprintf (dump_file, "  regs_used %d\n", n);
5083
5084   if (dump_file && (dump_flags & TDF_DETAILS))
5085     {
5086       fprintf (dump_file, "  cost for size:\n");
5087       fprintf (dump_file, "  ivs\tcost\n");
5088       for (j = 0; j <= 2 * target_avail_regs; j++)
5089         fprintf (dump_file, "  %d\t%d\n", j,
5090                  ivopts_global_cost_for_size (data, j));
5091       fprintf (dump_file, "\n");
5092     }
5093 }
5094
5095 /* Returns true if A is a cheaper cost pair than B.  */
5096
5097 static bool
5098 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5099 {
5100   int cmp;
5101
5102   if (!a)
5103     return false;
5104
5105   if (!b)
5106     return true;
5107
5108   cmp = compare_costs (a->cost, b->cost);
5109   if (cmp < 0)
5110     return true;
5111
5112   if (cmp > 0)
5113     return false;
5114
5115   /* In case the costs are the same, prefer the cheaper candidate.  */
5116   if (a->cand->cost < b->cand->cost)
5117     return true;
5118
5119   return false;
5120 }
5121
5122
5123 /* Returns candidate by that USE is expressed in IVS.  */
5124
5125 static struct cost_pair *
5126 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5127 {
5128   return ivs->cand_for_use[use->id];
5129 }
5130
5131 /* Computes the cost field of IVS structure.  */
5132
5133 static void
5134 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5135 {
5136   comp_cost cost = ivs->cand_use_cost;
5137
5138   cost.cost += ivs->cand_cost;
5139
5140   cost.cost += ivopts_global_cost_for_size (data,
5141                                             ivs->n_regs + ivs->num_used_inv_expr);
5142
5143   ivs->cost = cost;
5144 }
5145
5146 /* Remove invariants in set INVS to set IVS.  */
5147
5148 static void
5149 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5150 {
5151   bitmap_iterator bi;
5152   unsigned iid;
5153
5154   if (!invs)
5155     return;
5156
5157   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5158     {
5159       ivs->n_invariant_uses[iid]--;
5160       if (ivs->n_invariant_uses[iid] == 0)
5161         ivs->n_regs--;
5162     }
5163 }
5164
5165 /* Set USE not to be expressed by any candidate in IVS.  */
5166
5167 static void
5168 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5169                  struct iv_use *use)
5170 {
5171   unsigned uid = use->id, cid;
5172   struct cost_pair *cp;
5173
5174   cp = ivs->cand_for_use[uid];
5175   if (!cp)
5176     return;
5177   cid = cp->cand->id;
5178
5179   ivs->bad_uses++;
5180   ivs->cand_for_use[uid] = NULL;
5181   ivs->n_cand_uses[cid]--;
5182
5183   if (ivs->n_cand_uses[cid] == 0)
5184     {
5185       bitmap_clear_bit (ivs->cands, cid);
5186       /* Do not count the pseudocandidates.  */
5187       if (cp->cand->iv)
5188         ivs->n_regs--;
5189       ivs->n_cands--;
5190       ivs->cand_cost -= cp->cand->cost;
5191
5192       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5193     }
5194
5195   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5196
5197   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5198
5199   if (cp->inv_expr_id != -1)
5200     {
5201       ivs->used_inv_expr[cp->inv_expr_id]--;
5202       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5203         ivs->num_used_inv_expr--;
5204     }
5205   iv_ca_recount_cost (data, ivs);
5206 }
5207
5208 /* Add invariants in set INVS to set IVS.  */
5209
5210 static void
5211 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5212 {
5213   bitmap_iterator bi;
5214   unsigned iid;
5215
5216   if (!invs)
5217     return;
5218
5219   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5220     {
5221       ivs->n_invariant_uses[iid]++;
5222       if (ivs->n_invariant_uses[iid] == 1)
5223         ivs->n_regs++;
5224     }
5225 }
5226
5227 /* Set cost pair for USE in set IVS to CP.  */
5228
5229 static void
5230 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5231               struct iv_use *use, struct cost_pair *cp)
5232 {
5233   unsigned uid = use->id, cid;
5234
5235   if (ivs->cand_for_use[uid] == cp)
5236     return;
5237
5238   if (ivs->cand_for_use[uid])
5239     iv_ca_set_no_cp (data, ivs, use);
5240
5241   if (cp)
5242     {
5243       cid = cp->cand->id;
5244
5245       ivs->bad_uses--;
5246       ivs->cand_for_use[uid] = cp;
5247       ivs->n_cand_uses[cid]++;
5248       if (ivs->n_cand_uses[cid] == 1)
5249         {
5250           bitmap_set_bit (ivs->cands, cid);
5251           /* Do not count the pseudocandidates.  */
5252           if (cp->cand->iv)
5253             ivs->n_regs++;
5254           ivs->n_cands++;
5255           ivs->cand_cost += cp->cand->cost;
5256
5257           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5258         }
5259
5260       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5261       iv_ca_set_add_invariants (ivs, cp->depends_on);
5262
5263       if (cp->inv_expr_id != -1)
5264         {
5265           ivs->used_inv_expr[cp->inv_expr_id]++;
5266           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5267             ivs->num_used_inv_expr++;
5268         }
5269       iv_ca_recount_cost (data, ivs);
5270     }
5271 }
5272
5273 /* Extend set IVS by expressing USE by some of the candidates in it
5274    if possible. All important candidates will be considered
5275    if IMPORTANT_CANDIDATES is true.  */
5276
5277 static void
5278 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5279                struct iv_use *use, bool important_candidates)
5280 {
5281   struct cost_pair *best_cp = NULL, *cp;
5282   bitmap_iterator bi;
5283   bitmap cands;
5284   unsigned i;
5285
5286   gcc_assert (ivs->upto >= use->id);
5287
5288   if (ivs->upto == use->id)
5289     {
5290       ivs->upto++;
5291       ivs->bad_uses++;
5292     }
5293
5294   cands = (important_candidates ? data->important_candidates : ivs->cands);
5295   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5296     {
5297       struct iv_cand *cand = iv_cand (data, i);
5298
5299       cp = get_use_iv_cost (data, use, cand);
5300
5301       if (cheaper_cost_pair (cp, best_cp))
5302         best_cp = cp;
5303     }
5304
5305   iv_ca_set_cp (data, ivs, use, best_cp);
5306 }
5307
5308 /* Get cost for assignment IVS.  */
5309
5310 static comp_cost
5311 iv_ca_cost (struct iv_ca *ivs)
5312 {
5313   /* This was a conditional expression but it triggered a bug in
5314      Sun C 5.5.  */
5315   if (ivs->bad_uses)
5316     return infinite_cost;
5317   else
5318     return ivs->cost;
5319 }
5320
5321 /* Returns true if all dependences of CP are among invariants in IVS.  */
5322
5323 static bool
5324 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5325 {
5326   unsigned i;
5327   bitmap_iterator bi;
5328
5329   if (!cp->depends_on)
5330     return true;
5331
5332   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5333     {
5334       if (ivs->n_invariant_uses[i] == 0)
5335         return false;
5336     }
5337
5338   return true;
5339 }
5340
5341 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5342    it before NEXT_CHANGE.  */
5343
5344 static struct iv_ca_delta *
5345 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5346                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5347 {
5348   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5349
5350   change->use = use;
5351   change->old_cp = old_cp;
5352   change->new_cp = new_cp;
5353   change->next_change = next_change;
5354
5355   return change;
5356 }
5357
5358 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5359    are rewritten.  */
5360
5361 static struct iv_ca_delta *
5362 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5363 {
5364   struct iv_ca_delta *last;
5365
5366   if (!l2)
5367     return l1;
5368
5369   if (!l1)
5370     return l2;
5371
5372   for (last = l1; last->next_change; last = last->next_change)
5373     continue;
5374   last->next_change = l2;
5375
5376   return l1;
5377 }
5378
5379 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5380
5381 static struct iv_ca_delta *
5382 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5383 {
5384   struct iv_ca_delta *act, *next, *prev = NULL;
5385   struct cost_pair *tmp;
5386
5387   for (act = delta; act; act = next)
5388     {
5389       next = act->next_change;
5390       act->next_change = prev;
5391       prev = act;
5392
5393       tmp = act->old_cp;
5394       act->old_cp = act->new_cp;
5395       act->new_cp = tmp;
5396     }
5397
5398   return prev;
5399 }
5400
5401 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5402    reverted instead.  */
5403
5404 static void
5405 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5406                     struct iv_ca_delta *delta, bool forward)
5407 {
5408   struct cost_pair *from, *to;
5409   struct iv_ca_delta *act;
5410
5411   if (!forward)
5412     delta = iv_ca_delta_reverse (delta);
5413
5414   for (act = delta; act; act = act->next_change)
5415     {
5416       from = act->old_cp;
5417       to = act->new_cp;
5418       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5419       iv_ca_set_cp (data, ivs, act->use, to);
5420     }
5421
5422   if (!forward)
5423     iv_ca_delta_reverse (delta);
5424 }
5425
5426 /* Returns true if CAND is used in IVS.  */
5427
5428 static bool
5429 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5430 {
5431   return ivs->n_cand_uses[cand->id] > 0;
5432 }
5433
5434 /* Returns number of induction variable candidates in the set IVS.  */
5435
5436 static unsigned
5437 iv_ca_n_cands (struct iv_ca *ivs)
5438 {
5439   return ivs->n_cands;
5440 }
5441
5442 /* Free the list of changes DELTA.  */
5443
5444 static void
5445 iv_ca_delta_free (struct iv_ca_delta **delta)
5446 {
5447   struct iv_ca_delta *act, *next;
5448
5449   for (act = *delta; act; act = next)
5450     {
5451       next = act->next_change;
5452       free (act);
5453     }
5454
5455   *delta = NULL;
5456 }
5457
5458 /* Allocates new iv candidates assignment.  */
5459
5460 static struct iv_ca *
5461 iv_ca_new (struct ivopts_data *data)
5462 {
5463   struct iv_ca *nw = XNEW (struct iv_ca);
5464
5465   nw->upto = 0;
5466   nw->bad_uses = 0;
5467   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5468   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5469   nw->cands = BITMAP_ALLOC (NULL);
5470   nw->n_cands = 0;
5471   nw->n_regs = 0;
5472   nw->cand_use_cost = no_cost;
5473   nw->cand_cost = 0;
5474   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5475   nw->cost = no_cost;
5476   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5477   nw->num_used_inv_expr = 0;
5478
5479   return nw;
5480 }
5481
5482 /* Free memory occupied by the set IVS.  */
5483
5484 static void
5485 iv_ca_free (struct iv_ca **ivs)
5486 {
5487   free ((*ivs)->cand_for_use);
5488   free ((*ivs)->n_cand_uses);
5489   BITMAP_FREE ((*ivs)->cands);
5490   free ((*ivs)->n_invariant_uses);
5491   free ((*ivs)->used_inv_expr);
5492   free (*ivs);
5493   *ivs = NULL;
5494 }
5495
5496 /* Dumps IVS to FILE.  */
5497
5498 static void
5499 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5500 {
5501   const char *pref = "  invariants ";
5502   unsigned i;
5503   comp_cost cost = iv_ca_cost (ivs);
5504
5505   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5506   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5507            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5508   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5509
5510    for (i = 0; i < ivs->upto; i++)
5511     {
5512       struct iv_use *use = iv_use (data, i);
5513       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5514       if (cp)
5515         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5516                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5517       else
5518         fprintf (file, "   use:%d --> ??\n", use->id);
5519     }
5520
5521   for (i = 1; i <= data->max_inv_id; i++)
5522     if (ivs->n_invariant_uses[i])
5523       {
5524         fprintf (file, "%s%d", pref, i);
5525         pref = ", ";
5526       }
5527   fprintf (file, "\n\n");
5528 }
5529
5530 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5531    new set, and store differences in DELTA.  Number of induction variables
5532    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5533    the function will try to find a solution with mimimal iv candidates.  */
5534
5535 static comp_cost
5536 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5537               struct iv_cand *cand, struct iv_ca_delta **delta,
5538               unsigned *n_ivs, bool min_ncand)
5539 {
5540   unsigned i;
5541   comp_cost cost;
5542   struct iv_use *use;
5543   struct cost_pair *old_cp, *new_cp;
5544
5545   *delta = NULL;
5546   for (i = 0; i < ivs->upto; i++)
5547     {
5548       use = iv_use (data, i);
5549       old_cp = iv_ca_cand_for_use (ivs, use);
5550
5551       if (old_cp
5552           && old_cp->cand == cand)
5553         continue;
5554
5555       new_cp = get_use_iv_cost (data, use, cand);
5556       if (!new_cp)
5557         continue;
5558
5559       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5560         continue;
5561
5562       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5563         continue;
5564
5565       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5566     }
5567
5568   iv_ca_delta_commit (data, ivs, *delta, true);
5569   cost = iv_ca_cost (ivs);
5570   if (n_ivs)
5571     *n_ivs = iv_ca_n_cands (ivs);
5572   iv_ca_delta_commit (data, ivs, *delta, false);
5573
5574   return cost;
5575 }
5576
5577 /* Try narrowing set IVS by removing CAND.  Return the cost of
5578    the new set and store the differences in DELTA.  */
5579
5580 static comp_cost
5581 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5582               struct iv_cand *cand, struct iv_ca_delta **delta)
5583 {
5584   unsigned i, ci;
5585   struct iv_use *use;
5586   struct cost_pair *old_cp, *new_cp, *cp;
5587   bitmap_iterator bi;
5588   struct iv_cand *cnd;
5589   comp_cost cost;
5590
5591   *delta = NULL;
5592   for (i = 0; i < n_iv_uses (data); i++)
5593     {
5594       use = iv_use (data, i);
5595
5596       old_cp = iv_ca_cand_for_use (ivs, use);
5597       if (old_cp->cand != cand)
5598         continue;
5599
5600       new_cp = NULL;
5601
5602       if (data->consider_all_candidates)
5603         {
5604           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5605             {
5606               if (ci == cand->id)
5607                 continue;
5608
5609               cnd = iv_cand (data, ci);
5610
5611               cp = get_use_iv_cost (data, use, cnd);
5612               if (!cp)
5613                 continue;
5614
5615               if (!iv_ca_has_deps (ivs, cp))
5616                 continue;
5617
5618               if (!cheaper_cost_pair (cp, new_cp))
5619                 continue;
5620
5621               new_cp = cp;
5622             }
5623         }
5624       else
5625         {
5626           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5627             {
5628               if (ci == cand->id)
5629                 continue;
5630
5631               cnd = iv_cand (data, ci);
5632
5633               cp = get_use_iv_cost (data, use, cnd);
5634               if (!cp)
5635                 continue;
5636               if (!iv_ca_has_deps (ivs, cp))
5637                 continue;
5638
5639               if (!cheaper_cost_pair (cp, new_cp))
5640                 continue;
5641
5642               new_cp = cp;
5643             }
5644         }
5645
5646       if (!new_cp)
5647         {
5648           iv_ca_delta_free (delta);
5649           return infinite_cost;
5650         }
5651
5652       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5653     }
5654
5655   iv_ca_delta_commit (data, ivs, *delta, true);
5656   cost = iv_ca_cost (ivs);
5657   iv_ca_delta_commit (data, ivs, *delta, false);
5658
5659   return cost;
5660 }
5661
5662 /* Try optimizing the set of candidates IVS by removing candidates different
5663    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5664    differences in DELTA.  */
5665
5666 static comp_cost
5667 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5668              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5669 {
5670   bitmap_iterator bi;
5671   struct iv_ca_delta *act_delta, *best_delta;
5672   unsigned i;
5673   comp_cost best_cost, acost;
5674   struct iv_cand *cand;
5675
5676   best_delta = NULL;
5677   best_cost = iv_ca_cost (ivs);
5678
5679   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5680     {
5681       cand = iv_cand (data, i);
5682
5683       if (cand == except_cand)
5684         continue;
5685
5686       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5687
5688       if (compare_costs (acost, best_cost) < 0)
5689         {
5690           best_cost = acost;
5691           iv_ca_delta_free (&best_delta);
5692           best_delta = act_delta;
5693         }
5694       else
5695         iv_ca_delta_free (&act_delta);
5696     }
5697
5698   if (!best_delta)
5699     {
5700       *delta = NULL;
5701       return best_cost;
5702     }
5703
5704   /* Recurse to possibly remove other unnecessary ivs.  */
5705   iv_ca_delta_commit (data, ivs, best_delta, true);
5706   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5707   iv_ca_delta_commit (data, ivs, best_delta, false);
5708   *delta = iv_ca_delta_join (best_delta, *delta);
5709   return best_cost;
5710 }
5711
5712 /* Tries to extend the sets IVS in the best possible way in order
5713    to express the USE.  If ORIGINALP is true, prefer candidates from
5714    the original set of IVs, otherwise favor important candidates not
5715    based on any memory object.  */
5716
5717 static bool
5718 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5719                   struct iv_use *use, bool originalp)
5720 {
5721   comp_cost best_cost, act_cost;
5722   unsigned i;
5723   bitmap_iterator bi;
5724   struct iv_cand *cand;
5725   struct iv_ca_delta *best_delta = NULL, *act_delta;
5726   struct cost_pair *cp;
5727
5728   iv_ca_add_use (data, ivs, use, false);
5729   best_cost = iv_ca_cost (ivs);
5730
5731   cp = iv_ca_cand_for_use (ivs, use);
5732   if (!cp)
5733     {
5734       ivs->upto--;
5735       ivs->bad_uses--;
5736       iv_ca_add_use (data, ivs, use, true);
5737       best_cost = iv_ca_cost (ivs);
5738       cp = iv_ca_cand_for_use (ivs, use);
5739     }
5740   if (cp)
5741     {
5742       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5743       iv_ca_set_no_cp (data, ivs, use);
5744     }
5745
5746   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5747      first try important candidates not based on any memory object.  Only if
5748      this fails, try the specific ones.  Rationale -- in loops with many
5749      variables the best choice often is to use just one generic biv.  If we
5750      added here many ivs specific to the uses, the optimization algorithm later
5751      would be likely to get stuck in a local minimum, thus causing us to create
5752      too many ivs.  The approach from few ivs to more seems more likely to be
5753      successful -- starting from few ivs, replacing an expensive use by a
5754      specific iv should always be a win.  */
5755   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5756     {
5757       cand = iv_cand (data, i);
5758
5759       if (originalp && cand->pos !=IP_ORIGINAL)
5760         continue;
5761
5762       if (!originalp && cand->iv->base_object != NULL_TREE)
5763         continue;
5764
5765       if (iv_ca_cand_used_p (ivs, cand))
5766         continue;
5767
5768       cp = get_use_iv_cost (data, use, cand);
5769       if (!cp)
5770         continue;
5771
5772       iv_ca_set_cp (data, ivs, use, cp);
5773       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5774                                true);
5775       iv_ca_set_no_cp (data, ivs, use);
5776       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5777
5778       if (compare_costs (act_cost, best_cost) < 0)
5779         {
5780           best_cost = act_cost;
5781
5782           iv_ca_delta_free (&best_delta);
5783           best_delta = act_delta;
5784         }
5785       else
5786         iv_ca_delta_free (&act_delta);
5787     }
5788
5789   if (infinite_cost_p (best_cost))
5790     {
5791       for (i = 0; i < use->n_map_members; i++)
5792         {
5793           cp = use->cost_map + i;
5794           cand = cp->cand;
5795           if (!cand)
5796             continue;
5797
5798           /* Already tried this.  */
5799           if (cand->important)
5800             {
5801               if (originalp && cand->pos == IP_ORIGINAL)
5802                 continue;
5803               if (!originalp && cand->iv->base_object == NULL_TREE)
5804                 continue;
5805             }
5806
5807           if (iv_ca_cand_used_p (ivs, cand))
5808             continue;
5809
5810           act_delta = NULL;
5811           iv_ca_set_cp (data, ivs, use, cp);
5812           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5813           iv_ca_set_no_cp (data, ivs, use);
5814           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5815                                        cp, act_delta);
5816
5817           if (compare_costs (act_cost, best_cost) < 0)
5818             {
5819               best_cost = act_cost;
5820
5821               if (best_delta)
5822                 iv_ca_delta_free (&best_delta);
5823               best_delta = act_delta;
5824             }
5825           else
5826             iv_ca_delta_free (&act_delta);
5827         }
5828     }
5829
5830   iv_ca_delta_commit (data, ivs, best_delta, true);
5831   iv_ca_delta_free (&best_delta);
5832
5833   return !infinite_cost_p (best_cost);
5834 }
5835
5836 /* Finds an initial assignment of candidates to uses.  */
5837
5838 static struct iv_ca *
5839 get_initial_solution (struct ivopts_data *data, bool originalp)
5840 {
5841   struct iv_ca *ivs = iv_ca_new (data);
5842   unsigned i;
5843
5844   for (i = 0; i < n_iv_uses (data); i++)
5845     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5846       {
5847         iv_ca_free (&ivs);
5848         return NULL;
5849       }
5850
5851   return ivs;
5852 }
5853
5854 /* Tries to improve set of induction variables IVS.  */
5855
5856 static bool
5857 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5858 {
5859   unsigned i, n_ivs;
5860   comp_cost acost, best_cost = iv_ca_cost (ivs);
5861   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5862   struct iv_cand *cand;
5863
5864   /* Try extending the set of induction variables by one.  */
5865   for (i = 0; i < n_iv_cands (data); i++)
5866     {
5867       cand = iv_cand (data, i);
5868
5869       if (iv_ca_cand_used_p (ivs, cand))
5870         continue;
5871
5872       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5873       if (!act_delta)
5874         continue;
5875
5876       /* If we successfully added the candidate and the set is small enough,
5877          try optimizing it by removing other candidates.  */
5878       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5879         {
5880           iv_ca_delta_commit (data, ivs, act_delta, true);
5881           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5882           iv_ca_delta_commit (data, ivs, act_delta, false);
5883           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5884         }
5885
5886       if (compare_costs (acost, best_cost) < 0)
5887         {
5888           best_cost = acost;
5889           iv_ca_delta_free (&best_delta);
5890           best_delta = act_delta;
5891         }
5892       else
5893         iv_ca_delta_free (&act_delta);
5894     }
5895
5896   if (!best_delta)
5897     {
5898       /* Try removing the candidates from the set instead.  */
5899       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5900
5901       /* Nothing more we can do.  */
5902       if (!best_delta)
5903         return false;
5904     }
5905
5906   iv_ca_delta_commit (data, ivs, best_delta, true);
5907   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5908   iv_ca_delta_free (&best_delta);
5909   return true;
5910 }
5911
5912 /* Attempts to find the optimal set of induction variables.  We do simple
5913    greedy heuristic -- we try to replace at most one candidate in the selected
5914    solution and remove the unused ivs while this improves the cost.  */
5915
5916 static struct iv_ca *
5917 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5918 {
5919   struct iv_ca *set;
5920
5921   /* Get the initial solution.  */
5922   set = get_initial_solution (data, originalp);
5923   if (!set)
5924     {
5925       if (dump_file && (dump_flags & TDF_DETAILS))
5926         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5927       return NULL;
5928     }
5929
5930   if (dump_file && (dump_flags & TDF_DETAILS))
5931     {
5932       fprintf (dump_file, "Initial set of candidates:\n");
5933       iv_ca_dump (data, dump_file, set);
5934     }
5935
5936   while (try_improve_iv_set (data, set))
5937     {
5938       if (dump_file && (dump_flags & TDF_DETAILS))
5939         {
5940           fprintf (dump_file, "Improved to:\n");
5941           iv_ca_dump (data, dump_file, set);
5942         }
5943     }
5944
5945   return set;
5946 }
5947
5948 static struct iv_ca *
5949 find_optimal_iv_set (struct ivopts_data *data)
5950 {
5951   unsigned i;
5952   struct iv_ca *set, *origset;
5953   struct iv_use *use;
5954   comp_cost cost, origcost;
5955
5956   /* Determine the cost based on a strategy that starts with original IVs,
5957      and try again using a strategy that prefers candidates not based
5958      on any IVs.  */
5959   origset = find_optimal_iv_set_1 (data, true);
5960   set = find_optimal_iv_set_1 (data, false);
5961
5962   if (!origset && !set)
5963     return NULL;
5964
5965   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5966   cost = set ? iv_ca_cost (set) : infinite_cost;
5967
5968   if (dump_file && (dump_flags & TDF_DETAILS))
5969     {
5970       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5971                origcost.cost, origcost.complexity);
5972       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5973                cost.cost, cost.complexity);
5974     }
5975
5976   /* Choose the one with the best cost.  */
5977   if (compare_costs (origcost, cost) <= 0)
5978     {
5979       if (set)
5980         iv_ca_free (&set);
5981       set = origset;
5982     }
5983   else if (origset)
5984     iv_ca_free (&origset);
5985
5986   for (i = 0; i < n_iv_uses (data); i++)
5987     {
5988       use = iv_use (data, i);
5989       use->selected = iv_ca_cand_for_use (set, use)->cand;
5990     }
5991
5992   return set;
5993 }
5994
5995 /* Creates a new induction variable corresponding to CAND.  */
5996
5997 static void
5998 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5999 {
6000   gimple_stmt_iterator incr_pos;
6001   tree base;
6002   bool after = false;
6003
6004   if (!cand->iv)
6005     return;
6006
6007   switch (cand->pos)
6008     {
6009     case IP_NORMAL:
6010       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6011       break;
6012
6013     case IP_END:
6014       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6015       after = true;
6016       break;
6017
6018     case IP_AFTER_USE:
6019       after = true;
6020       /* fall through */
6021     case IP_BEFORE_USE:
6022       incr_pos = gsi_for_stmt (cand->incremented_at);
6023       break;
6024
6025     case IP_ORIGINAL:
6026       /* Mark that the iv is preserved.  */
6027       name_info (data, cand->var_before)->preserve_biv = true;
6028       name_info (data, cand->var_after)->preserve_biv = true;
6029
6030       /* Rewrite the increment so that it uses var_before directly.  */
6031       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6032       return;
6033     }
6034
6035   gimple_add_tmp_var (cand->var_before);
6036
6037   base = unshare_expr (cand->iv->base);
6038
6039   create_iv (base, unshare_expr (cand->iv->step),
6040              cand->var_before, data->current_loop,
6041              &incr_pos, after, &cand->var_before, &cand->var_after);
6042 }
6043
6044 /* Creates new induction variables described in SET.  */
6045
6046 static void
6047 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6048 {
6049   unsigned i;
6050   struct iv_cand *cand;
6051   bitmap_iterator bi;
6052
6053   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6054     {
6055       cand = iv_cand (data, i);
6056       create_new_iv (data, cand);
6057     }
6058
6059   if (dump_file && (dump_flags & TDF_DETAILS))
6060     {
6061       fprintf (dump_file, "\nSelected IV set: \n");
6062       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6063         {
6064           cand = iv_cand (data, i);
6065           dump_cand (dump_file, cand);
6066         }
6067       fprintf (dump_file, "\n");
6068     }
6069 }
6070
6071 /* Rewrites USE (definition of iv used in a nonlinear expression)
6072    using candidate CAND.  */
6073
6074 static void
6075 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6076                             struct iv_use *use, struct iv_cand *cand)
6077 {
6078   tree comp;
6079   tree op, tgt;
6080   gimple ass;
6081   gimple_stmt_iterator bsi;
6082
6083   /* An important special case -- if we are asked to express value of
6084      the original iv by itself, just exit; there is no need to
6085      introduce a new computation (that might also need casting the
6086      variable to unsigned and back).  */
6087   if (cand->pos == IP_ORIGINAL
6088       && cand->incremented_at == use->stmt)
6089     {
6090       enum tree_code stmt_code;
6091
6092       gcc_assert (is_gimple_assign (use->stmt));
6093       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6094
6095       /* Check whether we may leave the computation unchanged.
6096          This is the case only if it does not rely on other
6097          computations in the loop -- otherwise, the computation
6098          we rely upon may be removed in remove_unused_ivs,
6099          thus leading to ICE.  */
6100       stmt_code = gimple_assign_rhs_code (use->stmt);
6101       if (stmt_code == PLUS_EXPR
6102           || stmt_code == MINUS_EXPR
6103           || stmt_code == POINTER_PLUS_EXPR)
6104         {
6105           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6106             op = gimple_assign_rhs2 (use->stmt);
6107           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6108             op = gimple_assign_rhs1 (use->stmt);
6109           else
6110             op = NULL_TREE;
6111         }
6112       else
6113         op = NULL_TREE;
6114
6115       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6116         return;
6117     }
6118
6119   comp = get_computation (data->current_loop, use, cand);
6120   gcc_assert (comp != NULL_TREE);
6121
6122   switch (gimple_code (use->stmt))
6123     {
6124     case GIMPLE_PHI:
6125       tgt = PHI_RESULT (use->stmt);
6126
6127       /* If we should keep the biv, do not replace it.  */
6128       if (name_info (data, tgt)->preserve_biv)
6129         return;
6130
6131       bsi = gsi_after_labels (gimple_bb (use->stmt));
6132       break;
6133
6134     case GIMPLE_ASSIGN:
6135       tgt = gimple_assign_lhs (use->stmt);
6136       bsi = gsi_for_stmt (use->stmt);
6137       break;
6138
6139     default:
6140       gcc_unreachable ();
6141     }
6142
6143   if (!valid_gimple_rhs_p (comp)
6144       || (gimple_code (use->stmt) != GIMPLE_PHI
6145           /* We can't allow re-allocating the stmt as it might be pointed
6146              to still.  */
6147           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6148               >= gimple_num_ops (gsi_stmt (bsi)))))
6149     {
6150       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6151                                        true, GSI_SAME_STMT);
6152       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6153         {
6154           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6155           /* As this isn't a plain copy we have to reset alignment
6156              information.  */
6157           if (SSA_NAME_PTR_INFO (comp))
6158             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6159         }
6160     }
6161
6162   if (gimple_code (use->stmt) == GIMPLE_PHI)
6163     {
6164       ass = gimple_build_assign (tgt, comp);
6165       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6166
6167       bsi = gsi_for_stmt (use->stmt);
6168       remove_phi_node (&bsi, false);
6169     }
6170   else
6171     {
6172       gimple_assign_set_rhs_from_tree (&bsi, comp);
6173       use->stmt = gsi_stmt (bsi);
6174     }
6175 }
6176
6177 /* Performs a peephole optimization to reorder the iv update statement with
6178    a mem ref to enable instruction combining in later phases. The mem ref uses
6179    the iv value before the update, so the reordering transformation requires
6180    adjustment of the offset. CAND is the selected IV_CAND.
6181
6182    Example:
6183
6184    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6185    iv2 = iv1 + 1;
6186
6187    if (t < val)      (1)
6188      goto L;
6189    goto Head;
6190
6191
6192    directly propagating t over to (1) will introduce overlapping live range
6193    thus increase register pressure. This peephole transform it into:
6194
6195
6196    iv2 = iv1 + 1;
6197    t = MEM_REF (base, iv2, 8, 8);
6198    if (t < val)
6199      goto L;
6200    goto Head;
6201 */
6202
6203 static void
6204 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6205 {
6206   tree var_after;
6207   gimple iv_update, stmt;
6208   basic_block bb;
6209   gimple_stmt_iterator gsi, gsi_iv;
6210
6211   if (cand->pos != IP_NORMAL)
6212     return;
6213
6214   var_after = cand->var_after;
6215   iv_update = SSA_NAME_DEF_STMT (var_after);
6216
6217   bb = gimple_bb (iv_update);
6218   gsi = gsi_last_nondebug_bb (bb);
6219   stmt = gsi_stmt (gsi);
6220
6221   /* Only handle conditional statement for now.  */
6222   if (gimple_code (stmt) != GIMPLE_COND)
6223     return;
6224
6225   gsi_prev_nondebug (&gsi);
6226   stmt = gsi_stmt (gsi);
6227   if (stmt != iv_update)
6228     return;
6229
6230   gsi_prev_nondebug (&gsi);
6231   if (gsi_end_p (gsi))
6232     return;
6233
6234   stmt = gsi_stmt (gsi);
6235   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6236     return;
6237
6238   if (stmt != use->stmt)
6239     return;
6240
6241   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6242     return;
6243
6244   if (dump_file && (dump_flags & TDF_DETAILS))
6245     {
6246       fprintf (dump_file, "Reordering \n");
6247       print_gimple_stmt (dump_file, iv_update, 0, 0);
6248       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6249       fprintf (dump_file, "\n");
6250     }
6251
6252   gsi = gsi_for_stmt (use->stmt);
6253   gsi_iv = gsi_for_stmt (iv_update);
6254   gsi_move_before (&gsi_iv, &gsi);
6255
6256   cand->pos = IP_BEFORE_USE;
6257   cand->incremented_at = use->stmt;
6258 }
6259
6260 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6261
6262 static void
6263 rewrite_use_address (struct ivopts_data *data,
6264                      struct iv_use *use, struct iv_cand *cand)
6265 {
6266   aff_tree aff;
6267   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6268   tree base_hint = NULL_TREE;
6269   tree ref, iv;
6270   bool ok;
6271
6272   adjust_iv_update_pos (cand, use);
6273   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6274   gcc_assert (ok);
6275   unshare_aff_combination (&aff);
6276
6277   /* To avoid undefined overflow problems, all IV candidates use unsigned
6278      integer types.  The drawback is that this makes it impossible for
6279      create_mem_ref to distinguish an IV that is based on a memory object
6280      from one that represents simply an offset.
6281
6282      To work around this problem, we pass a hint to create_mem_ref that
6283      indicates which variable (if any) in aff is an IV based on a memory
6284      object.  Note that we only consider the candidate.  If this is not
6285      based on an object, the base of the reference is in some subexpression
6286      of the use -- but these will use pointer types, so they are recognized
6287      by the create_mem_ref heuristics anyway.  */
6288   if (cand->iv->base_object)
6289     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6290
6291   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6292   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6293                         reference_alias_ptr_type (*use->op_p),
6294                         iv, base_hint, data->speed);
6295   copy_ref_info (ref, *use->op_p);
6296   *use->op_p = ref;
6297 }
6298
6299 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6300    candidate CAND.  */
6301
6302 static void
6303 rewrite_use_compare (struct ivopts_data *data,
6304                      struct iv_use *use, struct iv_cand *cand)
6305 {
6306   tree comp, *var_p, op, bound;
6307   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6308   enum tree_code compare;
6309   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6310   bool ok;
6311
6312   bound = cp->value;
6313   if (bound)
6314     {
6315       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6316       tree var_type = TREE_TYPE (var);
6317       gimple_seq stmts;
6318
6319       if (dump_file && (dump_flags & TDF_DETAILS))
6320         {
6321           fprintf (dump_file, "Replacing exit test: ");
6322           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6323         }
6324       compare = cp->comp;
6325       bound = unshare_expr (fold_convert (var_type, bound));
6326       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6327       if (stmts)
6328         gsi_insert_seq_on_edge_immediate (
6329                 loop_preheader_edge (data->current_loop),
6330                 stmts);
6331
6332       gimple_cond_set_lhs (use->stmt, var);
6333       gimple_cond_set_code (use->stmt, compare);
6334       gimple_cond_set_rhs (use->stmt, op);
6335       return;
6336     }
6337
6338   /* The induction variable elimination failed; just express the original
6339      giv.  */
6340   comp = get_computation (data->current_loop, use, cand);
6341   gcc_assert (comp != NULL_TREE);
6342
6343   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6344   gcc_assert (ok);
6345
6346   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6347                                      true, GSI_SAME_STMT);
6348 }
6349
6350 /* Rewrites USE using candidate CAND.  */
6351
6352 static void
6353 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6354 {
6355   switch (use->type)
6356     {
6357       case USE_NONLINEAR_EXPR:
6358         rewrite_use_nonlinear_expr (data, use, cand);
6359         break;
6360
6361       case USE_ADDRESS:
6362         rewrite_use_address (data, use, cand);
6363         break;
6364
6365       case USE_COMPARE:
6366         rewrite_use_compare (data, use, cand);
6367         break;
6368
6369       default:
6370         gcc_unreachable ();
6371     }
6372
6373   update_stmt (use->stmt);
6374 }
6375
6376 /* Rewrite the uses using the selected induction variables.  */
6377
6378 static void
6379 rewrite_uses (struct ivopts_data *data)
6380 {
6381   unsigned i;
6382   struct iv_cand *cand;
6383   struct iv_use *use;
6384
6385   for (i = 0; i < n_iv_uses (data); i++)
6386     {
6387       use = iv_use (data, i);
6388       cand = use->selected;
6389       gcc_assert (cand);
6390
6391       rewrite_use (data, use, cand);
6392     }
6393 }
6394
6395 /* Removes the ivs that are not used after rewriting.  */
6396
6397 static void
6398 remove_unused_ivs (struct ivopts_data *data)
6399 {
6400   unsigned j;
6401   bitmap_iterator bi;
6402   bitmap toremove = BITMAP_ALLOC (NULL);
6403
6404   /* Figure out an order in which to release SSA DEFs so that we don't
6405      release something that we'd have to propagate into a debug stmt
6406      afterwards.  */
6407   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6408     {
6409       struct version_info *info;
6410
6411       info = ver_info (data, j);
6412       if (info->iv
6413           && !integer_zerop (info->iv->step)
6414           && !info->inv_id
6415           && !info->iv->have_use_for
6416           && !info->preserve_biv)
6417         {
6418           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6419
6420           tree def = info->iv->ssa_name;
6421
6422           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6423             {
6424               imm_use_iterator imm_iter;
6425               use_operand_p use_p;
6426               gimple stmt;
6427               int count = 0;
6428
6429               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6430                 {
6431                   if (!gimple_debug_bind_p (stmt))
6432                     continue;
6433
6434                   /* We just want to determine whether to do nothing
6435                      (count == 0), to substitute the computed
6436                      expression into a single use of the SSA DEF by
6437                      itself (count == 1), or to use a debug temp
6438                      because the SSA DEF is used multiple times or as
6439                      part of a larger expression (count > 1). */
6440                   count++;
6441                   if (gimple_debug_bind_get_value (stmt) != def)
6442                     count++;
6443
6444                   if (count > 1)
6445                     BREAK_FROM_IMM_USE_STMT (imm_iter);
6446                 }
6447
6448               if (!count)
6449                 continue;
6450
6451               struct iv_use dummy_use;
6452               struct iv_cand *best_cand = NULL, *cand;
6453               unsigned i, best_pref = 0, cand_pref;
6454
6455               memset (&dummy_use, 0, sizeof (dummy_use));
6456               dummy_use.iv = info->iv;
6457               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6458                 {
6459                   cand = iv_use (data, i)->selected;
6460                   if (cand == best_cand)
6461                     continue;
6462                   cand_pref = operand_equal_p (cand->iv->step,
6463                                                info->iv->step, 0)
6464                     ? 4 : 0;
6465                   cand_pref
6466                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
6467                     == TYPE_MODE (TREE_TYPE (info->iv->base))
6468                     ? 2 : 0;
6469                   cand_pref
6470                     += TREE_CODE (cand->iv->base) == INTEGER_CST
6471                     ? 1 : 0;
6472                   if (best_cand == NULL || best_pref < cand_pref)
6473                     {
6474                       best_cand = cand;
6475                       best_pref = cand_pref;
6476                     }
6477                 }
6478
6479               if (!best_cand)
6480                 continue;
6481
6482               tree comp = get_computation_at (data->current_loop,
6483                                               &dummy_use, best_cand,
6484                                               SSA_NAME_DEF_STMT (def));
6485               if (!comp)
6486                 continue;
6487
6488               if (count > 1)
6489                 {
6490                   tree vexpr = make_node (DEBUG_EXPR_DECL);
6491                   DECL_ARTIFICIAL (vexpr) = 1;
6492                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
6493                   if (SSA_NAME_VAR (def))
6494                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6495                   else
6496                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6497                   gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6498                   gimple_stmt_iterator gsi;
6499
6500                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6501                     gsi = gsi_after_labels (gimple_bb
6502                                             (SSA_NAME_DEF_STMT (def)));
6503                   else
6504                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6505
6506                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6507                   comp = vexpr;
6508                 }
6509
6510               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6511                 {
6512                   if (!gimple_debug_bind_p (stmt))
6513                     continue;
6514
6515                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6516                     SET_USE (use_p, comp);
6517
6518                   update_stmt (stmt);
6519                 }
6520             }
6521         }
6522     }
6523
6524   release_defs_bitset (toremove);
6525
6526   BITMAP_FREE (toremove);
6527 }
6528
6529 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6530    for pointer_map_traverse.  */
6531
6532 static bool
6533 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6534                       void *data ATTRIBUTE_UNUSED)
6535 {
6536   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6537
6538   free (niter);
6539   return true;
6540 }
6541
6542 /* Frees data allocated by the optimization of a single loop.  */
6543
6544 static void
6545 free_loop_data (struct ivopts_data *data)
6546 {
6547   unsigned i, j;
6548   bitmap_iterator bi;
6549   tree obj;
6550
6551   if (data->niters)
6552     {
6553       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6554       pointer_map_destroy (data->niters);
6555       data->niters = NULL;
6556     }
6557
6558   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6559     {
6560       struct version_info *info;
6561
6562       info = ver_info (data, i);
6563       free (info->iv);
6564       info->iv = NULL;
6565       info->has_nonlin_use = false;
6566       info->preserve_biv = false;
6567       info->inv_id = 0;
6568     }
6569   bitmap_clear (data->relevant);
6570   bitmap_clear (data->important_candidates);
6571
6572   for (i = 0; i < n_iv_uses (data); i++)
6573     {
6574       struct iv_use *use = iv_use (data, i);
6575
6576       free (use->iv);
6577       BITMAP_FREE (use->related_cands);
6578       for (j = 0; j < use->n_map_members; j++)
6579         if (use->cost_map[j].depends_on)
6580           BITMAP_FREE (use->cost_map[j].depends_on);
6581       free (use->cost_map);
6582       free (use);
6583     }
6584   data->iv_uses.truncate (0);
6585
6586   for (i = 0; i < n_iv_cands (data); i++)
6587     {
6588       struct iv_cand *cand = iv_cand (data, i);
6589
6590       free (cand->iv);
6591       if (cand->depends_on)
6592         BITMAP_FREE (cand->depends_on);
6593       free (cand);
6594     }
6595   data->iv_candidates.truncate (0);
6596
6597   if (data->version_info_size < num_ssa_names)
6598     {
6599       data->version_info_size = 2 * num_ssa_names;
6600       free (data->version_info);
6601       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6602     }
6603
6604   data->max_inv_id = 0;
6605
6606   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6607     SET_DECL_RTL (obj, NULL_RTX);
6608
6609   decl_rtl_to_reset.truncate (0);
6610
6611   htab_empty (data->inv_expr_tab);
6612   data->inv_expr_id = 0;
6613 }
6614
6615 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6616    loop tree.  */
6617
6618 static void
6619 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6620 {
6621   free_loop_data (data);
6622   free (data->version_info);
6623   BITMAP_FREE (data->relevant);
6624   BITMAP_FREE (data->important_candidates);
6625
6626   decl_rtl_to_reset.release ();
6627   data->iv_uses.release ();
6628   data->iv_candidates.release ();
6629   htab_delete (data->inv_expr_tab);
6630 }
6631
6632 /* Returns true if the loop body BODY includes any function calls.  */
6633
6634 static bool
6635 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6636 {
6637   gimple_stmt_iterator gsi;
6638   unsigned i;
6639
6640   for (i = 0; i < num_nodes; i++)
6641     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6642       {
6643         gimple stmt = gsi_stmt (gsi);
6644         if (is_gimple_call (stmt)
6645             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6646           return true;
6647       }
6648   return false;
6649 }
6650
6651 /* Optimizes the LOOP.  Returns true if anything changed.  */
6652
6653 static bool
6654 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6655 {
6656   bool changed = false;
6657   struct iv_ca *iv_ca;
6658   edge exit = single_dom_exit (loop);
6659   basic_block *body;
6660
6661   gcc_assert (!data->niters);
6662   data->current_loop = loop;
6663   data->speed = optimize_loop_for_speed_p (loop);
6664
6665   if (dump_file && (dump_flags & TDF_DETAILS))
6666     {
6667       fprintf (dump_file, "Processing loop %d\n", loop->num);
6668
6669       if (exit)
6670         {
6671           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6672                    exit->src->index, exit->dest->index);
6673           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6674           fprintf (dump_file, "\n");
6675         }
6676
6677       fprintf (dump_file, "\n");
6678     }
6679
6680   body = get_loop_body (loop);
6681   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6682   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6683   free (body);
6684
6685   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6686
6687   /* For each ssa name determines whether it behaves as an induction variable
6688      in some loop.  */
6689   if (!find_induction_variables (data))
6690     goto finish;
6691
6692   /* Finds interesting uses (item 1).  */
6693   find_interesting_uses (data);
6694   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6695     goto finish;
6696
6697   /* Finds candidates for the induction variables (item 2).  */
6698   find_iv_candidates (data);
6699
6700   /* Calculates the costs (item 3, part 1).  */
6701   determine_iv_costs (data);
6702   determine_use_iv_costs (data);
6703   determine_set_costs (data);
6704
6705   /* Find the optimal set of induction variables (item 3, part 2).  */
6706   iv_ca = find_optimal_iv_set (data);
6707   if (!iv_ca)
6708     goto finish;
6709   changed = true;
6710
6711   /* Create the new induction variables (item 4, part 1).  */
6712   create_new_ivs (data, iv_ca);
6713   iv_ca_free (&iv_ca);
6714
6715   /* Rewrite the uses (item 4, part 2).  */
6716   rewrite_uses (data);
6717
6718   /* Remove the ivs that are unused after rewriting.  */
6719   remove_unused_ivs (data);
6720
6721   /* We have changed the structure of induction variables; it might happen
6722      that definitions in the scev database refer to some of them that were
6723      eliminated.  */
6724   scev_reset ();
6725
6726 finish:
6727   free_loop_data (data);
6728
6729   return changed;
6730 }
6731
6732 /* Main entry point.  Optimizes induction variables in loops.  */
6733
6734 void
6735 tree_ssa_iv_optimize (void)
6736 {
6737   struct loop *loop;
6738   struct ivopts_data data;
6739   loop_iterator li;
6740
6741   tree_ssa_iv_optimize_init (&data);
6742
6743   /* Optimize the loops starting with the innermost ones.  */
6744   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6745     {
6746       if (dump_file && (dump_flags & TDF_DETAILS))
6747         flow_loop_dump (loop, dump_file, NULL, 1);
6748
6749       tree_ssa_iv_optimize_loop (&data, loop);
6750     }
6751
6752   tree_ssa_iv_optimize_finalize (&data);
6753 }