/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
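
/* As a purely illustrative example (not actual compiler output) of the
   effect of the pass: a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may have the address use "&a[i]" rewritten in terms of a single pointer
   induction variable, giving roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which replaces the multiply-and-add address computation by a simple
   pointer increment.  */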
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
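
/* E.g. if the profile estimates that LOOP iterates about 100 times, this
   returns 100; without any estimate it falls back to the AVG_LOOP_NITER
   default of 5 (illustrative numbers).  */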

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
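
/* For instance, for the variable i in "for (i = 10; i < n; i += 4)",
   base is 10 and step is 4 (an illustrative example).  */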

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
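
/* E.g. a comp_cost of {4, 1} stands for an estimated runtime cost of 4
   with a simple addressing mode, while {4, 3} has the same runtime cost
   but requires a more complex computation (illustrative values only).  */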

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *) ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *) ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *) ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
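
/* E.g. for "&a[i]" the returned base object is "&a", while for an integer
   constant (a pointer obtained by a cast from a constant) NULL_TREE is
   returned (illustrative example).  */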

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
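
/* E.g. for a loop-header phi "i_1 = PHI <0(preheader), i_2(latch)>" with
   "i_2 = i_1 + 1" in the body, the returned step is 1; if the latch value
   is not a simple affine function of the phi result, NULL_TREE is
   returned (illustrative example).  */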

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
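
/* For instance, with TOP = (n + 1) * 8 and BOT = n + 1 the MULT_EXPR case
   yields cst = 8, and with TOP = 24, BOT = 8 the INTEGER_CST case yields
   cst = 3; TOP = 25, BOT = 8 fails because the division leaves a remainder
   (illustrative examples).  */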

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
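
/* E.g. stripping "p + 24" yields "p" with *OFFSET == 24, and stripping
   "&a[3]" for an array with 4-byte elements yields "&a" with
   *OFFSET == 12 (illustrative examples).  */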

/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
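
/* E.g. the generic type for "int" is "unsigned int", and for a pointer
   type it is an unsigned integer type of the same precision (illustrative
   example).  */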

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2304 /* Returns true if incrementing the induction variable at the end of the LOOP
2305 is allowed.
2307 The purpose is to avoid splitting the latch edge with a biv increment, thus
2308 creating a jump, possibly confusing other optimization passes and leaving
2309 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2310 is not available (so we do not have a better alternative), or if the latch
2311 edge is already nonempty. */
2313 static bool
2314 allow_ip_end_pos_p (struct loop *loop)
2316 if (!ip_normal_pos (loop))
2317 return true;
2319 if (!empty_block_p (ip_end_pos (loop)))
2320 return true;
2322 return false;
2325 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2326 Important field is set to IMPORTANT. */
2328 static void
2329 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2330 bool important, struct iv_use *use)
2332 basic_block use_bb = gimple_bb (use->stmt);
2333 enum machine_mode mem_mode;
2334 unsigned HOST_WIDE_INT cstepi;
2336 /* If we insert the increment in any position other than the standard
2337 ones, we must ensure that it is incremented once per iteration.
2338 It must not be in an inner nested loop, or one side of an if
2339 statement. */
2340 if (use_bb->loop_father != data->current_loop
2341 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2342 || stmt_could_throw_p (use->stmt)
2343 || !cst_and_fits_in_hwi (step))
2344 return;
2346 cstepi = int_cst_value (step);
2348 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2349 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2350 || USE_STORE_PRE_INCREMENT (mem_mode))
2351 && GET_MODE_SIZE (mem_mode) == cstepi)
2352 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2353 || USE_STORE_PRE_DECREMENT (mem_mode))
2354 && GET_MODE_SIZE (mem_mode) == -cstepi))
2356 enum tree_code code = MINUS_EXPR;
2357 tree new_base;
2358 tree new_step = step;
2360 if (POINTER_TYPE_P (TREE_TYPE (base)))
2362 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2363 code = POINTER_PLUS_EXPR;
2365 else
2366 new_step = fold_convert (TREE_TYPE (base), new_step);
2367 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2368 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2369 use->stmt);
2371 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2372 || USE_STORE_POST_INCREMENT (mem_mode))
2373 && GET_MODE_SIZE (mem_mode) == cstepi)
2374 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2375 || USE_STORE_POST_DECREMENT (mem_mode))
2376 && GET_MODE_SIZE (mem_mode) == -cstepi))
2378 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2379 use->stmt);
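
/* Illustrative sketch (not part of the pass; plain integers, invented
   names): for a pre-increment candidate the base is biased backwards by
   one step, so that performing the increment just before the use
   reproduces the original address.  The numbers match the GET_MODE_SIZE
   check above, a 4-byte access with step 4.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long base = 1000;		/* Address used by the first iteration.  */
  long step = 4;		/* cstepi == GET_MODE_SIZE (mem_mode).  */
  long new_base = base - step;	/* Candidate base for IP_BEFORE_USE.  */

  long iv = new_base;
  iv += step;			/* The auto-increment, before the use.  */
  printf ("address at first use: %ld\n", iv);	/* Prints 1000.  */
  return 0;
}
#endif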
2383 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2384 position to POS. If USE is not NULL, the candidate is set as related to
2385 it. The candidate computation is scheduled on all available positions. */
2387 static void
2388 add_candidate (struct ivopts_data *data,
2389 tree base, tree step, bool important, struct iv_use *use)
2391 if (ip_normal_pos (data->current_loop))
2392 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2393 if (ip_end_pos (data->current_loop)
2394 && allow_ip_end_pos_p (data->current_loop))
2395 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2397 if (use != NULL && use->type == USE_ADDRESS)
2398 add_autoinc_candidates (data, base, step, important, use);
2401 /* Adds standard iv candidates. */
2403 static void
2404 add_standard_iv_candidates (struct ivopts_data *data)
2406 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2408 /* The same for the wider long type, if it is still fast enough. */
2409 if (TYPE_PRECISION
2410 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2411 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2412 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2413 build_int_cst (long_integer_type_node, 1), true, NULL);
2415 /* And the same for the even wider long long type, if it is still fast enough. */
2416 if (TYPE_PRECISION
2417 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2418 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2419 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2420 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2424 /* Adds candidates based on the old induction variable IV. */
2426 static void
2427 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2429 gimple phi;
2430 tree def;
2431 struct iv_cand *cand;
2433 add_candidate (data, iv->base, iv->step, true, NULL);
2435 /* The same, but with initial value zero. */
2436 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2437 add_candidate (data, size_int (0), iv->step, true, NULL);
2438 else
2439 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2440 iv->step, true, NULL);
2442 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2443 if (gimple_code (phi) == GIMPLE_PHI)
2445 /* Additionally record the possibility of leaving the original iv
2446 untouched. */
2447 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2448 cand = add_candidate_1 (data,
2449 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2450 SSA_NAME_DEF_STMT (def));
2451 cand->var_before = iv->ssa_name;
2452 cand->var_after = def;
2456 /* Adds candidates based on the old induction variables. */
2458 static void
2459 add_old_ivs_candidates (struct ivopts_data *data)
2461 unsigned i;
2462 struct iv *iv;
2463 bitmap_iterator bi;
2465 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2467 iv = ver_info (data, i)->iv;
2468 if (iv && iv->biv_p && !integer_zerop (iv->step))
2469 add_old_iv_candidates (data, iv);
2473 /* Adds candidates based on the value of the induction variable IV and USE. */
2475 static void
2476 add_iv_value_candidates (struct ivopts_data *data,
2477 struct iv *iv, struct iv_use *use)
2479 unsigned HOST_WIDE_INT offset;
2480 tree base;
2481 tree basetype;
2483 add_candidate (data, iv->base, iv->step, false, use);
2485 /* The same, but with initial value zero. Make such a variable important,
2486 since it is generic enough that many uses may possibly be based
2487 on it. */
2488 basetype = TREE_TYPE (iv->base);
2489 if (POINTER_TYPE_P (basetype))
2490 basetype = sizetype;
2491 add_candidate (data, build_int_cst (basetype, 0),
2492 iv->step, true, use);
2494 /* Third, try removing the constant offset. Make sure to even
2495 add a candidate for &a[0] vs. (T *)&a. */
2496 base = strip_offset (iv->base, &offset);
2497 if (offset
2498 || base != iv->base)
2499 add_candidate (data, base, iv->step, false, use);
2502 /* Adds candidates based on the uses. */
2504 static void
2505 add_derived_ivs_candidates (struct ivopts_data *data)
2507 unsigned i;
2509 for (i = 0; i < n_iv_uses (data); i++)
2511 struct iv_use *use = iv_use (data, i);
2513 if (!use)
2514 continue;
2516 switch (use->type)
2518 case USE_NONLINEAR_EXPR:
2519 case USE_COMPARE:
2520 case USE_ADDRESS:
2521 /* Just add the ivs based on the value of the iv used here. */
2522 add_iv_value_candidates (data, use->iv, use);
2523 break;
2525 default:
2526 gcc_unreachable ();
2531 /* Record important candidates and add them to related_cands bitmaps
2532 if needed. */
2534 static void
2535 record_important_candidates (struct ivopts_data *data)
2537 unsigned i;
2538 struct iv_use *use;
2540 for (i = 0; i < n_iv_cands (data); i++)
2542 struct iv_cand *cand = iv_cand (data, i);
2544 if (cand->important)
2545 bitmap_set_bit (data->important_candidates, i);
2548 data->consider_all_candidates = (n_iv_cands (data)
2549 <= CONSIDER_ALL_CANDIDATES_BOUND);
2551 if (data->consider_all_candidates)
2553 /* We will not need "related_cands" bitmaps in this case,
2554 so release them to decrease peak memory consumption. */
2555 for (i = 0; i < n_iv_uses (data); i++)
2557 use = iv_use (data, i);
2558 BITMAP_FREE (use->related_cands);
2561 else
2563 /* Add important candidates to the related_cands bitmaps. */
2564 for (i = 0; i < n_iv_uses (data); i++)
2565 bitmap_ior_into (iv_use (data, i)->related_cands,
2566 data->important_candidates);
2570 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2571 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2572 we allocate a simple list for every use. */
2574 static void
2575 alloc_use_cost_map (struct ivopts_data *data)
2577 unsigned i, size, s;
2579 for (i = 0; i < n_iv_uses (data); i++)
2581 struct iv_use *use = iv_use (data, i);
2583 if (data->consider_all_candidates)
2584 size = n_iv_cands (data);
2585 else
2587 s = bitmap_count_bits (use->related_cands);
2589 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
2590 size = s ? (1 << ceil_log2 (s)) : 1;
2593 use->n_map_members = size;
2594 use->cost_map = XCNEWVEC (struct cost_pair, size);
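
/* Illustrative sketch (not part of the pass; plain C): rounding the table
   size up to a power of two lets set_use_iv_cost/get_use_iv_cost below
   reduce a candidate id modulo the size with a single mask.  */
#if 0
#include <stdio.h>

static unsigned
ceil_log2_example (unsigned s)
{
  unsigned l = 0;
  while ((1u << l) < s)
    l++;
  return l;
}

int
main (void)
{
  unsigned s = 5;	/* Number of related candidates.  */
  unsigned size = s ? 1u << ceil_log2_example (s) : 1;	/* 8.  */
  unsigned id = 13;	/* Candidate id.  */
  /* id & (size - 1) == id % size whenever size is a power of two.  */
  printf ("size %u, start slot %u\n", size, id & (size - 1));
  return 0;
}
#endif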
2598 /* Returns a description of the computation cost of an expression whose
2599 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2601 static comp_cost
2602 new_cost (unsigned runtime, unsigned complexity)
2604 comp_cost cost;
2606 cost.cost = runtime;
2607 cost.complexity = complexity;
2609 return cost;
2612 /* Adds costs COST1 and COST2. */
2614 static comp_cost
2615 add_costs (comp_cost cost1, comp_cost cost2)
2617 cost1.cost += cost2.cost;
2618 cost1.complexity += cost2.complexity;
2620 return cost1;
2622 /* Subtracts cost COST2 from COST1. */
2624 static comp_cost
2625 sub_costs (comp_cost cost1, comp_cost cost2)
2627 cost1.cost -= cost2.cost;
2628 cost1.complexity -= cost2.complexity;
2630 return cost1;
2633 /* Returns a negative number if COST1 < COST2, a positive number if
2634 COST1 > COST2, and 0 if COST1 = COST2. */
2636 static int
2637 compare_costs (comp_cost cost1, comp_cost cost2)
2639 if (cost1.cost == cost2.cost)
2640 return cost1.complexity - cost2.complexity;
2642 return cost1.cost - cost2.cost;
2645 /* Returns true if COST is infinite. */
2647 static bool
2648 infinite_cost_p (comp_cost cost)
2650 return cost.cost == INFTY;
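
/* Illustrative sketch (not part of the pass; a standalone copy of the
   comp_cost arithmetic above): comparison is by runtime cost first, with
   complexity used only as a tie-breaker.  */
#if 0
#include <stdio.h>

struct cost { int cost; unsigned complexity; };

static int
compare_example (struct cost a, struct cost b)
{
  if (a.cost == b.cost)
    return (int) a.complexity - (int) b.complexity;
  return a.cost - b.cost;
}

int
main (void)
{
  struct cost a = { 4, 2 }, b = { 4, 1 };
  /* Equal runtime cost, so the result is positive: A loses the tie-break.  */
  printf ("compare: %d\n", compare_example (a, b));
  return 0;
}
#endif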
2653 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it depends
2654 on invariants DEPENDS_ON and that the value used in expressing it
2655 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2657 static void
2658 set_use_iv_cost (struct ivopts_data *data,
2659 struct iv_use *use, struct iv_cand *cand,
2660 comp_cost cost, bitmap depends_on, tree value,
2661 enum tree_code comp, int inv_expr_id)
2663 unsigned i, s;
2665 if (infinite_cost_p (cost))
2667 BITMAP_FREE (depends_on);
2668 return;
2671 if (data->consider_all_candidates)
2673 use->cost_map[cand->id].cand = cand;
2674 use->cost_map[cand->id].cost = cost;
2675 use->cost_map[cand->id].depends_on = depends_on;
2676 use->cost_map[cand->id].value = value;
2677 use->cost_map[cand->id].comp = comp;
2678 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2679 return;
2682 /* n_map_members is a power of two, so this computes modulo. */
2683 s = cand->id & (use->n_map_members - 1);
2684 for (i = s; i < use->n_map_members; i++)
2685 if (!use->cost_map[i].cand)
2686 goto found;
2687 for (i = 0; i < s; i++)
2688 if (!use->cost_map[i].cand)
2689 goto found;
2691 gcc_unreachable ();
2693 found:
2694 use->cost_map[i].cand = cand;
2695 use->cost_map[i].cost = cost;
2696 use->cost_map[i].depends_on = depends_on;
2697 use->cost_map[i].value = value;
2698 use->cost_map[i].comp = comp;
2699 use->cost_map[i].inv_expr_id = inv_expr_id;
2702 /* Gets cost of (USE, CANDIDATE) pair. */
2704 static struct cost_pair *
2705 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2706 struct iv_cand *cand)
2708 unsigned i, s;
2709 struct cost_pair *ret;
2711 if (!cand)
2712 return NULL;
2714 if (data->consider_all_candidates)
2716 ret = use->cost_map + cand->id;
2717 if (!ret->cand)
2718 return NULL;
2720 return ret;
2723 /* n_map_members is a power of two, so this computes modulo. */
2724 s = cand->id & (use->n_map_members - 1);
2725 for (i = s; i < use->n_map_members; i++)
2726 if (use->cost_map[i].cand == cand)
2727 return use->cost_map + i;
2728 else if (use->cost_map[i].cand == NULL)
2729 return NULL;
2730 for (i = 0; i < s; i++)
2731 if (use->cost_map[i].cand == cand)
2732 return use->cost_map + i;
2733 else if (use->cost_map[i].cand == NULL)
2734 return NULL;
2736 return NULL;
2739 /* Returns an estimate of the cost of computing SEQ. */
2741 static unsigned
2742 seq_cost (rtx seq, bool speed)
2744 unsigned cost = 0;
2745 rtx set;
2747 for (; seq; seq = NEXT_INSN (seq))
2749 set = single_set (seq);
2750 if (set)
2751 cost += set_src_cost (SET_SRC (set), speed);
2752 else
2753 cost++;
2756 return cost;
2759 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2760 static rtx
2761 produce_memory_decl_rtl (tree obj, int *regno)
2763 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2764 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2765 rtx x;
2767 gcc_assert (obj);
2768 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2770 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2771 x = gen_rtx_SYMBOL_REF (address_mode, name);
2772 SET_SYMBOL_REF_DECL (x, obj);
2773 x = gen_rtx_MEM (DECL_MODE (obj), x);
2774 set_mem_addr_space (x, as);
2775 targetm.encode_section_info (obj, x, true);
2777 else
2779 x = gen_raw_REG (address_mode, (*regno)++);
2780 x = gen_rtx_MEM (DECL_MODE (obj), x);
2781 set_mem_addr_space (x, as);
2784 return x;
2787 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2788 walk_tree. DATA contains the actual fake register number. */
2790 static tree
2791 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2793 tree obj = NULL_TREE;
2794 rtx x = NULL_RTX;
2795 int *regno = (int *) data;
2797 switch (TREE_CODE (*expr_p))
2799 case ADDR_EXPR:
2800 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2801 handled_component_p (*expr_p);
2802 expr_p = &TREE_OPERAND (*expr_p, 0))
2803 continue;
2804 obj = *expr_p;
2805 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2806 x = produce_memory_decl_rtl (obj, regno);
2807 break;
2809 case SSA_NAME:
2810 *ws = 0;
2811 obj = SSA_NAME_VAR (*expr_p);
2812 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2813 if (!obj)
2814 return NULL_TREE;
2815 if (!DECL_RTL_SET_P (obj))
2816 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2817 break;
2819 case VAR_DECL:
2820 case PARM_DECL:
2821 case RESULT_DECL:
2822 *ws = 0;
2823 obj = *expr_p;
2825 if (DECL_RTL_SET_P (obj))
2826 break;
2828 if (DECL_MODE (obj) == BLKmode)
2829 x = produce_memory_decl_rtl (obj, regno);
2830 else
2831 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2833 break;
2835 default:
2836 break;
2839 if (x)
2841 decl_rtl_to_reset.safe_push (obj);
2842 SET_DECL_RTL (obj, x);
2845 return NULL_TREE;
2848 /* Determines cost of the computation of EXPR. */
2850 static unsigned
2851 computation_cost (tree expr, bool speed)
2853 rtx seq, rslt;
2854 tree type = TREE_TYPE (expr);
2855 unsigned cost;
2856 /* Avoid using hard regs in ways which may be unsupported. */
2857 int regno = LAST_VIRTUAL_REGISTER + 1;
2858 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2859 enum node_frequency real_frequency = node->frequency;
2861 node->frequency = NODE_FREQUENCY_NORMAL;
2862 crtl->maybe_hot_insn_p = speed;
2863 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2864 start_sequence ();
2865 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2866 seq = get_insns ();
2867 end_sequence ();
2868 default_rtl_profile ();
2869 node->frequency = real_frequency;
2871 cost = seq_cost (seq, speed);
2872 if (MEM_P (rslt))
2873 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2874 TYPE_ADDR_SPACE (type), speed);
2875 else if (!REG_P (rslt))
2876 cost += set_src_cost (rslt, speed);
2878 return cost;
2881 /* Returns variable containing the value of candidate CAND at statement AT. */
2883 static tree
2884 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2886 if (stmt_after_increment (loop, cand, stmt))
2887 return cand->var_after;
2888 else
2889 return cand->var_before;
2892 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2893 same precision that is at least as wide as the precision of TYPE, stores
2894 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2895 type of A and B. */
2897 static tree
2898 determine_common_wider_type (tree *a, tree *b)
2900 tree wider_type = NULL;
2901 tree suba, subb;
2902 tree atype = TREE_TYPE (*a);
2904 if (CONVERT_EXPR_P (*a))
2906 suba = TREE_OPERAND (*a, 0);
2907 wider_type = TREE_TYPE (suba);
2908 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2909 return atype;
2911 else
2912 return atype;
2914 if (CONVERT_EXPR_P (*b))
2916 subb = TREE_OPERAND (*b, 0);
2917 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2918 return atype;
2920 else
2921 return atype;
2923 *a = suba;
2924 *b = subb;
2925 return wider_type;
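
/* Illustrative sketch (not part of the pass; plain C, assumes a 64-bit
   long): the folding opportunity enabled above is that when A and B are
   both truncations from a common wider type, their difference can be
   computed once in the wider type and truncated, giving the same result
   modulo the narrow precision on the two's-complement targets GCC
   generates code for.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long ba = 0x100000007L, bb = 0x100000003L;
  int narrow = (int) ba - (int) bb;	/* Subtract in the narrow type.  */
  int wide = (int) (ba - bb);		/* Subtract in the wider type.  */
  printf ("%d %d\n", narrow, wide);	/* Both print 4.  */
  return 0;
}
#endif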
2928 /* Determines the expression by which USE is expressed from induction variable
2929 CAND at statement AT in LOOP. The expression is stored in a decomposed
2930 form into AFF. Returns false if USE cannot be expressed using CAND. */
2932 static bool
2933 get_computation_aff (struct loop *loop,
2934 struct iv_use *use, struct iv_cand *cand, gimple at,
2935 struct affine_tree_combination *aff)
2937 tree ubase = use->iv->base;
2938 tree ustep = use->iv->step;
2939 tree cbase = cand->iv->base;
2940 tree cstep = cand->iv->step, cstep_common;
2941 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2942 tree common_type, var;
2943 tree uutype;
2944 aff_tree cbase_aff, var_aff;
2945 double_int rat;
2947 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2949 /* We do not have enough precision to express the values of USE. */
2950 return false;
2953 var = var_at_stmt (loop, cand, at);
2954 uutype = unsigned_type_for (utype);
2956 /* If the conversion is not noop, perform it. */
2957 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2959 cstep = fold_convert (uutype, cstep);
2960 cbase = fold_convert (uutype, cbase);
2961 var = fold_convert (uutype, var);
2964 if (!constant_multiple_of (ustep, cstep, &rat))
2965 return false;
2967 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2968 type, we achieve better folding by computing their difference in this
2969 wider type, and casting the result to UUTYPE. We do not need to worry about
2970 overflows, as all the arithmetic will in the end be performed in UUTYPE
2971 anyway. */
2972 common_type = determine_common_wider_type (&ubase, &cbase);
2974 /* use = ubase - ratio * cbase + ratio * var. */
2975 tree_to_aff_combination (ubase, common_type, aff);
2976 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2977 tree_to_aff_combination (var, uutype, &var_aff);
2979 /* We need to shift the value if we are after the increment. */
2980 if (stmt_after_increment (loop, cand, at))
2982 aff_tree cstep_aff;
2984 if (common_type != uutype)
2985 cstep_common = fold_convert (common_type, cstep);
2986 else
2987 cstep_common = cstep;
2989 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2990 aff_combination_add (&cbase_aff, &cstep_aff);
2993 aff_combination_scale (&cbase_aff, -rat);
2994 aff_combination_add (aff, &cbase_aff);
2995 if (common_type != uutype)
2996 aff_combination_convert (aff, uutype);
2998 aff_combination_scale (&var_aff, rat);
2999 aff_combination_add (aff, &var_aff);
3001 return true;
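
/* Illustrative sketch (not part of the pass; plain integers): the rewrite
   performed above.  Given use = ubase + i * ustep and candidate
   var = cbase + i * cstep with ustep == ratio * cstep, the use value is
   recomputed as ubase - ratio * cbase + ratio * var.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long ubase = 100, ustep = 8;
  long cbase = 3, cstep = 4;
  long ratio = ustep / cstep;	/* What constant_multiple_of finds: 2.  */
  long i;

  for (i = 0; i < 3; i++)
    {
      long var = cbase + i * cstep;	/* Candidate value.  */
      long use = ubase + i * ustep;	/* Original use value.  */
      long rec = ubase - ratio * cbase + ratio * var;
      printf ("i=%ld use=%ld recomputed=%ld\n", i, use, rec);
    }
  return 0;
}
#endif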
3004 /* Return the type of USE. */
3006 static tree
3007 get_use_type (struct iv_use *use)
3009 tree base_type = TREE_TYPE (use->iv->base);
3010 tree type;
3012 if (use->type == USE_ADDRESS)
3014 /* The base_type may be a void pointer. Create a pointer type based on
3015 the mem_ref instead. */
3016 type = build_pointer_type (TREE_TYPE (*use->op_p));
3017 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3018 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3020 else
3021 type = base_type;
3023 return type;
3026 /* Determines the expression by which USE is expressed from induction variable
3027 CAND at statement AT in LOOP. The computation is unshared. */
3029 static tree
3030 get_computation_at (struct loop *loop,
3031 struct iv_use *use, struct iv_cand *cand, gimple at)
3033 aff_tree aff;
3034 tree type = get_use_type (use);
3036 if (!get_computation_aff (loop, use, cand, at, &aff))
3037 return NULL_TREE;
3038 unshare_aff_combination (&aff);
3039 return fold_convert (type, aff_combination_to_tree (&aff));
3042 /* Determines the expression by which USE is expressed from induction variable
3043 CAND in LOOP. The computation is unshared. */
3045 static tree
3046 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3048 return get_computation_at (loop, use, cand, use->stmt);
3051 /* Adjust the cost COST for being in loop setup rather than loop body.
3052 If we're optimizing for space, the loop setup overhead is constant;
3053 if we're optimizing for speed, amortize it over the per-iteration cost. */
3054 static unsigned
3055 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3057 if (cost == INFTY)
3058 return cost;
3059 else if (optimize_loop_for_speed_p (data->current_loop))
3060 return cost / avg_loop_niter (data->current_loop);
3061 else
3062 return cost;
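
/* Illustrative sketch (not part of the pass; plain C): with an average of
   10 iterations, a setup cost of 30 contributes only 3 per iteration when
   optimizing for speed, but counts in full when optimizing for size.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned cost = 30, avg_niter = 10;
  printf ("for speed: %u, for size: %u\n", cost / avg_niter, cost);
  return 0;
}
#endif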
3065 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3066 validity for a memory reference accessing memory of mode MODE in
3067 address space AS. */
3070 bool
3071 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3072 addr_space_t as)
3074 #define MAX_RATIO 128
3075 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3076 static vec<sbitmap> valid_mult_list;
3077 sbitmap valid_mult;
3079 if (data_index >= valid_mult_list.length ())
3080 valid_mult_list.safe_grow_cleared (data_index + 1);
3082 valid_mult = valid_mult_list[data_index];
3083 if (!valid_mult)
3085 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3086 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3087 rtx addr;
3088 HOST_WIDE_INT i;
3090 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3091 bitmap_clear (valid_mult);
3092 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3093 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3095 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3096 if (memory_address_addr_space_p (mode, addr, as))
3097 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3100 if (dump_file && (dump_flags & TDF_DETAILS))
3102 fprintf (dump_file, " allowed multipliers:");
3103 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3104 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3105 fprintf (dump_file, " %d", (int) i);
3106 fprintf (dump_file, "\n");
3107 fprintf (dump_file, "\n");
3110 valid_mult_list[data_index] = valid_mult;
3113 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3114 return false;
3116 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3119 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3120 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3121 variable is omitted. Compute the cost for a memory reference that accesses
3122 a memory location of mode MEM_MODE in address space AS.
3124 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3125 size of MEM_MODE / RATIO) is available. To make this determination, we
3126 look at the size of the increment to be made, which is given in CSTEP.
3127 CSTEP may be zero if the step is unknown.
3128 STMT_AFTER_INC is true iff the statement we're looking at is after the
3129 increment of the original biv.
3131 TODO -- there must be some better way. This all is quite crude. */
3133 typedef struct address_cost_data_s
3135 HOST_WIDE_INT min_offset, max_offset;
3136 unsigned costs[2][2][2][2];
3137 } *address_cost_data;
3140 static comp_cost
3141 get_address_cost (bool symbol_present, bool var_present,
3142 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3143 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3144 addr_space_t as, bool speed,
3145 bool stmt_after_inc, bool *may_autoinc)
3147 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3148 static vec<address_cost_data> address_cost_data_list;
3149 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3150 address_cost_data data;
3151 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3152 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3153 unsigned cost, acost, complexity;
3154 bool offset_p, ratio_p, autoinc;
3155 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3156 unsigned HOST_WIDE_INT mask;
3157 unsigned bits;
3159 if (data_index >= address_cost_data_list.length ())
3160 address_cost_data_list.safe_grow_cleared (data_index + 1);
3162 data = address_cost_data_list[data_index];
3163 if (!data)
3165 HOST_WIDE_INT i;
3166 HOST_WIDE_INT rat, off = 0;
3167 int old_cse_not_expected, width;
3168 unsigned sym_p, var_p, off_p, rat_p, add_c;
3169 rtx seq, addr, base;
3170 rtx reg0, reg1;
3172 data = (address_cost_data) xcalloc (1, sizeof (*data));
3174 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3176 width = GET_MODE_BITSIZE (address_mode) - 1;
3177 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3178 width = HOST_BITS_PER_WIDE_INT - 1;
3179 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3181 for (i = width; i >= 0; i--)
3183 off = -((unsigned HOST_WIDE_INT) 1 << i);
3184 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3185 if (memory_address_addr_space_p (mem_mode, addr, as))
3186 break;
3188 data->min_offset = (i == -1 ? 0 : off);
3190 for (i = width; i >= 0; i--)
3192 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3193 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3194 if (memory_address_addr_space_p (mem_mode, addr, as))
3195 break;
3197 if (i == -1)
3198 off = 0;
3199 data->max_offset = off;
3201 if (dump_file && (dump_flags & TDF_DETAILS))
3203 fprintf (dump_file, "get_address_cost:\n");
3204 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3205 GET_MODE_NAME (mem_mode),
3206 data->min_offset);
3207 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3208 GET_MODE_NAME (mem_mode),
3209 data->max_offset);
3212 rat = 1;
3213 for (i = 2; i <= MAX_RATIO; i++)
3214 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3216 rat = i;
3217 break;
3220 /* Compute the cost of various addressing modes. */
3221 acost = 0;
3222 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3223 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3225 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3226 || USE_STORE_PRE_DECREMENT (mem_mode))
3228 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3229 has_predec[mem_mode]
3230 = memory_address_addr_space_p (mem_mode, addr, as);
3232 if (USE_LOAD_POST_DECREMENT (mem_mode)
3233 || USE_STORE_POST_DECREMENT (mem_mode))
3235 addr = gen_rtx_POST_DEC (address_mode, reg0);
3236 has_postdec[mem_mode]
3237 = memory_address_addr_space_p (mem_mode, addr, as);
3239 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3240 || USE_STORE_PRE_INCREMENT (mem_mode))
3242 addr = gen_rtx_PRE_INC (address_mode, reg0);
3243 has_preinc[mem_mode]
3244 = memory_address_addr_space_p (mem_mode, addr, as);
3246 if (USE_LOAD_POST_INCREMENT (mem_mode)
3247 || USE_STORE_POST_INCREMENT (mem_mode))
3249 addr = gen_rtx_POST_INC (address_mode, reg0);
3250 has_postinc[mem_mode]
3251 = memory_address_addr_space_p (mem_mode, addr, as);
3253 for (i = 0; i < 16; i++)
3255 sym_p = i & 1;
3256 var_p = (i >> 1) & 1;
3257 off_p = (i >> 2) & 1;
3258 rat_p = (i >> 3) & 1;
3260 addr = reg0;
3261 if (rat_p)
3262 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3263 gen_int_mode (rat, address_mode));
3265 if (var_p)
3266 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3268 if (sym_p)
3270 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3271 /* ??? We can run into trouble with some backends by presenting
3272 it with symbols which haven't been properly passed through
3273 targetm.encode_section_info. By setting the local bit, we
3274 enhance the probability of things working. */
3275 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3277 if (off_p)
3278 base = gen_rtx_fmt_e (CONST, address_mode,
3279 gen_rtx_fmt_ee
3280 (PLUS, address_mode, base,
3281 gen_int_mode (off, address_mode)));
3283 else if (off_p)
3284 base = gen_int_mode (off, address_mode);
3285 else
3286 base = NULL_RTX;
3288 if (base)
3289 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3291 start_sequence ();
3292 /* To avoid splitting addressing modes, pretend that no cse will
3293 follow. */
3294 old_cse_not_expected = cse_not_expected;
3295 cse_not_expected = true;
3296 addr = memory_address_addr_space (mem_mode, addr, as);
3297 cse_not_expected = old_cse_not_expected;
3298 seq = get_insns ();
3299 end_sequence ();
3301 acost = seq_cost (seq, speed);
3302 acost += address_cost (addr, mem_mode, as, speed);
3304 if (!acost)
3305 acost = 1;
3306 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3309 /* On some targets, it is quite expensive to load a symbol into a register,
3310 which makes addresses that contain symbols look much more expensive.
3311 However, the symbol will have to be loaded in any case before the
3312 loop (and quite likely we have it in register already), so it does not
3313 make much sense to penalize them too heavily. So make some final
3314 tweaks for the SYMBOL_PRESENT modes:
3316 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3317 var is cheaper, use this mode with a small penalty.
3318 If VAR_PRESENT is true, try whether the mode with
3319 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3320 if this is the case, use it. */
3321 add_c = add_cost (speed, address_mode);
3322 for (i = 0; i < 8; i++)
3324 var_p = i & 1;
3325 off_p = (i >> 1) & 1;
3326 rat_p = (i >> 2) & 1;
3328 acost = data->costs[0][1][off_p][rat_p] + 1;
3329 if (var_p)
3330 acost += add_c;
3332 if (acost < data->costs[1][var_p][off_p][rat_p])
3333 data->costs[1][var_p][off_p][rat_p] = acost;
3336 if (dump_file && (dump_flags & TDF_DETAILS))
3338 fprintf (dump_file, "Address costs:\n");
3340 for (i = 0; i < 16; i++)
3342 sym_p = i & 1;
3343 var_p = (i >> 1) & 1;
3344 off_p = (i >> 2) & 1;
3345 rat_p = (i >> 3) & 1;
3347 fprintf (dump_file, " ");
3348 if (sym_p)
3349 fprintf (dump_file, "sym + ");
3350 if (var_p)
3351 fprintf (dump_file, "var + ");
3352 if (off_p)
3353 fprintf (dump_file, "cst + ");
3354 if (rat_p)
3355 fprintf (dump_file, "rat * ");
3357 acost = data->costs[sym_p][var_p][off_p][rat_p];
3358 fprintf (dump_file, "index costs %d\n", acost);
3360 if (has_predec[mem_mode] || has_postdec[mem_mode]
3361 || has_preinc[mem_mode] || has_postinc[mem_mode])
3362 fprintf (dump_file, " May include autoinc/dec\n");
3363 fprintf (dump_file, "\n");
3366 address_cost_data_list[data_index] = data;
3369 bits = GET_MODE_BITSIZE (address_mode);
3370 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3371 offset &= mask;
3372 if ((offset >> (bits - 1) & 1))
3373 offset |= ~mask;
3374 s_offset = offset;
3376 autoinc = false;
3377 msize = GET_MODE_SIZE (mem_mode);
3378 autoinc_offset = offset;
3379 if (stmt_after_inc)
3380 autoinc_offset += ratio * cstep;
3381 if (symbol_present || var_present || ratio != 1)
3382 autoinc = false;
3383 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3384 && msize == cstep)
3385 || (has_postdec[mem_mode] && autoinc_offset == 0
3386 && msize == -cstep)
3387 || (has_preinc[mem_mode] && autoinc_offset == msize
3388 && msize == cstep)
3389 || (has_predec[mem_mode] && autoinc_offset == -msize
3390 && msize == -cstep))
3391 autoinc = true;
3393 cost = 0;
3394 offset_p = (s_offset != 0
3395 && data->min_offset <= s_offset
3396 && s_offset <= data->max_offset);
3397 ratio_p = (ratio != 1
3398 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3400 if (ratio != 1 && !ratio_p)
3401 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3403 if (s_offset && !offset_p && !symbol_present)
3404 cost += add_cost (speed, address_mode);
3406 if (may_autoinc)
3407 *may_autoinc = autoinc;
3408 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3409 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3410 return new_cost (cost + acost, complexity);
3413 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3414 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3415 calculating the operands of EXPR. Returns true if successful, and returns
3416 the cost in COST. */
3418 static bool
3419 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3420 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3422 comp_cost res;
3423 tree op1 = TREE_OPERAND (expr, 1);
3424 tree cst = TREE_OPERAND (mult, 1);
3425 tree multop = TREE_OPERAND (mult, 0);
3426 int m = exact_log2 (int_cst_value (cst));
3427 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3428 int sa_cost;
3430 if (!(m >= 0 && m < maxm))
3431 return false;
3433 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3434 ? shiftadd_cost (speed, mode, m)
3435 : (mult == op1
3436 ? shiftsub1_cost (speed, mode, m)
3437 : shiftsub0_cost (speed, mode, m)));
3438 res = new_cost (sa_cost, 0);
3439 res = add_costs (res, mult == op1 ? cost0 : cost1);
3441 STRIP_NOPS (multop);
3442 if (!is_gimple_val (multop))
3443 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3445 *cost = res;
3446 return true;
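
/* Illustrative sketch (not part of the pass; plain C): the shift-add
   idiom priced above.  A multiply-and-add whose constant is a power of
   two, such as a + b * 8, has m == exact_log2 (8) == 3 and can be done
   as (b << 3) + a.  */
#if 0
#include <stdio.h>

int
main (void)
{
  long a = 5, b = 7;
  long mult = a + b * 8;	/* The MULT operand with constant 8.  */
  long shiftadd = a + (b << 3);	/* What shiftadd_cost prices.  */
  printf ("%ld %ld\n", mult, shiftadd);	/* Both print 61.  */
  return 0;
}
#endif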
3449 /* Estimates cost of forcing expression EXPR into a variable. */
3451 static comp_cost
3452 force_expr_to_var_cost (tree expr, bool speed)
3454 static bool costs_initialized = false;
3455 static unsigned integer_cost [2];
3456 static unsigned symbol_cost [2];
3457 static unsigned address_cost [2];
3458 tree op0, op1;
3459 comp_cost cost0, cost1, cost;
3460 enum machine_mode mode;
3462 if (!costs_initialized)
3464 tree type = build_pointer_type (integer_type_node);
3465 tree var, addr;
3466 rtx x;
3467 int i;
3469 var = create_tmp_var_raw (integer_type_node, "test_var");
3470 TREE_STATIC (var) = 1;
3471 x = produce_memory_decl_rtl (var, NULL);
3472 SET_DECL_RTL (var, x);
3474 addr = build1 (ADDR_EXPR, type, var);
3477 for (i = 0; i < 2; i++)
3479 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3480 2000), i);
3482 symbol_cost[i] = computation_cost (addr, i) + 1;
3484 address_cost[i]
3485 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3486 if (dump_file && (dump_flags & TDF_DETAILS))
3488 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3489 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3490 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3491 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3492 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3493 fprintf (dump_file, "\n");
3497 costs_initialized = true;
3500 STRIP_NOPS (expr);
3502 if (SSA_VAR_P (expr))
3503 return no_cost;
3505 if (is_gimple_min_invariant (expr))
3507 if (TREE_CODE (expr) == INTEGER_CST)
3508 return new_cost (integer_cost [speed], 0);
3510 if (TREE_CODE (expr) == ADDR_EXPR)
3512 tree obj = TREE_OPERAND (expr, 0);
3514 if (TREE_CODE (obj) == VAR_DECL
3515 || TREE_CODE (obj) == PARM_DECL
3516 || TREE_CODE (obj) == RESULT_DECL)
3517 return new_cost (symbol_cost [speed], 0);
3520 return new_cost (address_cost [speed], 0);
3523 switch (TREE_CODE (expr))
3525 case POINTER_PLUS_EXPR:
3526 case PLUS_EXPR:
3527 case MINUS_EXPR:
3528 case MULT_EXPR:
3529 op0 = TREE_OPERAND (expr, 0);
3530 op1 = TREE_OPERAND (expr, 1);
3531 STRIP_NOPS (op0);
3532 STRIP_NOPS (op1);
3534 if (is_gimple_val (op0))
3535 cost0 = no_cost;
3536 else
3537 cost0 = force_expr_to_var_cost (op0, speed);
3539 if (is_gimple_val (op1))
3540 cost1 = no_cost;
3541 else
3542 cost1 = force_expr_to_var_cost (op1, speed);
3544 break;
3546 case NEGATE_EXPR:
3547 op0 = TREE_OPERAND (expr, 0);
3548 STRIP_NOPS (op0);
3549 op1 = NULL_TREE;
3551 if (is_gimple_val (op0))
3552 cost0 = no_cost;
3553 else
3554 cost0 = force_expr_to_var_cost (op0, speed);
3556 cost1 = no_cost;
3557 break;
3559 default:
3560 /* Just an arbitrary value, FIXME. */
3561 return new_cost (target_spill_cost[speed], 0);
3564 mode = TYPE_MODE (TREE_TYPE (expr));
3565 switch (TREE_CODE (expr))
3567 case POINTER_PLUS_EXPR:
3568 case PLUS_EXPR:
3569 case MINUS_EXPR:
3570 case NEGATE_EXPR:
3571 cost = new_cost (add_cost (speed, mode), 0);
3572 if (TREE_CODE (expr) != NEGATE_EXPR)
3574 tree mult = NULL_TREE;
3575 comp_cost sa_cost;
3576 if (TREE_CODE (op1) == MULT_EXPR)
3577 mult = op1;
3578 else if (TREE_CODE (op0) == MULT_EXPR)
3579 mult = op0;
3581 if (mult != NULL_TREE
3582 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3583 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3584 speed, &sa_cost))
3585 return sa_cost;
3587 break;
3589 case MULT_EXPR:
3590 if (cst_and_fits_in_hwi (op0))
3591 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3592 mode, speed), 0);
3593 else if (cst_and_fits_in_hwi (op1))
3594 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3595 mode, speed), 0);
3596 else
3597 return new_cost (target_spill_cost [speed], 0);
3598 break;
3600 default:
3601 gcc_unreachable ();
3604 cost = add_costs (cost, cost0);
3605 cost = add_costs (cost, cost1);
3607 /* Bound the cost by target_spill_cost. The parts of complicated
3608 computations often are either loop invariant or at least can
3609 be shared between several iv uses, so letting this grow without
3610 limits would not give reasonable results. */
3611 if (cost.cost > (int) target_spill_cost [speed])
3612 cost.cost = target_spill_cost [speed];
3614 return cost;
3617 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3618 invariants the computation depends on. */
3620 static comp_cost
3621 force_var_cost (struct ivopts_data *data,
3622 tree expr, bitmap *depends_on)
3624 if (depends_on)
3626 fd_ivopts_data = data;
3627 walk_tree (&expr, find_depends, depends_on, NULL);
3630 return force_expr_to_var_cost (expr, data->speed);
3633 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3634 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3635 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3636 invariants the computation depends on. */
3638 static comp_cost
3639 split_address_cost (struct ivopts_data *data,
3640 tree addr, bool *symbol_present, bool *var_present,
3641 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3643 tree core;
3644 HOST_WIDE_INT bitsize;
3645 HOST_WIDE_INT bitpos;
3646 tree toffset;
3647 enum machine_mode mode;
3648 int unsignedp, volatilep;
3650 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3651 &unsignedp, &volatilep, false);
3653 if (toffset != 0
3654 || bitpos % BITS_PER_UNIT != 0
3655 || TREE_CODE (core) != VAR_DECL)
3657 *symbol_present = false;
3658 *var_present = true;
3659 fd_ivopts_data = data;
3660 walk_tree (&addr, find_depends, depends_on, NULL);
3661 return new_cost (target_spill_cost[data->speed], 0);
3664 *offset += bitpos / BITS_PER_UNIT;
3665 if (TREE_STATIC (core)
3666 || DECL_EXTERNAL (core))
3668 *symbol_present = true;
3669 *var_present = false;
3670 return no_cost;
3673 *symbol_present = false;
3674 *var_present = true;
3675 return no_cost;
3678 /* Estimates cost of expressing difference of addresses E1 - E2 as
3679 var + symbol + offset. The value of offset is added to OFFSET,
3680 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3681 part is missing. DEPENDS_ON is a set of the invariants the computation
3682 depends on. */
3684 static comp_cost
3685 ptr_difference_cost (struct ivopts_data *data,
3686 tree e1, tree e2, bool *symbol_present, bool *var_present,
3687 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3689 HOST_WIDE_INT diff = 0;
3690 aff_tree aff_e1, aff_e2;
3691 tree type;
3693 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3695 if (ptr_difference_const (e1, e2, &diff))
3697 *offset += diff;
3698 *symbol_present = false;
3699 *var_present = false;
3700 return no_cost;
3703 if (integer_zerop (e2))
3704 return split_address_cost (data, TREE_OPERAND (e1, 0),
3705 symbol_present, var_present, offset, depends_on);
3707 *symbol_present = false;
3708 *var_present = true;
3710 type = signed_type_for (TREE_TYPE (e1));
3711 tree_to_aff_combination (e1, type, &aff_e1);
3712 tree_to_aff_combination (e2, type, &aff_e2);
3713 aff_combination_scale (&aff_e2, double_int_minus_one);
3714 aff_combination_add (&aff_e1, &aff_e2);
3716 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3719 /* Estimates cost of expressing difference E1 - E2 as
3720 var + symbol + offset. The value of offset is added to OFFSET,
3721 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3722 part is missing. DEPENDS_ON is a set of the invariants the computation
3723 depends on. */
3725 static comp_cost
3726 difference_cost (struct ivopts_data *data,
3727 tree e1, tree e2, bool *symbol_present, bool *var_present,
3728 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3730 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3731 unsigned HOST_WIDE_INT off1, off2;
3732 aff_tree aff_e1, aff_e2;
3733 tree type;
3735 e1 = strip_offset (e1, &off1);
3736 e2 = strip_offset (e2, &off2);
3737 *offset += off1 - off2;
3739 STRIP_NOPS (e1);
3740 STRIP_NOPS (e2);
3742 if (TREE_CODE (e1) == ADDR_EXPR)
3743 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3744 offset, depends_on);
3745 *symbol_present = false;
3747 if (operand_equal_p (e1, e2, 0))
3749 *var_present = false;
3750 return no_cost;
3753 *var_present = true;
3755 if (integer_zerop (e2))
3756 return force_var_cost (data, e1, depends_on);
3758 if (integer_zerop (e1))
3760 comp_cost cost = force_var_cost (data, e2, depends_on);
3761 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3762 return cost;
3765 type = signed_type_for (TREE_TYPE (e1));
3766 tree_to_aff_combination (e1, type, &aff_e1);
3767 tree_to_aff_combination (e2, type, &aff_e2);
3768 aff_combination_scale (&aff_e2, double_int_minus_one);
3769 aff_combination_add (&aff_e1, &aff_e2);
3771 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3774 /* Returns true if AFF1 and AFF2 are identical. */
3776 static bool
3777 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3779 unsigned i;
3781 if (aff1->n != aff2->n)
3782 return false;
3784 for (i = 0; i < aff1->n; i++)
3786 if (aff1->elts[i].coef != aff2->elts[i].coef)
3787 return false;
3789 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3790 return false;
3792 return true;
3795 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3797 static int
3798 get_expr_id (struct ivopts_data *data, tree expr)
3800 struct iv_inv_expr_ent ent;
3801 struct iv_inv_expr_ent **slot;
3803 ent.expr = expr;
3804 ent.hash = iterative_hash_expr (expr, 0);
3805 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3806 &ent, INSERT);
3807 if (*slot)
3808 return (*slot)->id;
3810 *slot = XNEW (struct iv_inv_expr_ent);
3811 (*slot)->expr = expr;
3812 (*slot)->hash = ent.hash;
3813 (*slot)->id = data->inv_expr_id++;
3814 return (*slot)->id;
3817 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3818 requires a new compiler generated temporary. Returns -1 otherwise.
3819 ADDRESS_P is a flag indicating if the expression is for address
3820 computation. */
3822 static int
3823 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3824 tree cbase, HOST_WIDE_INT ratio,
3825 bool address_p)
3827 aff_tree ubase_aff, cbase_aff;
3828 tree expr, ub, cb;
3830 STRIP_NOPS (ubase);
3831 STRIP_NOPS (cbase);
3832 ub = ubase;
3833 cb = cbase;
3835 if ((TREE_CODE (ubase) == INTEGER_CST)
3836 && (TREE_CODE (cbase) == INTEGER_CST))
3837 return -1;
3839 /* Strips the constant part. */
3840 if (TREE_CODE (ubase) == PLUS_EXPR
3841 || TREE_CODE (ubase) == MINUS_EXPR
3842 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3844 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3845 ubase = TREE_OPERAND (ubase, 0);
3848 /* Strips the constant part. */
3849 if (TREE_CODE (cbase) == PLUS_EXPR
3850 || TREE_CODE (cbase) == MINUS_EXPR
3851 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3853 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3854 cbase = TREE_OPERAND (cbase, 0);
3857 if (address_p)
3859 if (((TREE_CODE (ubase) == SSA_NAME)
3860 || (TREE_CODE (ubase) == ADDR_EXPR
3861 && is_gimple_min_invariant (ubase)))
3862 && (TREE_CODE (cbase) == INTEGER_CST))
3863 return -1;
3865 if (((TREE_CODE (cbase) == SSA_NAME)
3866 || (TREE_CODE (cbase) == ADDR_EXPR
3867 && is_gimple_min_invariant (cbase)))
3868 && (TREE_CODE (ubase) == INTEGER_CST))
3869 return -1;
3872 if (ratio == 1)
3874 if (operand_equal_p (ubase, cbase, 0))
3875 return -1;
3877 if (TREE_CODE (ubase) == ADDR_EXPR
3878 && TREE_CODE (cbase) == ADDR_EXPR)
3880 tree usym, csym;
3882 usym = TREE_OPERAND (ubase, 0);
3883 csym = TREE_OPERAND (cbase, 0);
3884 if (TREE_CODE (usym) == ARRAY_REF)
3886 tree ind = TREE_OPERAND (usym, 1);
3887 if (TREE_CODE (ind) == INTEGER_CST
3888 && host_integerp (ind, 0)
3889 && TREE_INT_CST_LOW (ind) == 0)
3890 usym = TREE_OPERAND (usym, 0);
3892 if (TREE_CODE (csym) == ARRAY_REF)
3894 tree ind = TREE_OPERAND (csym, 1);
3895 if (TREE_CODE (ind) == INTEGER_CST
3896 && host_integerp (ind, 0)
3897 && TREE_INT_CST_LOW (ind) == 0)
3898 csym = TREE_OPERAND (csym, 0);
3900 if (operand_equal_p (usym, csym, 0))
3901 return -1;
3903 /* Now do a more complex comparison. */
3904 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3905 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3906 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3907 return -1;
3910 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3911 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3913 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3914 aff_combination_add (&ubase_aff, &cbase_aff);
3915 expr = aff_combination_to_tree (&ubase_aff);
3916 return get_expr_id (data, expr);
3921 /* Determines the cost of the computation by which USE is expressed
3922 from induction variable CAND. If ADDRESS_P is true, we just need
3923 to create an address from it, otherwise we want to get it into a
3924 register. A set of invariants we depend on is stored in
3925 DEPENDS_ON. AT is the statement at which the value is computed.
3926 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3927 addressing is likely. */
3929 static comp_cost
3930 get_computation_cost_at (struct ivopts_data *data,
3931 struct iv_use *use, struct iv_cand *cand,
3932 bool address_p, bitmap *depends_on, gimple at,
3933 bool *can_autoinc,
3934 int *inv_expr_id)
3936 tree ubase = use->iv->base, ustep = use->iv->step;
3937 tree cbase, cstep;
3938 tree utype = TREE_TYPE (ubase), ctype;
3939 unsigned HOST_WIDE_INT cstepi, offset = 0;
3940 HOST_WIDE_INT ratio, aratio;
3941 bool var_present, symbol_present, stmt_is_after_inc;
3942 comp_cost cost;
3943 double_int rat;
3944 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3945 enum machine_mode mem_mode = (address_p
3946 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3947 : VOIDmode);
3949 *depends_on = NULL;
3951 /* Only consider real candidates. */
3952 if (!cand->iv)
3953 return infinite_cost;
3955 cbase = cand->iv->base;
3956 cstep = cand->iv->step;
3957 ctype = TREE_TYPE (cbase);
3959 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3961 /* We do not have enough precision to express the values of USE. */
3962 return infinite_cost;
3965 if (address_p
3966 || (use->iv->base_object
3967 && cand->iv->base_object
3968 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3969 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3971 /* Do not try to express the address of an object with a computation based
3972 on the address of a different object. This may cause problems in rtl
3973 level alias analysis (which does not expect this to happen,
3974 as it is illegal in C), and would be unlikely to be useful
3975 anyway. */
3976 if (use->iv->base_object
3977 && cand->iv->base_object
3978 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3979 return infinite_cost;
3982 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3984 /* TODO -- add direct handling of this case. */
3985 goto fallback;
3988 /* CSTEPI is removed from the offset in case the statement is after the
3989 increment. If the step is not constant, we use zero instead.
3990 This is a bit imprecise (there is the extra addition), but
3991 redundancy elimination is likely to transform the code so that
3992 it uses the value of the variable before the increment anyway,
3993 so this is not all that unrealistic. */
3994 if (cst_and_fits_in_hwi (cstep))
3995 cstepi = int_cst_value (cstep);
3996 else
3997 cstepi = 0;
3999 if (!constant_multiple_of (ustep, cstep, &rat))
4000 return infinite_cost;
4002 if (rat.fits_shwi ())
4003 ratio = rat.to_shwi ();
4004 else
4005 return infinite_cost;
4007 STRIP_NOPS (cbase);
4008 ctype = TREE_TYPE (cbase);
4010 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4012 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4013 or ratio == 1, it is better to handle this like
4015 ubase - ratio * cbase + ratio * var
4017 (this also holds in the case ratio == -1, TODO). */
4019 if (cst_and_fits_in_hwi (cbase))
4021 offset = - ratio * int_cst_value (cbase);
4022 cost = difference_cost (data,
4023 ubase, build_int_cst (utype, 0),
4024 &symbol_present, &var_present, &offset,
4025 depends_on);
4026 cost.cost /= avg_loop_niter (data->current_loop);
4028 else if (ratio == 1)
4030 tree real_cbase = cbase;
4032 /* Check to see if any adjustment is needed. */
4033 if (cstepi == 0 && stmt_is_after_inc)
4035 aff_tree real_cbase_aff;
4036 aff_tree cstep_aff;
4038 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4039 &real_cbase_aff);
4040 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4042 aff_combination_add (&real_cbase_aff, &cstep_aff);
4043 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4046 cost = difference_cost (data,
4047 ubase, real_cbase,
4048 &symbol_present, &var_present, &offset,
4049 depends_on);
4050 cost.cost /= avg_loop_niter (data->current_loop);
4052 else if (address_p
4053 && !POINTER_TYPE_P (ctype)
4054 && multiplier_allowed_in_address_p
4055 (ratio, mem_mode,
4056 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4058 cbase
4059 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4060 cost = difference_cost (data,
4061 ubase, cbase,
4062 &symbol_present, &var_present, &offset,
4063 depends_on);
4064 cost.cost /= avg_loop_niter (data->current_loop);
4066 else
4068 cost = force_var_cost (data, cbase, depends_on);
4069 cost = add_costs (cost,
4070 difference_cost (data,
4071 ubase, build_int_cst (utype, 0),
4072 &symbol_present, &var_present,
4073 &offset, depends_on));
4074 cost.cost /= avg_loop_niter (data->current_loop);
4075 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4078 if (inv_expr_id)
4080 *inv_expr_id =
4081 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4082 /* Clear depends on. */
4083 if (*inv_expr_id != -1 && depends_on && *depends_on)
4084 bitmap_clear (*depends_on);
4087 /* If we are after the increment, the value of the candidate is higher by
4088 one iteration. */
4089 if (stmt_is_after_inc)
4090 offset -= ratio * cstepi;
4092 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4093 (symbol/var1/const parts may be omitted). If we are looking for an
4094 address, find the cost of addressing this. */
4095 if (address_p)
4096 return add_costs (cost,
4097 get_address_cost (symbol_present, var_present,
4098 offset, ratio, cstepi,
4099 mem_mode,
4100 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4101 speed, stmt_is_after_inc,
4102 can_autoinc));
4104 /* Otherwise estimate the costs for computing the expression. */
4105 if (!symbol_present && !var_present && !offset)
4107 if (ratio != 1)
4108 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4109 return cost;
4112 /* Symbol + offset should be compile-time computable so consider that they
4113 are added once to the variable, if present. */
4114 if (var_present && (symbol_present || offset))
4115 cost.cost += adjust_setup_cost (data,
4116 add_cost (speed, TYPE_MODE (ctype)));
4118 /* Having offset does not affect runtime cost in case it is added to
4119 symbol, but it increases complexity. */
4120 if (offset)
4121 cost.complexity++;
4123 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4125 aratio = ratio > 0 ? ratio : -ratio;
4126 if (aratio != 1)
4127 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4128 return cost;
4130 fallback:
4131 if (can_autoinc)
4132 *can_autoinc = false;
4135 /* Just get the expression, expand it and measure the cost. */
4136 tree comp = get_computation_at (data->current_loop, use, cand, at);
4138 if (!comp)
4139 return infinite_cost;
4141 if (address_p)
4142 comp = build_simple_mem_ref (comp);
4144 return new_cost (computation_cost (comp, speed), 0);
4148 /* Determines the cost of the computation by which USE is expressed
4149 from induction variable CAND. If ADDRESS_P is true, we just need
4150 to create an address from it, otherwise we want to get it into a
4151 register. A set of invariants we depend on is stored in
4152 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4153 autoinc addressing is likely. */
4155 static comp_cost
4156 get_computation_cost (struct ivopts_data *data,
4157 struct iv_use *use, struct iv_cand *cand,
4158 bool address_p, bitmap *depends_on,
4159 bool *can_autoinc, int *inv_expr_id)
4161 return get_computation_cost_at (data,
4162 use, cand, address_p, depends_on, use->stmt,
4163 can_autoinc, inv_expr_id);
4166 /* Determines cost of basing replacement of USE on CAND in a generic
4167 expression. */
4169 static bool
4170 determine_use_iv_cost_generic (struct ivopts_data *data,
4171 struct iv_use *use, struct iv_cand *cand)
4173 bitmap depends_on;
4174 comp_cost cost;
4175 int inv_expr_id = -1;
4177 /* The simple case first -- if we need to express value of the preserved
4178 original biv, the cost is 0. This also prevents us from counting the
4179 cost of increment twice -- once at this use and once in the cost of
4180 the candidate. */
4181 if (cand->pos == IP_ORIGINAL
4182 && cand->incremented_at == use->stmt)
4184 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4185 ERROR_MARK, -1);
4186 return true;
4189 cost = get_computation_cost (data, use, cand, false, &depends_on,
4190 NULL, &inv_expr_id);
4192 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4193 inv_expr_id);
4195 return !infinite_cost_p (cost);
4198 /* Determines cost of basing replacement of USE on CAND in an address. */
4200 static bool
4201 determine_use_iv_cost_address (struct ivopts_data *data,
4202 struct iv_use *use, struct iv_cand *cand)
4204 bitmap depends_on;
4205 bool can_autoinc;
4206 int inv_expr_id = -1;
4207 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4208 &can_autoinc, &inv_expr_id);
4210 if (cand->ainc_use == use)
4212 if (can_autoinc)
4213 cost.cost -= cand->cost_step;
4214 /* If we generated the candidate solely for exploiting autoincrement
4215 opportunities, and it turns out it can't be used, set the cost to
4216 infinity to make sure we ignore it. */
4217 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4218 cost = infinite_cost;
4220 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4221 inv_expr_id);
4223 return !infinite_cost_p (cost);
4226 /* Computes value of candidate CAND at position AT in iteration NITER, and
4227 stores it to VAL. */
4229 static void
4230 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4231 aff_tree *val)
4233 aff_tree step, delta, nit;
4234 struct iv *iv = cand->iv;
4235 tree type = TREE_TYPE (iv->base);
4236 tree steptype;
4237 if (POINTER_TYPE_P (type))
4238 steptype = sizetype;
4239 else
4240 steptype = unsigned_type_for (type);
4241 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4242 aff_combination_convert (&step, steptype);
4243 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4244 aff_combination_convert (&nit, steptype);
4245 aff_combination_mult (&nit, &step, &delta);
4246 if (stmt_after_increment (loop, cand, at))
4247 aff_combination_add (&delta, &step);
4249 tree_to_aff_combination (iv->base, type, val);
4250 if (!POINTER_TYPE_P (type))
4251 aff_combination_convert (val, steptype);
4252 aff_combination_add (val, &delta);
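/* Editor's sketch, not part of GCC: for an integer iv the affine algebra
   above reduces to BASE + (NITER, plus one if after the increment) * STEP
   in unsigned, wrapping arithmetic.  A hypothetical 64-bit version: */

#include <stdint.h>

static uint64_t
toy_cand_value_at (uint64_t base, uint64_t step, uint64_t niter,
		   int after_increment)
{
  /* Wrapping is intended; this is why STEPTYPE is unsigned above.  */
  return base + (niter + (after_increment ? 1 : 0)) * step;
}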
4255 /* Returns period of induction variable iv. */
4257 static tree
4258 iv_period (struct iv *iv)
4260 tree step = iv->step, period, type;
4261 tree pow2div;
4263 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4265 type = unsigned_type_for (TREE_TYPE (step));
4266 /* The period of the iv is lcm (step, type_range)/step - 1,
4267 i.e., N*type_range/step - 1. Since the type range is a power
4268 of two, N == step >> num_of_ending_zeros_binary (step),
4269 so the final result is
4271 (type_range >> num_of_ending_zeros_binary (step)) - 1
4274 pow2div = num_ending_zeros (step);
4276 period = build_low_bits_mask (type,
4277 (TYPE_PRECISION (type)
4278 - tree_low_cst (pow2div, 1)));
4280 return period;
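/* Editor's sketch, not part of GCC: the same computation on plain numbers,
   assuming STEP != 0 and a precision below 64.  For example, a 32-bit iv
   with step 12 (two trailing zero bits) has period
   (1 << 30) - 1 == 0x3fffffff.  */

#include <stdint.h>

static uint64_t
toy_iv_period (uint64_t step, unsigned precision)
{
  unsigned pow2div = 0;		/* num_ending_zeros (step) */

  while ((step & 1) == 0)
    {
      step >>= 1;
      pow2div++;
    }
  /* build_low_bits_mask (type, precision - pow2div).  */
  return (UINT64_C (1) << (precision - pow2div)) - 1;
}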
4283 /* Returns the comparison operator used when eliminating the iv USE. */
4285 static enum tree_code
4286 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4288 struct loop *loop = data->current_loop;
4289 basic_block ex_bb;
4290 edge exit;
4292 ex_bb = gimple_bb (use->stmt);
4293 exit = EDGE_SUCC (ex_bb, 0);
4294 if (flow_bb_inside_loop_p (loop, exit->dest))
4295 exit = EDGE_SUCC (ex_bb, 1);
4297 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4300 static tree
4301 strip_wrap_conserving_type_conversions (tree exp)
4303 while (tree_ssa_useless_type_conversion (exp)
4304 && (nowrap_type_p (TREE_TYPE (exp))
4305 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4306 exp = TREE_OPERAND (exp, 0);
4307 return exp;
4310 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4311 only check for an exact match. */
4313 static bool
4314 expr_equal_p (tree e, tree what)
4316 gimple stmt;
4317 enum tree_code code;
4319 e = strip_wrap_conserving_type_conversions (e);
4320 what = strip_wrap_conserving_type_conversions (what);
4322 code = TREE_CODE (what);
4323 if (TREE_TYPE (e) != TREE_TYPE (what))
4324 return false;
4326 if (operand_equal_p (e, what, 0))
4327 return true;
4329 if (TREE_CODE (e) != SSA_NAME)
4330 return false;
4332 stmt = SSA_NAME_DEF_STMT (e);
4333 if (gimple_code (stmt) != GIMPLE_ASSIGN
4334 || gimple_assign_rhs_code (stmt) != code)
4335 return false;
4337 switch (get_gimple_rhs_class (code))
4339 case GIMPLE_BINARY_RHS:
4340 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4341 return false;
4342 /* Fallthru. */
4344 case GIMPLE_UNARY_RHS:
4345 case GIMPLE_SINGLE_RHS:
4346 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4347 default:
4348 return false;
4352 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4353 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4354 calculation is performed in a non-wrapping type.
4356 TODO: More generally, we could test for the situation that
4357 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4358 This would require knowing the sign of OFFSET.
4360 Also, we only look for the first addition in the computation of BASE.
4361 More complex analysis would be better, but introducing it just for
4362 this optimization seems like overkill. */
4364 static bool
4365 difference_cannot_overflow_p (tree base, tree offset)
4367 enum tree_code code;
4368 tree e1, e2;
4370 if (!nowrap_type_p (TREE_TYPE (base)))
4371 return false;
4373 base = expand_simple_operations (base);
4375 if (TREE_CODE (base) == SSA_NAME)
4377 gimple stmt = SSA_NAME_DEF_STMT (base);
4379 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4380 return false;
4382 code = gimple_assign_rhs_code (stmt);
4383 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4384 return false;
4386 e1 = gimple_assign_rhs1 (stmt);
4387 e2 = gimple_assign_rhs2 (stmt);
4389 else
4391 code = TREE_CODE (base);
4392 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4393 return false;
4394 e1 = TREE_OPERAND (base, 0);
4395 e2 = TREE_OPERAND (base, 1);
4398 /* TODO: deeper inspection may be necessary to prove the equality. */
4399 switch (code)
4401 case PLUS_EXPR:
4402 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4403 case POINTER_PLUS_EXPR:
4404 return expr_equal_p (e2, offset);
4406 default:
4407 return false;
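/* Editor's sketch, not part of GCC: the essence of the check above.  When
   BASE was computed as SOMETHING + OFFSET in a non-wrapping type,
   BASE - OFFSET merely recovers SOMETHING, so the subtraction cannot
   overflow.  Unsigned arithmetic is used here only so that the sketch is
   well-defined C.  */

static unsigned
toy_base_minus_offset (unsigned something, unsigned offset)
{
  unsigned base = something + offset;	/* BASE = SOMETHING + OFFSET */
  return base - offset;			/* == SOMETHING, no new wrap */
}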
4411 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
4412 comparison with CAND. NITER describes the number of iterations of
4413 the loop. If successful, the comparison in COMP_P is altered accordingly.
4415 We aim to handle the following situation:
4417 sometype *base, *p;
4418 int a, b, i;
4420 i = a;
4421 p = p_0 = base + a;
4423 do
4424 {
4425 bla (*p);
4426 p++;
4427 i++;
4428 }
4429 while (i < b);
4431 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4432 We aim to optimize this to
4434 p = p_0 = base + a;
4435 do
4436 {
4437 bla (*p);
4438 p++;
4439 }
4440 while (p < p_0 - a + b);
4442 This preserves correctness, since the pointer arithmetic does not
4443 overflow. More precisely:
4445 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4446 overflow in computing it or the values of p.
4447 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4448 overflow. To prove this, we use the fact that p_0 = base + a. */
4450 static bool
4451 iv_elimination_compare_lt (struct ivopts_data *data,
4452 struct iv_cand *cand, enum tree_code *comp_p,
4453 struct tree_niter_desc *niter)
4455 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4456 struct affine_tree_combination nit, tmpa, tmpb;
4457 enum tree_code comp;
4458 HOST_WIDE_INT step;
4460 /* We need to know that the candidate induction variable does not overflow.
4461 While more complex analysis may be used to prove this, for now just
4462 check that the variable appears in the original program and that it
4463 is computed in a type that guarantees no overflows. */
4464 cand_type = TREE_TYPE (cand->iv->base);
4465 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4466 return false;
4468 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4469 the calculation of the BOUND could overflow, making the comparison
4470 invalid. */
4471 if (!data->loop_single_exit_p)
4472 return false;
4474 /* We need to be able to decide whether candidate is increasing or decreasing
4475 in order to choose the right comparison operator. */
4476 if (!cst_and_fits_in_hwi (cand->iv->step))
4477 return false;
4478 step = int_cst_value (cand->iv->step);
4480 /* Check that the number of iterations matches the expected pattern:
4481 a + 1 > b ? 0 : b - a - 1. */
4482 mbz = niter->may_be_zero;
4483 if (TREE_CODE (mbz) == GT_EXPR)
4485 /* Handle a + 1 > b. */
4486 tree op0 = TREE_OPERAND (mbz, 0);
4487 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4489 a = TREE_OPERAND (op0, 0);
4490 b = TREE_OPERAND (mbz, 1);
4492 else
4493 return false;
4495 else if (TREE_CODE (mbz) == LT_EXPR)
4497 tree op1 = TREE_OPERAND (mbz, 1);
4499 /* Handle b < a + 1. */
4500 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4502 a = TREE_OPERAND (op1, 0);
4503 b = TREE_OPERAND (mbz, 0);
4505 else
4506 return false;
4508 else
4509 return false;
4511 /* Expected number of iterations is B - A - 1. Check that it matches
4512 the actual number, i.e., that B - A - NITER = 1. */
4513 tree_to_aff_combination (niter->niter, nit_type, &nit);
4514 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4515 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4516 aff_combination_scale (&nit, double_int_minus_one);
4517 aff_combination_scale (&tmpa, double_int_minus_one);
4518 aff_combination_add (&tmpb, &tmpa);
4519 aff_combination_add (&tmpb, &nit);
4520 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4521 return false;
4523 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4524 overflow. */
4525 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4526 cand->iv->step,
4527 fold_convert (TREE_TYPE (cand->iv->step), a));
4528 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4529 return false;
4531 /* Determine the new comparison operator. */
4532 comp = step < 0 ? GT_EXPR : LT_EXPR;
4533 if (*comp_p == NE_EXPR)
4534 *comp_p = comp;
4535 else if (*comp_p == EQ_EXPR)
4536 *comp_p = invert_tree_comparison (comp, false);
4537 else
4538 gcc_unreachable ();
4540 return true;
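/* Editor's sketch, not part of GCC: the transformation above as two
   compilable toy loops.  The names bla, base, a and b mirror the
   hypothetical example in the comment before iv_elimination_compare_lt.  */

extern void bla (int);

static void
toy_before (int *base, int a, int b)
{
  int i = a;
  int *p = base + a;
  do
    {
      bla (*p);
      p++;
      i++;
    }
  while (i < b);
}

static void
toy_after (int *base, int a, int b)
{
  int *p_0 = base + a;
  int *p = p_0;
  do
    {
      bla (*p);
      p++;
    }
  while (p < p_0 - a + b);
}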
4543 /* Check whether it is possible to express the condition in USE by comparison
4544 of candidate CAND. If so, store the value compared with to BOUND, and the
4545 comparison operator to COMP. */
4547 static bool
4548 may_eliminate_iv (struct ivopts_data *data,
4549 struct iv_use *use, struct iv_cand *cand, tree *bound,
4550 enum tree_code *comp)
4552 basic_block ex_bb;
4553 edge exit;
4554 tree period;
4555 struct loop *loop = data->current_loop;
4556 aff_tree bnd;
4557 struct tree_niter_desc *desc = NULL;
4559 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4560 return false;
4562 /* For now this works only for exits that dominate the loop latch.
4563 TODO: extend to other conditions inside the loop body. */
4564 ex_bb = gimple_bb (use->stmt);
4565 if (use->stmt != last_stmt (ex_bb)
4566 || gimple_code (use->stmt) != GIMPLE_COND
4567 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4568 return false;
4570 exit = EDGE_SUCC (ex_bb, 0);
4571 if (flow_bb_inside_loop_p (loop, exit->dest))
4572 exit = EDGE_SUCC (ex_bb, 1);
4573 if (flow_bb_inside_loop_p (loop, exit->dest))
4574 return false;
4576 desc = niter_for_exit (data, exit);
4577 if (!desc)
4578 return false;
4580 /* Determine whether we can use the variable to test the exit condition.
4581 This is the case iff the period of the induction variable is greater
4582 than the number of iterations for which the exit condition is true. */
4583 period = iv_period (cand->iv);
4585 /* If the number of iterations is constant, compare against it directly. */
4586 if (TREE_CODE (desc->niter) == INTEGER_CST)
4588 /* See cand_value_at. */
4589 if (stmt_after_increment (loop, cand, use->stmt))
4591 if (!tree_int_cst_lt (desc->niter, period))
4592 return false;
4594 else
4596 if (tree_int_cst_lt (period, desc->niter))
4597 return false;
4601 /* If not, and if this is the only possible exit of the loop, see whether
4602 we can get a conservative estimate on the number of iterations of the
4603 entire loop and compare against that instead. */
4604 else
4606 double_int period_value, max_niter;
4608 max_niter = desc->max;
4609 if (stmt_after_increment (loop, cand, use->stmt))
4610 max_niter += double_int_one;
4611 period_value = tree_to_double_int (period);
4612 if (max_niter.ugt (period_value))
4614 /* See if we can take advantage of inferred loop bound information. */
4615 if (data->loop_single_exit_p)
4617 if (!max_loop_iterations (loop, &max_niter))
4618 return false;
4619 /* The loop bound is already adjusted by adding 1. */
4620 if (max_niter.ugt (period_value))
4621 return false;
4623 else
4624 return false;
4628 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4630 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4631 aff_combination_to_tree (&bnd));
4632 *comp = iv_elimination_compare (data, use);
4634 /* It is unlikely that computing the number of iterations using division
4635 would be more profitable than keeping the original induction variable. */
4636 if (expression_expensive_p (*bound))
4637 return false;
4639 /* Sometimes it is possible to handle the situation that the number of
4640 iterations may be zero unless additional assumptions hold, by using <
4641 instead of != in the exit condition.
4643 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4644 base the exit condition on it. However, that is often too
4645 expensive. */
4646 if (!integer_zerop (desc->may_be_zero))
4647 return iv_elimination_compare_lt (data, cand, comp, desc);
4649 return true;
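/* Editor's sketch, not part of GCC: the period test above on plain
   numbers.  A candidate may replace the exit test only if it cannot wrap
   before the exit is taken; when the use is after the increment, the
   candidate has advanced one extra step, so the bound must be strict.  */

#include <stdint.h>

static int
toy_may_eliminate (uint64_t period, uint64_t niter, int after_increment)
{
  return after_increment ? niter < period : niter <= period;
}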
4652 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4653 be copied, if it is used in the loop body and DATA->body_includes_call is set. */
4655 static int
4656 parm_decl_cost (struct ivopts_data *data, tree bound)
4658 tree sbound = bound;
4659 STRIP_NOPS (sbound);
4661 if (TREE_CODE (sbound) == SSA_NAME
4662 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4663 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4664 && data->body_includes_call)
4665 return COSTS_N_INSNS (1);
4667 return 0;
4670 /* Determines cost of basing replacement of USE on CAND in a condition. */
4672 static bool
4673 determine_use_iv_cost_condition (struct ivopts_data *data,
4674 struct iv_use *use, struct iv_cand *cand)
4676 tree bound = NULL_TREE;
4677 struct iv *cmp_iv;
4678 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4679 comp_cost elim_cost, express_cost, cost, bound_cost;
4680 bool ok;
4681 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4682 tree *control_var, *bound_cst;
4683 enum tree_code comp = ERROR_MARK;
4685 /* Only consider real candidates. */
4686 if (!cand->iv)
4688 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4689 ERROR_MARK, -1);
4690 return false;
4693 /* Try iv elimination. */
4694 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4696 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4697 if (elim_cost.cost == 0)
4698 elim_cost.cost = parm_decl_cost (data, bound);
4699 else if (TREE_CODE (bound) == INTEGER_CST)
4700 elim_cost.cost = 0;
4701 /* If we replace a loop condition 'i < n' with 'p < base + n',
4702 depends_on_elim will have 'base' and 'n' set, which implies
4703 that both 'base' and 'n' will be live during the loop. More likely,
4704 'base + n' will be loop invariant, resulting in only one live value
4705 during the loop. So in that case we clear depends_on_elim and set
4706 elim_inv_expr_id instead. */
4707 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4709 elim_inv_expr_id = get_expr_id (data, bound);
4710 bitmap_clear (depends_on_elim);
4712 /* The bound is a loop invariant, so it will be only computed
4713 once. */
4714 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4716 else
4717 elim_cost = infinite_cost;
4719 /* Try expressing the original giv. If it is compared with an invariant,
4720 note that we cannot get rid of it. */
4721 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4722 NULL, &cmp_iv);
4723 gcc_assert (ok);
4725 /* When the condition is a comparison of the candidate IV against
4726 zero, prefer this IV.
4728 TODO: The constant that we're subtracting from the cost should
4729 be target-dependent. This information should be added to the
4730 target costs for each backend. */
4731 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4732 && integer_zerop (*bound_cst)
4733 && (operand_equal_p (*control_var, cand->var_after, 0)
4734 || operand_equal_p (*control_var, cand->var_before, 0)))
4735 elim_cost.cost -= 1;
4737 express_cost = get_computation_cost (data, use, cand, false,
4738 &depends_on_express, NULL,
4739 &express_inv_expr_id);
4740 fd_ivopts_data = data;
4741 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4743 /* Count the cost of the original bound as well. */
4744 bound_cost = force_var_cost (data, *bound_cst, NULL);
4745 if (bound_cost.cost == 0)
4746 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4747 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4748 bound_cost.cost = 0;
4749 express_cost.cost += bound_cost.cost;
4751 /* Choose the better approach, preferring the eliminated IV. */
4752 if (compare_costs (elim_cost, express_cost) <= 0)
4754 cost = elim_cost;
4755 depends_on = depends_on_elim;
4756 depends_on_elim = NULL;
4757 inv_expr_id = elim_inv_expr_id;
4759 else
4761 cost = express_cost;
4762 depends_on = depends_on_express;
4763 depends_on_express = NULL;
4764 bound = NULL_TREE;
4765 comp = ERROR_MARK;
4766 inv_expr_id = express_inv_expr_id;
4769 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4771 if (depends_on_elim)
4772 BITMAP_FREE (depends_on_elim);
4773 if (depends_on_express)
4774 BITMAP_FREE (depends_on_express);
4776 return !infinite_cost_p (cost);
4779 /* Determines cost of basing replacement of USE on CAND. Returns false
4780 if USE cannot be based on CAND. */
4782 static bool
4783 determine_use_iv_cost (struct ivopts_data *data,
4784 struct iv_use *use, struct iv_cand *cand)
4786 switch (use->type)
4788 case USE_NONLINEAR_EXPR:
4789 return determine_use_iv_cost_generic (data, use, cand);
4791 case USE_ADDRESS:
4792 return determine_use_iv_cost_address (data, use, cand);
4794 case USE_COMPARE:
4795 return determine_use_iv_cost_condition (data, use, cand);
4797 default:
4798 gcc_unreachable ();
4802 /* Return true if get_computation_cost indicates that autoincrement is
4803 a possibility for the pair of USE and CAND, false otherwise. */
4805 static bool
4806 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4807 struct iv_cand *cand)
4809 bitmap depends_on;
4810 bool can_autoinc;
4811 comp_cost cost;
4813 if (use->type != USE_ADDRESS)
4814 return false;
4816 cost = get_computation_cost (data, use, cand, true, &depends_on,
4817 &can_autoinc, NULL);
4819 BITMAP_FREE (depends_on);
4821 return !infinite_cost_p (cost) && can_autoinc;
4824 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4825 use that allows autoincrement, and set their AINC_USE if possible. */
4827 static void
4828 set_autoinc_for_original_candidates (struct ivopts_data *data)
4830 unsigned i, j;
4832 for (i = 0; i < n_iv_cands (data); i++)
4834 struct iv_cand *cand = iv_cand (data, i);
4835 struct iv_use *closest = NULL;
4836 if (cand->pos != IP_ORIGINAL)
4837 continue;
4838 for (j = 0; j < n_iv_uses (data); j++)
4840 struct iv_use *use = iv_use (data, j);
4841 unsigned uid = gimple_uid (use->stmt);
4842 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4843 || uid > gimple_uid (cand->incremented_at))
4844 continue;
4845 if (closest == NULL || uid > gimple_uid (closest->stmt))
4846 closest = use;
4848 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4849 continue;
4850 cand->ainc_use = closest;
4854 /* Finds the candidates for the induction variables. */
4856 static void
4857 find_iv_candidates (struct ivopts_data *data)
4859 /* Add commonly used ivs. */
4860 add_standard_iv_candidates (data);
4862 /* Add old induction variables. */
4863 add_old_ivs_candidates (data);
4865 /* Add induction variables derived from uses. */
4866 add_derived_ivs_candidates (data);
4868 set_autoinc_for_original_candidates (data);
4870 /* Record the important candidates. */
4871 record_important_candidates (data);
4874 /* Determines costs of basing the use of the iv on an iv candidate. */
4876 static void
4877 determine_use_iv_costs (struct ivopts_data *data)
4879 unsigned i, j;
4880 struct iv_use *use;
4881 struct iv_cand *cand;
4882 bitmap to_clear = BITMAP_ALLOC (NULL);
4884 alloc_use_cost_map (data);
4886 for (i = 0; i < n_iv_uses (data); i++)
4888 use = iv_use (data, i);
4890 if (data->consider_all_candidates)
4892 for (j = 0; j < n_iv_cands (data); j++)
4894 cand = iv_cand (data, j);
4895 determine_use_iv_cost (data, use, cand);
4898 else
4900 bitmap_iterator bi;
4902 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4904 cand = iv_cand (data, j);
4905 if (!determine_use_iv_cost (data, use, cand))
4906 bitmap_set_bit (to_clear, j);
4909 /* Remove the candidates for which the cost is infinite from
4910 the list of related candidates. */
4911 bitmap_and_compl_into (use->related_cands, to_clear);
4912 bitmap_clear (to_clear);
4916 BITMAP_FREE (to_clear);
4918 if (dump_file && (dump_flags & TDF_DETAILS))
4920 fprintf (dump_file, "Use-candidate costs:\n");
4922 for (i = 0; i < n_iv_uses (data); i++)
4924 use = iv_use (data, i);
4926 fprintf (dump_file, "Use %d:\n", i);
4927 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4928 for (j = 0; j < use->n_map_members; j++)
4930 if (!use->cost_map[j].cand
4931 || infinite_cost_p (use->cost_map[j].cost))
4932 continue;
4934 fprintf (dump_file, " %d\t%d\t%d\t",
4935 use->cost_map[j].cand->id,
4936 use->cost_map[j].cost.cost,
4937 use->cost_map[j].cost.complexity);
4938 if (use->cost_map[j].depends_on)
4939 bitmap_print (dump_file,
4940 use->cost_map[j].depends_on, "","");
4941 if (use->cost_map[j].inv_expr_id != -1)
4942 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4943 fprintf (dump_file, "\n");
4946 fprintf (dump_file, "\n");
4948 fprintf (dump_file, "\n");
4952 /* Determines cost of the candidate CAND. */
4954 static void
4955 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4957 comp_cost cost_base;
4958 unsigned cost, cost_step;
4959 tree base;
4961 if (!cand->iv)
4963 cand->cost = 0;
4964 return;
4967 /* There are two costs associated with the candidate -- its increment
4968 and its initialization. The second is almost negligible for any loop
4969 that rolls enough, so we take it into account only a little. */
4971 base = cand->iv->base;
4972 cost_base = force_var_cost (data, base, NULL);
4973 /* It is exceptional for the iv register to happen to be initialized with
4974 the proper value at no cost. In general, there will at least be a regcopy
4975 or a const set. */
4976 if (cost_base.cost == 0)
4977 cost_base.cost = COSTS_N_INSNS (1);
4978 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4980 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4982 /* Prefer the original ivs unless we may gain something by replacing them.
4983 The reason is to make debugging simpler; so this is not relevant for
4984 artificial ivs created by other optimization passes. */
4985 if (cand->pos != IP_ORIGINAL
4986 || !SSA_NAME_VAR (cand->var_before)
4987 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4988 cost++;
4990 /* Prefer not to insert statements into latch unless there are some
4991 already (so that we do not create unnecessary jumps). */
4992 if (cand->pos == IP_END
4993 && empty_block_p (ip_end_pos (data->current_loop)))
4994 cost++;
4996 cand->cost = cost;
4997 cand->cost_step = cost_step;
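/* Editor's sketch, not part of GCC: the candidate cost computed above,
   with adjust_setup_cost modeled as amortizing the setup over the average
   iteration count (assumed nonzero).  All parameters are hypothetical.  */

static unsigned
toy_cand_cost (unsigned cost_step, unsigned cost_base, unsigned avg_niter,
	       int is_original_user_iv, int inserts_into_empty_latch)
{
  unsigned cost = cost_step + cost_base / avg_niter;

  if (!is_original_user_iv)
    cost++;			/* prefer the original ivs */
  if (inserts_into_empty_latch)
    cost++;			/* avoid creating an extra jump */
  return cost;
}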
5000 /* Determines costs of computation of the candidates. */
5002 static void
5003 determine_iv_costs (struct ivopts_data *data)
5005 unsigned i;
5007 if (dump_file && (dump_flags & TDF_DETAILS))
5009 fprintf (dump_file, "Candidate costs:\n");
5010 fprintf (dump_file, " cand\tcost\n");
5013 for (i = 0; i < n_iv_cands (data); i++)
5015 struct iv_cand *cand = iv_cand (data, i);
5017 determine_iv_cost (data, cand);
5019 if (dump_file && (dump_flags & TDF_DETAILS))
5020 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5023 if (dump_file && (dump_flags & TDF_DETAILS))
5024 fprintf (dump_file, "\n");
5027 /* Calculates cost for having SIZE induction variables. */
5029 static unsigned
5030 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5032 /* We add size to the cost, so that we prefer eliminating ivs
5033 if possible. */
5034 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5035 data->body_includes_call);
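/* Editor's sketch, not part of GCC: the real pressure model lives in
   estimate_reg_pressure_cost; this toy only illustrates its shape.  Once
   SIZE ivs plus the other values live in the loop exceed the available
   registers, each extra value pays a spill penalty.  The formula and the
   parameters here are purely hypothetical.  */

static unsigned
toy_global_cost_for_size (unsigned size, unsigned regs_used,
			  unsigned avail_regs, unsigned spill_cost)
{
  unsigned pressure = size + regs_used;
  unsigned cost = size;		/* bias towards eliminating ivs */

  if (pressure > avail_regs)
    cost += (pressure - avail_regs) * spill_cost;
  return cost;
}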
5038 /* For each size of the induction variable set determine the penalty. */
5040 static void
5041 determine_set_costs (struct ivopts_data *data)
5043 unsigned j, n;
5044 gimple phi;
5045 gimple_stmt_iterator psi;
5046 tree op;
5047 struct loop *loop = data->current_loop;
5048 bitmap_iterator bi;
5050 if (dump_file && (dump_flags & TDF_DETAILS))
5052 fprintf (dump_file, "Global costs:\n");
5053 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5054 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5055 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5056 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5059 n = 0;
5060 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5062 phi = gsi_stmt (psi);
5063 op = PHI_RESULT (phi);
5065 if (virtual_operand_p (op))
5066 continue;
5068 if (get_iv (data, op))
5069 continue;
5071 n++;
5074 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5076 struct version_info *info = ver_info (data, j);
5078 if (info->inv_id && info->has_nonlin_use)
5079 n++;
5082 data->regs_used = n;
5083 if (dump_file && (dump_flags & TDF_DETAILS))
5084 fprintf (dump_file, " regs_used %d\n", n);
5086 if (dump_file && (dump_flags & TDF_DETAILS))
5088 fprintf (dump_file, " cost for size:\n");
5089 fprintf (dump_file, " ivs\tcost\n");
5090 for (j = 0; j <= 2 * target_avail_regs; j++)
5091 fprintf (dump_file, " %d\t%d\n", j,
5092 ivopts_global_cost_for_size (data, j));
5093 fprintf (dump_file, "\n");
5097 /* Returns true if A is a cheaper cost pair than B. */
5099 static bool
5100 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5102 int cmp;
5104 if (!a)
5105 return false;
5107 if (!b)
5108 return true;
5110 cmp = compare_costs (a->cost, b->cost);
5111 if (cmp < 0)
5112 return true;
5114 if (cmp > 0)
5115 return false;
5117 /* In case the costs are the same, prefer the cheaper candidate. */
5118 if (a->cand->cost < b->cand->cost)
5119 return true;
5121 return false;
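/* Editor's sketch, not part of GCC: the ordering implemented above, on
   plain integers.  A missing pair is the worst option; otherwise compare
   the use costs and break ties by the candidate's own cost.  */

struct toy_pair
{
  int use_cost;
  int cand_cost;
};

static int
toy_cheaper_cost_pair (const struct toy_pair *a, const struct toy_pair *b)
{
  if (!a)
    return 0;
  if (!b)
    return 1;
  if (a->use_cost != b->use_cost)
    return a->use_cost < b->use_cost;
  return a->cand_cost < b->cand_cost;
}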
5125 /* Returns the candidate by which USE is expressed in IVS. */
5127 static struct cost_pair *
5128 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5130 return ivs->cand_for_use[use->id];
5133 /* Computes the cost field of the IVS structure. */
5135 static void
5136 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5138 comp_cost cost = ivs->cand_use_cost;
5140 cost.cost += ivs->cand_cost;
5142 cost.cost += ivopts_global_cost_for_size (data,
5143 ivs->n_regs + ivs->num_used_inv_expr);
5145 ivs->cost = cost;
5148 /* Remove invariants in set INVS from set IVS. */
5150 static void
5151 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5153 bitmap_iterator bi;
5154 unsigned iid;
5156 if (!invs)
5157 return;
5159 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5161 ivs->n_invariant_uses[iid]--;
5162 if (ivs->n_invariant_uses[iid] == 0)
5163 ivs->n_regs--;
5167 /* Set USE not to be expressed by any candidate in IVS. */
5169 static void
5170 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5171 struct iv_use *use)
5173 unsigned uid = use->id, cid;
5174 struct cost_pair *cp;
5176 cp = ivs->cand_for_use[uid];
5177 if (!cp)
5178 return;
5179 cid = cp->cand->id;
5181 ivs->bad_uses++;
5182 ivs->cand_for_use[uid] = NULL;
5183 ivs->n_cand_uses[cid]--;
5185 if (ivs->n_cand_uses[cid] == 0)
5187 bitmap_clear_bit (ivs->cands, cid);
5188 /* Do not count the pseudocandidates. */
5189 if (cp->cand->iv)
5190 ivs->n_regs--;
5191 ivs->n_cands--;
5192 ivs->cand_cost -= cp->cand->cost;
5194 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5197 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5199 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5201 if (cp->inv_expr_id != -1)
5203 ivs->used_inv_expr[cp->inv_expr_id]--;
5204 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5205 ivs->num_used_inv_expr--;
5207 iv_ca_recount_cost (data, ivs);
5210 /* Add invariants in set INVS to set IVS. */
5212 static void
5213 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5215 bitmap_iterator bi;
5216 unsigned iid;
5218 if (!invs)
5219 return;
5221 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5223 ivs->n_invariant_uses[iid]++;
5224 if (ivs->n_invariant_uses[iid] == 1)
5225 ivs->n_regs++;
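/* Editor's sketch, not part of GCC: the two helpers above reference-count
   each invariant so that it contributes a register to N_REGS exactly
   while at least one selected cost pair depends on it.  */

static void
toy_inv_ref (unsigned *n_inv_uses, unsigned *n_regs, unsigned iid, int add)
{
  if (add && ++n_inv_uses[iid] == 1)
    (*n_regs)++;
  else if (!add && --n_inv_uses[iid] == 0)
    (*n_regs)--;
}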
5229 /* Set cost pair for USE in set IVS to CP. */
5231 static void
5232 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5233 struct iv_use *use, struct cost_pair *cp)
5235 unsigned uid = use->id, cid;
5237 if (ivs->cand_for_use[uid] == cp)
5238 return;
5240 if (ivs->cand_for_use[uid])
5241 iv_ca_set_no_cp (data, ivs, use);
5243 if (cp)
5245 cid = cp->cand->id;
5247 ivs->bad_uses--;
5248 ivs->cand_for_use[uid] = cp;
5249 ivs->n_cand_uses[cid]++;
5250 if (ivs->n_cand_uses[cid] == 1)
5252 bitmap_set_bit (ivs->cands, cid);
5253 /* Do not count the pseudocandidates. */
5254 if (cp->cand->iv)
5255 ivs->n_regs++;
5256 ivs->n_cands++;
5257 ivs->cand_cost += cp->cand->cost;
5259 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5262 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5263 iv_ca_set_add_invariants (ivs, cp->depends_on);
5265 if (cp->inv_expr_id != -1)
5267 ivs->used_inv_expr[cp->inv_expr_id]++;
5268 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5269 ivs->num_used_inv_expr++;
5271 iv_ca_recount_cost (data, ivs);
5275 /* Extend set IVS by expressing USE by some of the candidates in it
5276 if possible. All important candidates will be considered
5277 if IMPORTANT_CANDIDATES is true. */
5279 static void
5280 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5281 struct iv_use *use, bool important_candidates)
5283 struct cost_pair *best_cp = NULL, *cp;
5284 bitmap_iterator bi;
5285 bitmap cands;
5286 unsigned i;
5288 gcc_assert (ivs->upto >= use->id);
5290 if (ivs->upto == use->id)
5292 ivs->upto++;
5293 ivs->bad_uses++;
5296 cands = (important_candidates ? data->important_candidates : ivs->cands);
5297 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5299 struct iv_cand *cand = iv_cand (data, i);
5301 cp = get_use_iv_cost (data, use, cand);
5303 if (cheaper_cost_pair (cp, best_cp))
5304 best_cp = cp;
5307 iv_ca_set_cp (data, ivs, use, best_cp);
5310 /* Get cost for assignment IVS. */
5312 static comp_cost
5313 iv_ca_cost (struct iv_ca *ivs)
5315 /* This was a conditional expression but it triggered a bug in
5316 Sun C 5.5. */
5317 if (ivs->bad_uses)
5318 return infinite_cost;
5319 else
5320 return ivs->cost;
5323 /* Returns true if all dependences of CP are among invariants in IVS. */
5325 static bool
5326 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5328 unsigned i;
5329 bitmap_iterator bi;
5331 if (!cp->depends_on)
5332 return true;
5334 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5336 if (ivs->n_invariant_uses[i] == 0)
5337 return false;
5340 return true;
5343 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
5344 it before NEXT_CHANGE. */
5346 static struct iv_ca_delta *
5347 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5348 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5350 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5352 change->use = use;
5353 change->old_cp = old_cp;
5354 change->new_cp = new_cp;
5355 change->next_change = next_change;
5357 return change;
5360 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5361 are rewritten. */
5363 static struct iv_ca_delta *
5364 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5366 struct iv_ca_delta *last;
5368 if (!l2)
5369 return l1;
5371 if (!l1)
5372 return l2;
5374 for (last = l1; last->next_change; last = last->next_change)
5375 continue;
5376 last->next_change = l2;
5378 return l1;
5381 /* Reverse the list of changes DELTA, forming the inverse to it. */
5383 static struct iv_ca_delta *
5384 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5386 struct iv_ca_delta *act, *next, *prev = NULL;
5387 struct cost_pair *tmp;
5389 for (act = delta; act; act = next)
5391 next = act->next_change;
5392 act->next_change = prev;
5393 prev = act;
5395 tmp = act->old_cp;
5396 act->old_cp = act->new_cp;
5397 act->new_cp = tmp;
5400 return prev;
5403 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5404 reverted instead. */
5406 static void
5407 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5408 struct iv_ca_delta *delta, bool forward)
5410 struct cost_pair *from, *to;
5411 struct iv_ca_delta *act;
5413 if (!forward)
5414 delta = iv_ca_delta_reverse (delta);
5416 for (act = delta; act; act = act->next_change)
5418 from = act->old_cp;
5419 to = act->new_cp;
5420 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5421 iv_ca_set_cp (data, ivs, act->use, to);
5424 if (!forward)
5425 iv_ca_delta_reverse (delta);
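/* Editor's note: the delta list acts as an undo log.  The typical pattern,
   visible in iv_ca_extend and iv_ca_narrow below, is:

     iv_ca_delta_commit (data, ivs, delta, true);   // try the change
     cost = iv_ca_cost (ivs);                       // evaluate it
     iv_ca_delta_commit (data, ivs, delta, false);  // undo it again
*/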
5428 /* Returns true if CAND is used in IVS. */
5430 static bool
5431 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5433 return ivs->n_cand_uses[cand->id] > 0;
5436 /* Returns number of induction variable candidates in the set IVS. */
5438 static unsigned
5439 iv_ca_n_cands (struct iv_ca *ivs)
5441 return ivs->n_cands;
5444 /* Free the list of changes DELTA. */
5446 static void
5447 iv_ca_delta_free (struct iv_ca_delta **delta)
5449 struct iv_ca_delta *act, *next;
5451 for (act = *delta; act; act = next)
5453 next = act->next_change;
5454 free (act);
5457 *delta = NULL;
5460 /* Allocates a new iv candidate assignment. */
5462 static struct iv_ca *
5463 iv_ca_new (struct ivopts_data *data)
5465 struct iv_ca *nw = XNEW (struct iv_ca);
5467 nw->upto = 0;
5468 nw->bad_uses = 0;
5469 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5470 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5471 nw->cands = BITMAP_ALLOC (NULL);
5472 nw->n_cands = 0;
5473 nw->n_regs = 0;
5474 nw->cand_use_cost = no_cost;
5475 nw->cand_cost = 0;
5476 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5477 nw->cost = no_cost;
5478 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5479 nw->num_used_inv_expr = 0;
5481 return nw;
5484 /* Free memory occupied by the set IVS. */
5486 static void
5487 iv_ca_free (struct iv_ca **ivs)
5489 free ((*ivs)->cand_for_use);
5490 free ((*ivs)->n_cand_uses);
5491 BITMAP_FREE ((*ivs)->cands);
5492 free ((*ivs)->n_invariant_uses);
5493 free ((*ivs)->used_inv_expr);
5494 free (*ivs);
5495 *ivs = NULL;
5498 /* Dumps IVS to FILE. */
5500 static void
5501 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5503 const char *pref = " invariants ";
5504 unsigned i;
5505 comp_cost cost = iv_ca_cost (ivs);
5507 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5508 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5509 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5510 bitmap_print (file, ivs->cands, " candidates: ","\n");
5512 for (i = 0; i < ivs->upto; i++)
5514 struct iv_use *use = iv_use (data, i);
5515 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5516 if (cp)
5517 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5518 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5519 else
5520 fprintf (file, " use:%d --> ??\n", use->id);
5523 for (i = 1; i <= data->max_inv_id; i++)
5524 if (ivs->n_invariant_uses[i])
5526 fprintf (file, "%s%d", pref, i);
5527 pref = ", ";
5529 fprintf (file, "\n\n");
5532 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5533 of the new set, and store the differences in DELTA. The number of induction
5534 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
5535 true, the function tries to find a solution with a minimal number of iv candidates. */
5537 static comp_cost
5538 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5539 struct iv_cand *cand, struct iv_ca_delta **delta,
5540 unsigned *n_ivs, bool min_ncand)
5542 unsigned i;
5543 comp_cost cost;
5544 struct iv_use *use;
5545 struct cost_pair *old_cp, *new_cp;
5547 *delta = NULL;
5548 for (i = 0; i < ivs->upto; i++)
5550 use = iv_use (data, i);
5551 old_cp = iv_ca_cand_for_use (ivs, use);
5553 if (old_cp
5554 && old_cp->cand == cand)
5555 continue;
5557 new_cp = get_use_iv_cost (data, use, cand);
5558 if (!new_cp)
5559 continue;
5561 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5562 continue;
5564 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5565 continue;
5567 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5570 iv_ca_delta_commit (data, ivs, *delta, true);
5571 cost = iv_ca_cost (ivs);
5572 if (n_ivs)
5573 *n_ivs = iv_ca_n_cands (ivs);
5574 iv_ca_delta_commit (data, ivs, *delta, false);
5576 return cost;
5579 /* Try narrowing set IVS by removing CAND. Return the cost of
5580 the new set and store the differences in DELTA. */
5582 static comp_cost
5583 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5584 struct iv_cand *cand, struct iv_ca_delta **delta)
5586 unsigned i, ci;
5587 struct iv_use *use;
5588 struct cost_pair *old_cp, *new_cp, *cp;
5589 bitmap_iterator bi;
5590 struct iv_cand *cnd;
5591 comp_cost cost;
5593 *delta = NULL;
5594 for (i = 0; i < n_iv_uses (data); i++)
5596 use = iv_use (data, i);
5598 old_cp = iv_ca_cand_for_use (ivs, use);
5599 if (old_cp->cand != cand)
5600 continue;
5602 new_cp = NULL;
5604 if (data->consider_all_candidates)
5606 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5608 if (ci == cand->id)
5609 continue;
5611 cnd = iv_cand (data, ci);
5613 cp = get_use_iv_cost (data, use, cnd);
5614 if (!cp)
5615 continue;
5617 if (!iv_ca_has_deps (ivs, cp))
5618 continue;
5620 if (!cheaper_cost_pair (cp, new_cp))
5621 continue;
5623 new_cp = cp;
5626 else
5628 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5630 if (ci == cand->id)
5631 continue;
5633 cnd = iv_cand (data, ci);
5635 cp = get_use_iv_cost (data, use, cnd);
5636 if (!cp)
5637 continue;
5638 if (!iv_ca_has_deps (ivs, cp))
5639 continue;
5641 if (!cheaper_cost_pair (cp, new_cp))
5642 continue;
5644 new_cp = cp;
5648 if (!new_cp)
5650 iv_ca_delta_free (delta);
5651 return infinite_cost;
5654 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5657 iv_ca_delta_commit (data, ivs, *delta, true);
5658 cost = iv_ca_cost (ivs);
5659 iv_ca_delta_commit (data, ivs, *delta, false);
5661 return cost;
5664 /* Try optimizing the set of candidates IVS by removing candidates different
5665 from EXCEPT_CAND from it. Return the cost of the new set, and store the
5666 differences in DELTA. */
5668 static comp_cost
5669 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5670 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5672 bitmap_iterator bi;
5673 struct iv_ca_delta *act_delta, *best_delta;
5674 unsigned i;
5675 comp_cost best_cost, acost;
5676 struct iv_cand *cand;
5678 best_delta = NULL;
5679 best_cost = iv_ca_cost (ivs);
5681 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5683 cand = iv_cand (data, i);
5685 if (cand == except_cand)
5686 continue;
5688 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5690 if (compare_costs (acost, best_cost) < 0)
5692 best_cost = acost;
5693 iv_ca_delta_free (&best_delta);
5694 best_delta = act_delta;
5696 else
5697 iv_ca_delta_free (&act_delta);
5700 if (!best_delta)
5702 *delta = NULL;
5703 return best_cost;
5706 /* Recurse to possibly remove other unnecessary ivs. */
5707 iv_ca_delta_commit (data, ivs, best_delta, true);
5708 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5709 iv_ca_delta_commit (data, ivs, best_delta, false);
5710 *delta = iv_ca_delta_join (best_delta, *delta);
5711 return best_cost;
5714 /* Tries to extend the set IVS in the best possible way in order
5715 to express USE. If ORIGINALP is true, prefer candidates from
5716 the original set of IVs, otherwise favor important candidates not
5717 based on any memory object. */
5719 static bool
5720 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5721 struct iv_use *use, bool originalp)
5723 comp_cost best_cost, act_cost;
5724 unsigned i;
5725 bitmap_iterator bi;
5726 struct iv_cand *cand;
5727 struct iv_ca_delta *best_delta = NULL, *act_delta;
5728 struct cost_pair *cp;
5730 iv_ca_add_use (data, ivs, use, false);
5731 best_cost = iv_ca_cost (ivs);
5733 cp = iv_ca_cand_for_use (ivs, use);
5734 if (!cp)
5736 ivs->upto--;
5737 ivs->bad_uses--;
5738 iv_ca_add_use (data, ivs, use, true);
5739 best_cost = iv_ca_cost (ivs);
5740 cp = iv_ca_cand_for_use (ivs, use);
5742 if (cp)
5744 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5745 iv_ca_set_no_cp (data, ivs, use);
5748 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5749 first try important candidates not based on any memory object. Only if
5750 this fails, try the specific ones. Rationale -- in loops with many
5751 variables the best choice often is to use just one generic biv. If we
5752 added here many ivs specific to the uses, the optimization algorithm later
5753 would be likely to get stuck in a local minimum, thus causing us to create
5754 too many ivs. The approach from few ivs to more seems more likely to be
5755 successful -- starting from few ivs, replacing an expensive use by a
5756 specific iv should always be a win. */
5757 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5759 cand = iv_cand (data, i);
5761 if (originalp && cand->pos != IP_ORIGINAL)
5762 continue;
5764 if (!originalp && cand->iv->base_object != NULL_TREE)
5765 continue;
5767 if (iv_ca_cand_used_p (ivs, cand))
5768 continue;
5770 cp = get_use_iv_cost (data, use, cand);
5771 if (!cp)
5772 continue;
5774 iv_ca_set_cp (data, ivs, use, cp);
5775 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5776 true);
5777 iv_ca_set_no_cp (data, ivs, use);
5778 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5780 if (compare_costs (act_cost, best_cost) < 0)
5782 best_cost = act_cost;
5784 iv_ca_delta_free (&best_delta);
5785 best_delta = act_delta;
5787 else
5788 iv_ca_delta_free (&act_delta);
5791 if (infinite_cost_p (best_cost))
5793 for (i = 0; i < use->n_map_members; i++)
5795 cp = use->cost_map + i;
5796 cand = cp->cand;
5797 if (!cand)
5798 continue;
5800 /* Already tried this. */
5801 if (cand->important)
5803 if (originalp && cand->pos == IP_ORIGINAL)
5804 continue;
5805 if (!originalp && cand->iv->base_object == NULL_TREE)
5806 continue;
5809 if (iv_ca_cand_used_p (ivs, cand))
5810 continue;
5812 act_delta = NULL;
5813 iv_ca_set_cp (data, ivs, use, cp);
5814 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5815 iv_ca_set_no_cp (data, ivs, use);
5816 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5817 cp, act_delta);
5819 if (compare_costs (act_cost, best_cost) < 0)
5821 best_cost = act_cost;
5823 if (best_delta)
5824 iv_ca_delta_free (&best_delta);
5825 best_delta = act_delta;
5827 else
5828 iv_ca_delta_free (&act_delta);
5832 iv_ca_delta_commit (data, ivs, best_delta, true);
5833 iv_ca_delta_free (&best_delta);
5835 return !infinite_cost_p (best_cost);
5838 /* Finds an initial assignment of candidates to uses. */
5840 static struct iv_ca *
5841 get_initial_solution (struct ivopts_data *data, bool originalp)
5843 struct iv_ca *ivs = iv_ca_new (data);
5844 unsigned i;
5846 for (i = 0; i < n_iv_uses (data); i++)
5847 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5849 iv_ca_free (&ivs);
5850 return NULL;
5853 return ivs;
5856 /* Tries to improve the set of induction variables IVS. */
5858 static bool
5859 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5861 unsigned i, n_ivs;
5862 comp_cost acost, best_cost = iv_ca_cost (ivs);
5863 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5864 struct iv_cand *cand;
5866 /* Try extending the set of induction variables by one. */
5867 for (i = 0; i < n_iv_cands (data); i++)
5869 cand = iv_cand (data, i);
5871 if (iv_ca_cand_used_p (ivs, cand))
5872 continue;
5874 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5875 if (!act_delta)
5876 continue;
5878 /* If we successfully added the candidate and the set is small enough,
5879 try optimizing it by removing other candidates. */
5880 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5882 iv_ca_delta_commit (data, ivs, act_delta, true);
5883 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5884 iv_ca_delta_commit (data, ivs, act_delta, false);
5885 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5888 if (compare_costs (acost, best_cost) < 0)
5890 best_cost = acost;
5891 iv_ca_delta_free (&best_delta);
5892 best_delta = act_delta;
5894 else
5895 iv_ca_delta_free (&act_delta);
5898 if (!best_delta)
5900 /* Try removing the candidates from the set instead. */
5901 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5903 /* Nothing more we can do. */
5904 if (!best_delta)
5905 return false;
5908 iv_ca_delta_commit (data, ivs, best_delta, true);
5909 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5910 iv_ca_delta_free (&best_delta);
5911 return true;
5914 /* Attempts to find the optimal set of induction variables. We use a simple
5915 greedy heuristic: we try to replace at most one candidate in the selected
5916 solution and remove the unused ivs while this improves the cost. */
5918 static struct iv_ca *
5919 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5921 struct iv_ca *set;
5923 /* Get the initial solution. */
5924 set = get_initial_solution (data, originalp);
5925 if (!set)
5927 if (dump_file && (dump_flags & TDF_DETAILS))
5928 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5929 return NULL;
5932 if (dump_file && (dump_flags & TDF_DETAILS))
5934 fprintf (dump_file, "Initial set of candidates:\n");
5935 iv_ca_dump (data, dump_file, set);
5938 while (try_improve_iv_set (data, set))
5940 if (dump_file && (dump_flags & TDF_DETAILS))
5942 fprintf (dump_file, "Improved to:\n");
5943 iv_ca_dump (data, dump_file, set);
5947 return set;
5950 static struct iv_ca *
5951 find_optimal_iv_set (struct ivopts_data *data)
5953 unsigned i;
5954 struct iv_ca *set, *origset;
5955 struct iv_use *use;
5956 comp_cost cost, origcost;
5958 /* Determine the cost based on a strategy that starts with original IVs,
5959 and try again using a strategy that prefers candidates not based
5960 on any IVs. */
5961 origset = find_optimal_iv_set_1 (data, true);
5962 set = find_optimal_iv_set_1 (data, false);
5964 if (!origset && !set)
5965 return NULL;
5967 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5968 cost = set ? iv_ca_cost (set) : infinite_cost;
5970 if (dump_file && (dump_flags & TDF_DETAILS))
5972 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5973 origcost.cost, origcost.complexity);
5974 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5975 cost.cost, cost.complexity);
5978 /* Choose the one with the best cost. */
5979 if (compare_costs (origcost, cost) <= 0)
5981 if (set)
5982 iv_ca_free (&set);
5983 set = origset;
5985 else if (origset)
5986 iv_ca_free (&origset);
5988 for (i = 0; i < n_iv_uses (data); i++)
5990 use = iv_use (data, i);
5991 use->selected = iv_ca_cand_for_use (set, use)->cand;
5994 return set;
5997 /* Creates a new induction variable corresponding to CAND. */
5999 static void
6000 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6002 gimple_stmt_iterator incr_pos;
6003 tree base;
6004 bool after = false;
6006 if (!cand->iv)
6007 return;
6009 switch (cand->pos)
6011 case IP_NORMAL:
6012 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6013 break;
6015 case IP_END:
6016 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6017 after = true;
6018 break;
6020 case IP_AFTER_USE:
6021 after = true;
6022 /* fall through */
6023 case IP_BEFORE_USE:
6024 incr_pos = gsi_for_stmt (cand->incremented_at);
6025 break;
6027 case IP_ORIGINAL:
6028 /* Mark that the iv is preserved. */
6029 name_info (data, cand->var_before)->preserve_biv = true;
6030 name_info (data, cand->var_after)->preserve_biv = true;
6032 /* Rewrite the increment so that it uses var_before directly. */
6033 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6034 return;
6037 gimple_add_tmp_var (cand->var_before);
6039 base = unshare_expr (cand->iv->base);
6041 create_iv (base, unshare_expr (cand->iv->step),
6042 cand->var_before, data->current_loop,
6043 &incr_pos, after, &cand->var_before, &cand->var_after);
6046 /* Creates new induction variables described in SET. */
6048 static void
6049 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6051 unsigned i;
6052 struct iv_cand *cand;
6053 bitmap_iterator bi;
6055 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6057 cand = iv_cand (data, i);
6058 create_new_iv (data, cand);
6061 if (dump_file && (dump_flags & TDF_DETAILS))
6063 fprintf (dump_file, "\nSelected IV set: \n");
6064 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6066 cand = iv_cand (data, i);
6067 dump_cand (dump_file, cand);
6069 fprintf (dump_file, "\n");
6073 /* Rewrites USE (definition of iv used in a nonlinear expression)
6074 using candidate CAND. */
6076 static void
6077 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6078 struct iv_use *use, struct iv_cand *cand)
6080 tree comp;
6081 tree op, tgt;
6082 gimple ass;
6083 gimple_stmt_iterator bsi;
6085 /* An important special case -- if we are asked to express value of
6086 the original iv by itself, just exit; there is no need to
6087 introduce a new computation (that might also need casting the
6088 variable to unsigned and back). */
6089 if (cand->pos == IP_ORIGINAL
6090 && cand->incremented_at == use->stmt)
6092 enum tree_code stmt_code;
6094 gcc_assert (is_gimple_assign (use->stmt));
6095 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6097 /* Check whether we may leave the computation unchanged.
6098 This is the case only if it does not rely on other
6099 computations in the loop -- otherwise, the computation
6100 we rely upon may be removed in remove_unused_ivs,
6101 thus leading to ICE. */
6102 stmt_code = gimple_assign_rhs_code (use->stmt);
6103 if (stmt_code == PLUS_EXPR
6104 || stmt_code == MINUS_EXPR
6105 || stmt_code == POINTER_PLUS_EXPR)
6107 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6108 op = gimple_assign_rhs2 (use->stmt);
6109 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6110 op = gimple_assign_rhs1 (use->stmt);
6111 else
6112 op = NULL_TREE;
6114 else
6115 op = NULL_TREE;
6117 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6118 return;
6121 comp = get_computation (data->current_loop, use, cand);
6122 gcc_assert (comp != NULL_TREE);
6124 switch (gimple_code (use->stmt))
6126 case GIMPLE_PHI:
6127 tgt = PHI_RESULT (use->stmt);
6129 /* If we should keep the biv, do not replace it. */
6130 if (name_info (data, tgt)->preserve_biv)
6131 return;
6133 bsi = gsi_after_labels (gimple_bb (use->stmt));
6134 break;
6136 case GIMPLE_ASSIGN:
6137 tgt = gimple_assign_lhs (use->stmt);
6138 bsi = gsi_for_stmt (use->stmt);
6139 break;
6141 default:
6142 gcc_unreachable ();
6145 if (!valid_gimple_rhs_p (comp)
6146 || (gimple_code (use->stmt) != GIMPLE_PHI
6147 /* We can't allow re-allocating the stmt as it might be pointed
6148 to still. */
6149 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6150 >= gimple_num_ops (gsi_stmt (bsi)))))
6152 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6153 true, GSI_SAME_STMT);
6154 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6156 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6157 /* As this isn't a plain copy we have to reset alignment
6158 information. */
6159 if (SSA_NAME_PTR_INFO (comp))
6160 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6164 if (gimple_code (use->stmt) == GIMPLE_PHI)
6166 ass = gimple_build_assign (tgt, comp);
6167 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6169 bsi = gsi_for_stmt (use->stmt);
6170 remove_phi_node (&bsi, false);
6172 else
6174 gimple_assign_set_rhs_from_tree (&bsi, comp);
6175 use->stmt = gsi_stmt (bsi);
6179 /* Performs a peephole optimization to reorder the iv update statement with
6180 a mem ref to enable instruction combining in later phases. The mem ref uses
6181 the iv value before the update, so the reordering transformation requires
6182 adjustment of the offset. CAND is the selected IV_CAND.
6184 Example:
6186 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6187 iv2 = iv1 + 1;
6189 if (t < val) (1)
6190 goto L;
6191 goto Head;
6194 Directly propagating t over to (1) will introduce an overlapping live range
6195 and thus increase register pressure. This peephole transforms it into:
6198 iv2 = iv1 + 1;
6199 t = MEM_REF (base, iv2, 8, 8);
6200 if (t < val)
6201 goto L;
6202 goto Head;
6205 static void
6206 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6208 tree var_after;
6209 gimple iv_update, stmt;
6210 basic_block bb;
6211 gimple_stmt_iterator gsi, gsi_iv;
6213 if (cand->pos != IP_NORMAL)
6214 return;
6216 var_after = cand->var_after;
6217 iv_update = SSA_NAME_DEF_STMT (var_after);
6219 bb = gimple_bb (iv_update);
6220 gsi = gsi_last_nondebug_bb (bb);
6221 stmt = gsi_stmt (gsi);
6223 /* Only handle conditional statement for now. */
6224 if (gimple_code (stmt) != GIMPLE_COND)
6225 return;
6227 gsi_prev_nondebug (&gsi);
6228 stmt = gsi_stmt (gsi);
6229 if (stmt != iv_update)
6230 return;
6232 gsi_prev_nondebug (&gsi);
6233 if (gsi_end_p (gsi))
6234 return;
6236 stmt = gsi_stmt (gsi);
6237 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6238 return;
6240 if (stmt != use->stmt)
6241 return;
6243 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6244 return;
6246 if (dump_file && (dump_flags & TDF_DETAILS))
6248 fprintf (dump_file, "Reordering \n");
6249 print_gimple_stmt (dump_file, iv_update, 0, 0);
6250 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6251 fprintf (dump_file, "\n");
6254 gsi = gsi_for_stmt (use->stmt);
6255 gsi_iv = gsi_for_stmt (iv_update);
6256 gsi_move_before (&gsi_iv, &gsi);
6258 cand->pos = IP_BEFORE_USE;
6259 cand->incremented_at = use->stmt;
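/* Editor's sketch, not part of GCC: the reordering above at the source
   level, for a hypothetical loop body.  The compensating offset keeps the
   loaded value identical while letting the increment precede the load,
   which autoincrement-capable targets can then fuse.  */

extern int val;

static int
toy_before_reorder (int *p, int i)
{
  int t = p[i];			/* mem ref uses the iv value before update */
  i = i + 1;			/* iv update */
  return t < val;
}

static int
toy_after_reorder (int *p, int i)
{
  int t;
  i = i + 1;			/* iv update moved before the mem ref */
  t = p[i - 1];			/* same address via the adjusted offset */
  return t < val;
}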
6262 /* Rewrites USE (address that is an iv) using candidate CAND. */
6264 static void
6265 rewrite_use_address (struct ivopts_data *data,
6266 struct iv_use *use, struct iv_cand *cand)
6268 aff_tree aff;
6269 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6270 tree base_hint = NULL_TREE;
6271 tree ref, iv;
6272 bool ok;
6274 adjust_iv_update_pos (cand, use);
6275 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6276 gcc_assert (ok);
6277 unshare_aff_combination (&aff);
  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
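  /* If the cost pair records a bound, the iv can be eliminated: rewrite
     the exit test to compare the candidate directly against the bound.  */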
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
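      /* The bound does not depend on values defined inside the loop, so
	 any statements needed to compute it go on the preheader edge.  */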
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }
  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
	rewrite_use_nonlinear_expr (data, use, cand);
	break;

      case USE_ADDRESS:
	rewrite_use_address (data, use, cand);
	break;

      case USE_COMPARE:
	rewrite_use_compare (data, use, cand);
	break;

      default:
	gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
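
	  /* Before the iv is released, try to re-express any debug
	     statements that still refer to it in terms of a surviving
	     candidate.  */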
	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;
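
	      /* Pick the candidate that best reconstructs this iv for
		 debug info: an identical step counts most (weight 4),
		 then a matching machine mode (2), then a constant base
		 (1), so candidates are effectively compared
		 lexicographically on these three properties.  */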
	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;
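
	      /* If the def has several debug uses, or is used inside a
		 larger debug expression, bind the computation to a debug
		 temp (a DEBUG_EXPR_DECL) once and substitute the temp
		 instead of duplicating the expression.  */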
	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}
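
	      /* Point every debug bind of the def at the replacement
		 expression (or at the debug temp created above).  */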
	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);
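
  /* If the pass created more SSA names than the version table can hold,
     reallocate it with room to spare; XCNEWVEC zeroes the new entries.  */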
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  DATA is the
   pass data to free.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  htab_delete (data->inv_expr_tab);
}
/* Returns true if the loop body BODY of NUM_NODES blocks includes any
   function calls; calls to inexpensive built-ins do not count.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }

  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
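  /* Whether the body contains a (non-trivial) call influences the
     register pressure estimate used by the cost model.  */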
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}