gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "gimple-pretty-print.h"
  73 #include "tree-flow.h"
  74 #include "cfgloop.h"
  75 #include "tree-pass.h"
  76 #include "ggc.h"
  77 #include "insn-config.h"
  78 #include "pointer-set.h"
  79 #include "hashtab.h"
  80 #include "tree-chrec.h"
  81 #include "tree-scalar-evolution.h"
  82 #include "cfgloop.h"
  83 #include "params.h"
  84 #include "langhooks.h"
  85 #include "tree-affine.h"
  86 #include "target.h"
  87 #include "tree-inline.h"
  88 #include "tree-ssa-propagate.h"
  89 #include "expmed.h"
  90
  91 /* FIXME: Expressions are expanded to RTL in this pass to determine the
  92    cost of different addressing modes.  This should be moved to a TBD
  93    interface between the GIMPLE and RTL worlds.  */
  94 #include "expr.h"
  95 #include "recog.h"
  96
  97 /* The infinite cost.  */
  98 #define INFTY 10000000
  99
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102 /* Returns the expected number of loop iterations for LOOP.
 103    The average trip count is computed from profile data if it
 104    exists. */
 105
 106 static inline HOST_WIDE_INT
 107 avg_loop_niter (struct loop *loop)
 108 {
 109   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 110   if (niter == -1)
 111     return AVG_LOOP_NITER (loop);
 112
 113   return niter;
 114 }
 115
 116 /* Representation of the induction variable.  */
 117 struct iv
 118 {
 119   tree base;            /* Initial value of the iv.  */
 120   tree base_object;     /* A memory object to that the induction variable points.  */
 121   tree step;            /* Step of the iv (constant only).  */
 122   tree ssa_name;        /* The ssa name with the value.  */
 123   bool biv_p;           /* Is it a biv?  */
 124   bool have_use_for;    /* Do we already have a use for it?  */
 125   unsigned use_id;      /* The identifier in the use if it is the case.  */
 126 };
 127
 128 /* Per-ssa version information (induction variable descriptions, etc.).  */
 129 struct version_info
 130 {
 131   tree name;            /* The ssa name.  */
 132   struct iv *iv;        /* Induction variable description.  */
 133   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 134                            an expression that is not an induction variable.  */
 135   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 136   unsigned inv_id;      /* Id of an invariant.  */
 137 };
 138
 139 /* Types of uses.  */
 140 enum use_type
 141 {
 142   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 143   USE_ADDRESS,          /* Use in an address.  */
 144   USE_COMPARE           /* Use is a compare.  */
 145 };
 146
 147 /* Cost of a computation.  */
 148 typedef struct
 149 {
 150   int cost;             /* The runtime cost.  */
 151   unsigned complexity;  /* The estimate of the complexity of the code for
 152                            the computation (in no concrete units --
 153                            complexity field should be larger for more
 154                            complex expressions and addressing modes).  */
 155 } comp_cost;
 156
 157 static const comp_cost no_cost = {0, 0};
 158 static const comp_cost infinite_cost = {INFTY, INFTY};
 159
 160 /* The candidate - cost pair.  */
 161 struct cost_pair
 162 {
 163   struct iv_cand *cand; /* The candidate.  */
 164   comp_cost cost;       /* The cost.  */
 165   bitmap depends_on;    /* The list of invariants that have to be
 166                            preserved.  */
 167   tree value;           /* For final value elimination, the expression for
 168                            the final value of the iv.  For iv elimination,
 169                            the new bound to compare with.  */
 170   enum tree_code comp;  /* For iv elimination, the comparison.  */
 171   int inv_expr_id;      /* Loop invariant expression id.  */
 172 };
 173
 174 /* Use.  */
 175 struct iv_use
 176 {
 177   unsigned id;          /* The id of the use.  */
 178   enum use_type type;   /* Type of the use.  */
 179   struct iv *iv;        /* The induction variable it is based on.  */
 180   gimple stmt;          /* Statement in that it occurs.  */
 181   tree *op_p;           /* The place where it occurs.  */
 182   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 183                            important ones.  */
 184
 185   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 186   struct cost_pair *cost_map;
 187                         /* The costs wrto the iv candidates.  */
 188
 189   struct iv_cand *selected;
 190                         /* The selected candidate.  */
 191 };
 192
 193 /* The position where the iv is computed.  */
 194 enum iv_position
 195 {
 196   IP_NORMAL,            /* At the end, just before the exit condition.  */
 197   IP_END,               /* At the end of the latch block.  */
 198   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 199   IP_AFTER_USE,         /* Immediately after a specific use.  */
 200   IP_ORIGINAL           /* The original biv.  */
 201 };
 202
 203 /* The induction variable candidate.  */
 204 struct iv_cand
 205 {
 206   unsigned id;          /* The number of the candidate.  */
 207   bool important;       /* Whether this is an "important" candidate, i.e. such
 208                            that it should be considered by all uses.  */
 209   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 210   gimple incremented_at;/* For original biv, the statement where it is
 211                            incremented.  */
 212   tree var_before;      /* The variable used for it before increment.  */
 213   tree var_after;       /* The variable used for it after increment.  */
 214   struct iv *iv;        /* The value of the candidate.  NULL for
 215                            "pseudocandidate" used to indicate the possibility
 216                            to replace the final value of an iv by direct
 217                            computation of the value.  */
 218   unsigned cost;        /* Cost of the candidate.  */
 219   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 220   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 221                               where it is incremented.  */
 222   bitmap depends_on;    /* The list of invariants that are used in step of the
 223                            biv.  */
 224 };
 225
 226 /* Loop invariant expression hashtable entry.  */
 227 struct iv_inv_expr_ent
 228 {
 229   tree expr;
 230   int id;
 231   hashval_t hash;
 232 };
 233
 234 /* The data used by the induction variable optimizations.  */
 235
 236 typedef struct iv_use *iv_use_p;
 237 DEF_VEC_P(iv_use_p);
 238 DEF_VEC_ALLOC_P(iv_use_p,heap);
 239
 240 typedef struct iv_cand *iv_cand_p;
 241 DEF_VEC_P(iv_cand_p);
 242 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 243
 244 struct ivopts_data
 245 {
 246   /* The currently optimized loop.  */
 247   struct loop *current_loop;
 248
 249   /* Numbers of iterations for all exits of the current loop.  */
 250   struct pointer_map_t *niters;
 251
 252   /* Number of registers used in it.  */
 253   unsigned regs_used;
 254
 255   /* The size of version_info array allocated.  */
 256   unsigned version_info_size;
 257
 258   /* The array of information for the ssa names.  */
 259   struct version_info *version_info;
 260
 261   /* The hashtable of loop invariant expressions created
 262      by ivopt.  */
 263   htab_t inv_expr_tab;
 264
 265   /* Loop invariant expression id.  */
 266   int inv_expr_id;
 267
 268   /* The bitmap of indices in version_info whose value was changed.  */
 269   bitmap relevant;
 270
 271   /* The uses of induction variables.  */
 272   VEC(iv_use_p,heap) *iv_uses;
 273
 274   /* The candidates.  */
 275   VEC(iv_cand_p,heap) *iv_candidates;
 276
 277   /* A bitmap of important candidates.  */
 278   bitmap important_candidates;
 279
 280   /* The maximum invariant id.  */
 281   unsigned max_inv_id;
 282
 283   /* Whether to consider just related and important candidates when replacing a
 284      use.  */
 285   bool consider_all_candidates;
 286
 287   /* Are we optimizing for speed?  */
 288   bool speed;
 289
 290   /* Whether the loop body includes any function calls.  */
 291   bool body_includes_call;
 292
 293   /* Whether the loop body can only be exited via single exit.  */
 294   bool loop_single_exit_p;
 295 };
 296
 297 /* An assignment of iv candidates to uses.  */
 298
 299 struct iv_ca
 300 {
 301   /* The number of uses covered by the assignment.  */
 302   unsigned upto;
 303
 304   /* Number of uses that cannot be expressed by the candidates in the set.  */
 305   unsigned bad_uses;
 306
 307   /* Candidate assigned to a use, together with the related costs.  */
 308   struct cost_pair **cand_for_use;
 309
 310   /* Number of times each candidate is used.  */
 311   unsigned *n_cand_uses;
 312
 313   /* The candidates used.  */
 314   bitmap cands;
 315
 316   /* The number of candidates in the set.  */
 317   unsigned n_cands;
 318
 319   /* Total number of registers needed.  */
 320   unsigned n_regs;
 321
 322   /* Total cost of expressing uses.  */
 323   comp_cost cand_use_cost;
 324
 325   /* Total cost of candidates.  */
 326   unsigned cand_cost;
 327
 328   /* Number of times each invariant is used.  */
 329   unsigned *n_invariant_uses;
 330
 331   /* The array holding the number of uses of each loop
 332      invariant expressions created by ivopt.  */
 333   unsigned *used_inv_expr;
 334
 335   /* The number of created loop invariants.  */
 336   unsigned num_used_inv_expr;
 337
 338   /* Total cost of the assignment.  */
 339   comp_cost cost;
 340 };
 341
 342 /* Difference of two iv candidate assignments.  */
 343
 344 struct iv_ca_delta
 345 {
 346   /* Changed use.  */
 347   struct iv_use *use;
 348
 349   /* An old assignment (for rollback purposes).  */
 350   struct cost_pair *old_cp;
 351
 352   /* A new assignment.  */
 353   struct cost_pair *new_cp;
 354
 355   /* Next change in the list.  */
 356   struct iv_ca_delta *next_change;
 357 };
 358
 359 /* Bound on number of candidates below that all candidates are considered.  */
 360
 361 #define CONSIDER_ALL_CANDIDATES_BOUND \
 362   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 363
 364 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 365    optimizing such a loop would help, and it would take ages).  */
 366
 367 #define MAX_CONSIDERED_USES \
 368   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 369
 370 /* If there are at most this number of ivs in the set, try removing unnecessary
 371    ivs from the set always.  */
 372
 373 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 374   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 375
 376 /* The list of trees for that the decl_rtl field must be reset is stored
 377    here.  */
 378
 379 static VEC(tree,heap) *decl_rtl_to_reset;
 380
 381 static comp_cost force_expr_to_var_cost (tree, bool);
 382
 383 /* Number of uses recorded in DATA.  */
 384
 385 static inline unsigned
 386 n_iv_uses (struct ivopts_data *data)
 387 {
 388   return VEC_length (iv_use_p, data->iv_uses);
 389 }
 390
 391 /* Ith use recorded in DATA.  */
 392
 393 static inline struct iv_use *
 394 iv_use (struct ivopts_data *data, unsigned i)
 395 {
 396   return VEC_index (iv_use_p, data->iv_uses, i);
 397 }
 398
 399 /* Number of candidates recorded in DATA.  */
 400
 401 static inline unsigned
 402 n_iv_cands (struct ivopts_data *data)
 403 {
 404   return VEC_length (iv_cand_p, data->iv_candidates);
 405 }
 406
 407 /* Ith candidate recorded in DATA.  */
 408
 409 static inline struct iv_cand *
 410 iv_cand (struct ivopts_data *data, unsigned i)
 411 {
 412   return VEC_index (iv_cand_p, data->iv_candidates, i);
 413 }
 414
 415 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 416
 417 edge
 418 single_dom_exit (struct loop *loop)
 419 {
 420   edge exit = single_exit (loop);
 421
 422   if (!exit)
 423     return NULL;
 424
 425   if (!just_once_each_iteration_p (loop, exit->src))
 426     return NULL;
 427
 428   return exit;
 429 }
 430
 431 /* Dumps information about the induction variable IV to FILE.  */
 432
 433 extern void dump_iv (FILE *, struct iv *);
 434 void
 435 dump_iv (FILE *file, struct iv *iv)
 436 {
 437   if (iv->ssa_name)
 438     {
 439       fprintf (file, "ssa name ");
 440       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 441       fprintf (file, "\n");
 442     }
 443
 444   fprintf (file, "  type ");
 445   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 446   fprintf (file, "\n");
 447
 448   if (iv->step)
 449     {
 450       fprintf (file, "  base ");
 451       print_generic_expr (file, iv->base, TDF_SLIM);
 452       fprintf (file, "\n");
 453
 454       fprintf (file, "  step ");
 455       print_generic_expr (file, iv->step, TDF_SLIM);
 456       fprintf (file, "\n");
 457     }
 458   else
 459     {
 460       fprintf (file, "  invariant ");
 461       print_generic_expr (file, iv->base, TDF_SLIM);
 462       fprintf (file, "\n");
 463     }
 464
 465   if (iv->base_object)
 466     {
 467       fprintf (file, "  base object ");
 468       print_generic_expr (file, iv->base_object, TDF_SLIM);
 469       fprintf (file, "\n");
 470     }
 471
 472   if (iv->biv_p)
 473     fprintf (file, "  is a biv\n");
 474 }
 475
 476 /* Dumps information about the USE to FILE.  */
 477
 478 extern void dump_use (FILE *, struct iv_use *);
 479 void
 480 dump_use (FILE *file, struct iv_use *use)
 481 {
 482   fprintf (file, "use %d\n", use->id);
 483
 484   switch (use->type)
 485     {
 486     case USE_NONLINEAR_EXPR:
 487       fprintf (file, "  generic\n");
 488       break;
 489
 490     case USE_ADDRESS:
 491       fprintf (file, "  address\n");
 492       break;
 493
 494     case USE_COMPARE:
 495       fprintf (file, "  compare\n");
 496       break;
 497
 498     default:
 499       gcc_unreachable ();
 500     }
 501
 502   fprintf (file, "  in statement ");
 503   print_gimple_stmt (file, use->stmt, 0, 0);
 504   fprintf (file, "\n");
 505
 506   fprintf (file, "  at position ");
 507   if (use->op_p)
 508     print_generic_expr (file, *use->op_p, TDF_SLIM);
 509   fprintf (file, "\n");
 510
 511   dump_iv (file, use->iv);
 512
 513   if (use->related_cands)
 514     {
 515       fprintf (file, "  related candidates ");
 516       dump_bitmap (file, use->related_cands);
 517     }
 518 }
 519
 520 /* Dumps information about the uses to FILE.  */
 521
 522 extern void dump_uses (FILE *, struct ivopts_data *);
 523 void
 524 dump_uses (FILE *file, struct ivopts_data *data)
 525 {
 526   unsigned i;
 527   struct iv_use *use;
 528
 529   for (i = 0; i < n_iv_uses (data); i++)
 530     {
 531       use = iv_use (data, i);
 532
 533       dump_use (file, use);
 534       fprintf (file, "\n");
 535     }
 536 }
 537
 538 /* Dumps information about induction variable candidate CAND to FILE.  */
 539
 540 extern void dump_cand (FILE *, struct iv_cand *);
 541 void
 542 dump_cand (FILE *file, struct iv_cand *cand)
 543 {
 544   struct iv *iv = cand->iv;
 545
 546   fprintf (file, "candidate %d%s\n",
 547            cand->id, cand->important ? " (important)" : "");
 548
 549   if (cand->depends_on)
 550     {
 551       fprintf (file, "  depends on ");
 552       dump_bitmap (file, cand->depends_on);
 553     }
 554
 555   if (!iv)
 556     {
 557       fprintf (file, "  final value replacement\n");
 558       return;
 559     }
 560
 561   if (cand->var_before)
 562     {
 563       fprintf (file, "  var_before ");
 564       print_generic_expr (file, cand->var_before, TDF_SLIM);
 565       fprintf (file, "\n");
 566     }
 567   if (cand->var_after)
 568     {
 569       fprintf (file, "  var_after ");
 570       print_generic_expr (file, cand->var_after, TDF_SLIM);
 571       fprintf (file, "\n");
 572     }
 573
 574   switch (cand->pos)
 575     {
 576     case IP_NORMAL:
 577       fprintf (file, "  incremented before exit test\n");
 578       break;
 579
 580     case IP_BEFORE_USE:
 581       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 582       break;
 583
 584     case IP_AFTER_USE:
 585       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 586       break;
 587
 588     case IP_END:
 589       fprintf (file, "  incremented at end\n");
 590       break;
 591
 592     case IP_ORIGINAL:
 593       fprintf (file, "  original biv\n");
 594       break;
 595     }
 596
 597   dump_iv (file, iv);
 598 }
 599
 600 /* Returns the info for ssa version VER.  */
 601
 602 static inline struct version_info *
 603 ver_info (struct ivopts_data *data, unsigned ver)
 604 {
 605   return data->version_info + ver;
 606 }
 607
 608 /* Returns the info for ssa name NAME.  */
 609
 610 static inline struct version_info *
 611 name_info (struct ivopts_data *data, tree name)
 612 {
 613   return ver_info (data, SSA_NAME_VERSION (name));
 614 }
 615
 616 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 617    emitted in LOOP.  */
 618
 619 static bool
 620 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 621 {
 622   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 623
 624   gcc_assert (bb);
 625
 626   if (sbb == loop->latch)
 627     return true;
 628
 629   if (sbb != bb)
 630     return false;
 631
 632   return stmt == last_stmt (bb);
 633 }
 634
 635 /* Returns true if STMT if after the place where the original induction
 636    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 637    if the positions are identical.  */
 638
 639 static bool
 640 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 641 {
 642   basic_block cand_bb = gimple_bb (cand->incremented_at);
 643   basic_block stmt_bb = gimple_bb (stmt);
 644
 645   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 646     return false;
 647
 648   if (stmt_bb != cand_bb)
 649     return true;
 650
 651   if (true_if_equal
 652       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 653     return true;
 654   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 655 }
 656
 657 /* Returns true if STMT if after the place where the induction variable
 658    CAND is incremented in LOOP.  */
 659
 660 static bool
 661 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 662 {
 663   switch (cand->pos)
 664     {
 665     case IP_END:
 666       return false;
 667
 668     case IP_NORMAL:
 669       return stmt_after_ip_normal_pos (loop, stmt);
 670
 671     case IP_ORIGINAL:
 672     case IP_AFTER_USE:
 673       return stmt_after_inc_pos (cand, stmt, false);
 674
 675     case IP_BEFORE_USE:
 676       return stmt_after_inc_pos (cand, stmt, true);
 677
 678     default:
 679       gcc_unreachable ();
 680     }
 681 }
 682
 683 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 684
 685 static bool
 686 abnormal_ssa_name_p (tree exp)
 687 {
 688   if (!exp)
 689     return false;
 690
 691   if (TREE_CODE (exp) != SSA_NAME)
 692     return false;
 693
 694   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 695 }
 696
 697 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 698    abnormal phi node.  Callback for for_each_index.  */
 699
 700 static bool
 701 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 702                                   void *data ATTRIBUTE_UNUSED)
 703 {
 704   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 705     {
 706       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 707         return false;
 708       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 709         return false;
 710     }
 711
 712   return !abnormal_ssa_name_p (*index);
 713 }
 714
 715 /* Returns true if EXPR contains a ssa name that occurs in an
 716    abnormal phi node.  */
 717
 718 bool
 719 contains_abnormal_ssa_name_p (tree expr)
 720 {
 721   enum tree_code code;
 722   enum tree_code_class codeclass;
 723
 724   if (!expr)
 725     return false;
 726
 727   code = TREE_CODE (expr);
 728   codeclass = TREE_CODE_CLASS (code);
 729
 730   if (code == SSA_NAME)
 731     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 732
 733   if (code == INTEGER_CST
 734       || is_gimple_min_invariant (expr))
 735     return false;
 736
 737   if (code == ADDR_EXPR)
 738     return !for_each_index (&TREE_OPERAND (expr, 0),
 739                             idx_contains_abnormal_ssa_name_p,
 740                             NULL);
 741
 742   if (code == COND_EXPR)
 743     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 744       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 745       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 746
 747   switch (codeclass)
 748     {
 749     case tcc_binary:
 750     case tcc_comparison:
 751       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 752         return true;
 753
 754       /* Fallthru.  */
 755     case tcc_unary:
 756       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 757         return true;
 758
 759       break;
 760
 761     default:
 762       gcc_unreachable ();
 763     }
 764
 765   return false;
 766 }
 767
 768 /*  Returns the structure describing number of iterations determined from
 769     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 770
 771 static struct tree_niter_desc *
 772 niter_for_exit (struct ivopts_data *data, edge exit)
 773 {
 774   struct tree_niter_desc *desc;
 775   void **slot;
 776
 777   if (!data->niters)
 778     {
 779       data->niters = pointer_map_create ();
 780       slot = NULL;
 781     }
 782   else
 783     slot = pointer_map_contains (data->niters, exit);
 784
 785   if (!slot)
 786     {
 787       /* Try to determine number of iterations.  We cannot safely work with ssa
 788          names that appear in phi nodes on abnormal edges, so that we do not
 789          create overlapping life ranges for them (PR 27283).  */
 790       desc = XNEW (struct tree_niter_desc);
 791       if (!number_of_iterations_exit (data->current_loop,
 792                                       exit, desc, true)
 793           || contains_abnormal_ssa_name_p (desc->niter))
 794         {
 795           XDELETE (desc);
 796           desc = NULL;
 797         }
 798       slot = pointer_map_insert (data->niters, exit);
 799       *slot = desc;
 800     }
 801   else
 802     desc = (struct tree_niter_desc *) *slot;
 803
 804   return desc;
 805 }
 806
 807 /* Returns the structure describing number of iterations determined from
 808    single dominating exit of DATA->current_loop, or NULL if something
 809    goes wrong.  */
 810
 811 static struct tree_niter_desc *
 812 niter_for_single_dom_exit (struct ivopts_data *data)
 813 {
 814   edge exit = single_dom_exit (data->current_loop);
 815
 816   if (!exit)
 817     return NULL;
 818
 819   return niter_for_exit (data, exit);
 820 }
 821
 822 /* Hash table equality function for expressions.  */
 823
 824 static int
 825 htab_inv_expr_eq (const void *ent1, const void *ent2)
 826 {
 827   const struct iv_inv_expr_ent *expr1 =
 828       (const struct iv_inv_expr_ent *)ent1;
 829   const struct iv_inv_expr_ent *expr2 =
 830       (const struct iv_inv_expr_ent *)ent2;
 831
 832   return expr1->hash == expr2->hash
 833          && operand_equal_p (expr1->expr, expr2->expr, 0);
 834 }
 835
 836 /* Hash function for loop invariant expressions.  */
 837
 838 static hashval_t
 839 htab_inv_expr_hash (const void *ent)
 840 {
 841   const struct iv_inv_expr_ent *expr =
 842       (const struct iv_inv_expr_ent *)ent;
 843   return expr->hash;
 844 }
 845
 846 /* Initializes data structures used by the iv optimization pass, stored
 847    in DATA.  */
 848
 849 static void
 850 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 851 {
 852   data->version_info_size = 2 * num_ssa_names;
 853   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 854   data->relevant = BITMAP_ALLOC (NULL);
 855   data->important_candidates = BITMAP_ALLOC (NULL);
 856   data->max_inv_id = 0;
 857   data->niters = NULL;
 858   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 859   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 860   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
 861                                     htab_inv_expr_eq, free);
 862   data->inv_expr_id = 0;
 863   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 864 }
 865
 866 /* Returns a memory object to that EXPR points.  In case we are able to
 867    determine that it does not point to any such object, NULL is returned.  */
 868
 869 static tree
 870 determine_base_object (tree expr)
 871 {
 872   enum tree_code code = TREE_CODE (expr);
 873   tree base, obj;
 874
 875   /* If this is a pointer casted to any type, we need to determine
 876      the base object for the pointer; so handle conversions before
 877      throwing away non-pointer expressions.  */
 878   if (CONVERT_EXPR_P (expr))
 879     return determine_base_object (TREE_OPERAND (expr, 0));
 880
 881   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 882     return NULL_TREE;
 883
 884   switch (code)
 885     {
 886     case INTEGER_CST:
 887       return NULL_TREE;
 888
 889     case ADDR_EXPR:
 890       obj = TREE_OPERAND (expr, 0);
 891       base = get_base_address (obj);
 892
 893       if (!base)
 894         return expr;
 895
 896       if (TREE_CODE (base) == MEM_REF)
 897         return determine_base_object (TREE_OPERAND (base, 0));
 898
 899       return fold_convert (ptr_type_node,
 900                            build_fold_addr_expr (base));
 901
 902     case POINTER_PLUS_EXPR:
 903       return determine_base_object (TREE_OPERAND (expr, 0));
 904
 905     case PLUS_EXPR:
 906     case MINUS_EXPR:
 907       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 908       gcc_unreachable ();
 909
 910     default:
 911       return fold_convert (ptr_type_node, expr);
 912     }
 913 }
 914
 915 /* Allocates an induction variable with given initial value BASE and step STEP
 916    for loop LOOP.  */
 917
 918 static struct iv *
 919 alloc_iv (tree base, tree step)
 920 {
 921   struct iv *iv = XCNEW (struct iv);
 922   gcc_assert (step != NULL_TREE);
 923
 924   iv->base = base;
 925   iv->base_object = determine_base_object (base);
 926   iv->step = step;
 927   iv->biv_p = false;
 928   iv->have_use_for = false;
 929   iv->use_id = 0;
 930   iv->ssa_name = NULL_TREE;
 931
 932   return iv;
 933 }
 934
 935 /* Sets STEP and BASE for induction variable IV.  */
 936
 937 static void
 938 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 939 {
 940   struct version_info *info = name_info (data, iv);
 941
 942   gcc_assert (!info->iv);
 943
 944   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 945   info->iv = alloc_iv (base, step);
 946   info->iv->ssa_name = iv;
 947 }
 948
 949 /* Finds induction variable declaration for VAR.  */
 950
 951 static struct iv *
 952 get_iv (struct ivopts_data *data, tree var)
 953 {
 954   basic_block bb;
 955   tree type = TREE_TYPE (var);
 956
 957   if (!POINTER_TYPE_P (type)
 958       && !INTEGRAL_TYPE_P (type))
 959     return NULL;
 960
 961   if (!name_info (data, var)->iv)
 962     {
 963       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 964
 965       if (!bb
 966           || !flow_bb_inside_loop_p (data->current_loop, bb))
 967         set_iv (data, var, var, build_int_cst (type, 0));
 968     }
 969
 970   return name_info (data, var)->iv;
 971 }
 972
 973 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 974    not define a simple affine biv with nonzero step.  */
 975
 976 static tree
 977 determine_biv_step (gimple phi)
 978 {
 979   struct loop *loop = gimple_bb (phi)->loop_father;
 980   tree name = PHI_RESULT (phi);
 981   affine_iv iv;
 982
 983   if (virtual_operand_p (name))
 984     return NULL_TREE;
 985
 986   if (!simple_iv (loop, loop, name, &iv, true))
 987     return NULL_TREE;
 988
 989   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 990 }
 991
 992 /* Finds basic ivs.  */
 993
 994 static bool
 995 find_bivs (struct ivopts_data *data)
 996 {
 997   gimple phi;
 998   tree step, type, base;
 999   bool found = false;
1000   struct loop *loop = data->current_loop;
1001   gimple_stmt_iterator psi;
1002
1003   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1004     {
1005       phi = gsi_stmt (psi);
1006
1007       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1008         continue;
1009
1010       step = determine_biv_step (phi);
1011       if (!step)
1012         continue;
1013
1014       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1015       base = expand_simple_operations (base);
1016       if (contains_abnormal_ssa_name_p (base)
1017           || contains_abnormal_ssa_name_p (step))
1018         continue;
1019
1020       type = TREE_TYPE (PHI_RESULT (phi));
1021       base = fold_convert (type, base);
1022       if (step)
1023         {
1024           if (POINTER_TYPE_P (type))
1025             step = convert_to_ptrofftype (step);
1026           else
1027             step = fold_convert (type, step);
1028         }
1029
1030       set_iv (data, PHI_RESULT (phi), base, step);
1031       found = true;
1032     }
1033
1034   return found;
1035 }
1036
1037 /* Marks basic ivs.  */
1038
1039 static void
1040 mark_bivs (struct ivopts_data *data)
1041 {
1042   gimple phi;
1043   tree var;
1044   struct iv *iv, *incr_iv;
1045   struct loop *loop = data->current_loop;
1046   basic_block incr_bb;
1047   gimple_stmt_iterator psi;
1048
1049   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1050     {
1051       phi = gsi_stmt (psi);
1052
1053       iv = get_iv (data, PHI_RESULT (phi));
1054       if (!iv)
1055         continue;
1056
1057       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1058       incr_iv = get_iv (data, var);
1059       if (!incr_iv)
1060         continue;
1061
1062       /* If the increment is in the subloop, ignore it.  */
1063       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1064       if (incr_bb->loop_father != data->current_loop
1065           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1066         continue;
1067
1068       iv->biv_p = true;
1069       incr_iv->biv_p = true;
1070     }
1071 }
1072
1073 /* Checks whether STMT defines a linear induction variable and stores its
1074    parameters to IV.  */
1075
1076 static bool
1077 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1078 {
1079   tree lhs;
1080   struct loop *loop = data->current_loop;
1081
1082   iv->base = NULL_TREE;
1083   iv->step = NULL_TREE;
1084
1085   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1086     return false;
1087
1088   lhs = gimple_assign_lhs (stmt);
1089   if (TREE_CODE (lhs) != SSA_NAME)
1090     return false;
1091
1092   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1093     return false;
1094   iv->base = expand_simple_operations (iv->base);
1095
1096   if (contains_abnormal_ssa_name_p (iv->base)
1097       || contains_abnormal_ssa_name_p (iv->step))
1098     return false;
1099
1100   /* If STMT could throw, then do not consider STMT as defining a GIV.
1101      While this will suppress optimizations, we can not safely delete this
1102      GIV and associated statements, even if it appears it is not used.  */
1103   if (stmt_could_throw_p (stmt))
1104     return false;
1105
1106   return true;
1107 }
1108
1109 /* Finds general ivs in statement STMT.  */
1110
1111 static void
1112 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1113 {
1114   affine_iv iv;
1115
1116   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1117     return;
1118
1119   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1120 }
1121
1122 /* Finds general ivs in basic block BB.  */
1123
1124 static void
1125 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1126 {
1127   gimple_stmt_iterator bsi;
1128
1129   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1130     find_givs_in_stmt (data, gsi_stmt (bsi));
1131 }
1132
1133 /* Finds general ivs.  */
1134
1135 static void
1136 find_givs (struct ivopts_data *data)
1137 {
1138   struct loop *loop = data->current_loop;
1139   basic_block *body = get_loop_body_in_dom_order (loop);
1140   unsigned i;
1141
1142   for (i = 0; i < loop->num_nodes; i++)
1143     find_givs_in_bb (data, body[i]);
1144   free (body);
1145 }
1146
1147 /* For each ssa name defined in LOOP determines whether it is an induction
1148    variable and if so, its initial value and step.  */
1149
1150 static bool
1151 find_induction_variables (struct ivopts_data *data)
1152 {
1153   unsigned i;
1154   bitmap_iterator bi;
1155
1156   if (!find_bivs (data))
1157     return false;
1158
1159   find_givs (data);
1160   mark_bivs (data);
1161
1162   if (dump_file && (dump_flags & TDF_DETAILS))
1163     {
1164       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1165
1166       if (niter)
1167         {
1168           fprintf (dump_file, "  number of iterations ");
1169           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1170           if (!integer_zerop (niter->may_be_zero))
1171             {
1172               fprintf (dump_file, "; zero if ");
1173               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1174             }
1175           fprintf (dump_file, "\n\n");
1176         };
1177
1178       fprintf (dump_file, "Induction variables:\n\n");
1179
1180       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1181         {
1182           if (ver_info (data, i)->iv)
1183             dump_iv (dump_file, ver_info (data, i)->iv);
1184         }
1185     }
1186
1187   return true;
1188 }
1189
1190 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1191
1192 static struct iv_use *
1193 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1194             gimple stmt, enum use_type use_type)
1195 {
1196   struct iv_use *use = XCNEW (struct iv_use);
1197
1198   use->id = n_iv_uses (data);
1199   use->type = use_type;
1200   use->iv = iv;
1201   use->stmt = stmt;
1202   use->op_p = use_p;
1203   use->related_cands = BITMAP_ALLOC (NULL);
1204
1205   /* To avoid showing ssa name in the dumps, if it was not reset by the
1206      caller.  */
1207   iv->ssa_name = NULL_TREE;
1208
1209   if (dump_file && (dump_flags & TDF_DETAILS))
1210     dump_use (dump_file, use);
1211
1212   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1213
1214   return use;
1215 }
1216
1217 /* Checks whether OP is a loop-level invariant and if so, records it.
1218    NONLINEAR_USE is true if the invariant is used in a way we do not
1219    handle specially.  */
1220
1221 static void
1222 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1223 {
1224   basic_block bb;
1225   struct version_info *info;
1226
1227   if (TREE_CODE (op) != SSA_NAME
1228       || virtual_operand_p (op))
1229     return;
1230
1231   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1232   if (bb
1233       && flow_bb_inside_loop_p (data->current_loop, bb))
1234     return;
1235
1236   info = name_info (data, op);
1237   info->name = op;
1238   info->has_nonlin_use |= nonlinear_use;
1239   if (!info->inv_id)
1240     info->inv_id = ++data->max_inv_id;
1241   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1242 }
1243
1244 /* Checks whether the use OP is interesting and if so, records it.  */
1245
1246 static struct iv_use *
1247 find_interesting_uses_op (struct ivopts_data *data, tree op)
1248 {
1249   struct iv *iv;
1250   struct iv *civ;
1251   gimple stmt;
1252   struct iv_use *use;
1253
1254   if (TREE_CODE (op) != SSA_NAME)
1255     return NULL;
1256
1257   iv = get_iv (data, op);
1258   if (!iv)
1259     return NULL;
1260
1261   if (iv->have_use_for)
1262     {
1263       use = iv_use (data, iv->use_id);
1264
1265       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1266       return use;
1267     }
1268
1269   if (integer_zerop (iv->step))
1270     {
1271       record_invariant (data, op, true);
1272       return NULL;
1273     }
1274   iv->have_use_for = true;
1275
1276   civ = XNEW (struct iv);
1277   *civ = *iv;
1278
1279   stmt = SSA_NAME_DEF_STMT (op);
1280   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1281               || is_gimple_assign (stmt));
1282
1283   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1284   iv->use_id = use->id;
1285
1286   return use;
1287 }
1288
1289 /* Given a condition in statement STMT, checks whether it is a compare
1290    of an induction variable and an invariant.  If this is the case,
1291    CONTROL_VAR is set to location of the iv, BOUND to the location of
1292    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1293    induction variable descriptions, and true is returned.  If this is not
1294    the case, CONTROL_VAR and BOUND are set to the arguments of the
1295    condition and false is returned.  */
1296
1297 static bool
1298 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1299                        tree **control_var, tree **bound,
1300                        struct iv **iv_var, struct iv **iv_bound)
1301 {
1302   /* The objects returned when COND has constant operands.  */
1303   static struct iv const_iv;
1304   static tree zero;
1305   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1306   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1307   bool ret = false;
1308
1309   if (gimple_code (stmt) == GIMPLE_COND)
1310     {
1311       op0 = gimple_cond_lhs_ptr (stmt);
1312       op1 = gimple_cond_rhs_ptr (stmt);
1313     }
1314   else
1315     {
1316       op0 = gimple_assign_rhs1_ptr (stmt);
1317       op1 = gimple_assign_rhs2_ptr (stmt);
1318     }
1319
1320   zero = integer_zero_node;
1321   const_iv.step = integer_zero_node;
1322
1323   if (TREE_CODE (*op0) == SSA_NAME)
1324     iv0 = get_iv (data, *op0);
1325   if (TREE_CODE (*op1) == SSA_NAME)
1326     iv1 = get_iv (data, *op1);
1327
1328   /* Exactly one of the compared values must be an iv, and the other one must
1329      be an invariant.  */
1330   if (!iv0 || !iv1)
1331     goto end;
1332
1333   if (integer_zerop (iv0->step))
1334     {
1335       /* Control variable may be on the other side.  */
1336       tmp_op = op0; op0 = op1; op1 = tmp_op;
1337       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1338     }
1339   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1340
1341 end:
1342   if (control_var)
1343     *control_var = op0;;
1344   if (iv_var)
1345     *iv_var = iv0;;
1346   if (bound)
1347     *bound = op1;
1348   if (iv_bound)
1349     *iv_bound = iv1;
1350
1351   return ret;
1352 }
1353
1354 /* Checks whether the condition in STMT is interesting and if so,
1355    records it.  */
1356
1357 static void
1358 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1359 {
1360   tree *var_p, *bound_p;
1361   struct iv *var_iv, *civ;
1362
1363   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1364     {
1365       find_interesting_uses_op (data, *var_p);
1366       find_interesting_uses_op (data, *bound_p);
1367       return;
1368     }
1369
1370   civ = XNEW (struct iv);
1371   *civ = *var_iv;
1372   record_use (data, NULL, civ, stmt, USE_COMPARE);
1373 }
1374
1375 /* Returns true if expression EXPR is obviously invariant in LOOP,
1376    i.e. if all its operands are defined outside of the LOOP.  LOOP
1377    should not be the function body.  */
1378
1379 bool
1380 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1381 {
1382   basic_block def_bb;
1383   unsigned i, len;
1384
1385   gcc_assert (loop_depth (loop) > 0);
1386
1387   if (is_gimple_min_invariant (expr))
1388     return true;
1389
1390   if (TREE_CODE (expr) == SSA_NAME)
1391     {
1392       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1393       if (def_bb
1394           && flow_bb_inside_loop_p (loop, def_bb))
1395         return false;
1396
1397       return true;
1398     }
1399
1400   if (!EXPR_P (expr))
1401     return false;
1402
1403   len = TREE_OPERAND_LENGTH (expr);
1404   for (i = 0; i < len; i++)
1405     if (TREE_OPERAND (expr, i)
1406         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1407       return false;
1408
1409   return true;
1410 }
1411
1412 /* Returns true if statement STMT is obviously invariant in LOOP,
1413    i.e. if all its operands on the RHS are defined outside of the LOOP.
1414    LOOP should not be the function body.  */
1415
1416 bool
1417 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1418 {
1419   unsigned i;
1420   tree lhs;
1421
1422   gcc_assert (loop_depth (loop) > 0);
1423
1424   lhs = gimple_get_lhs (stmt);
1425   for (i = 0; i < gimple_num_ops (stmt); i++)
1426     {
1427       tree op = gimple_op (stmt, i);
1428       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1429         return false;
1430     }
1431
1432   return true;
1433 }
1434
1435 /* Cumulates the steps of indices into DATA and replaces their values with the
1436    initial ones.  Returns false when the value of the index cannot be determined.
1437    Callback for for_each_index.  */
1438
1439 struct ifs_ivopts_data
1440 {
1441   struct ivopts_data *ivopts_data;
1442   gimple stmt;
1443   tree step;
1444 };
1445
1446 static bool
1447 idx_find_step (tree base, tree *idx, void *data)
1448 {
1449   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1450   struct iv *iv;
1451   tree step, iv_base, iv_step, lbound, off;
1452   struct loop *loop = dta->ivopts_data->current_loop;
1453
1454   /* If base is a component ref, require that the offset of the reference
1455      be invariant.  */
1456   if (TREE_CODE (base) == COMPONENT_REF)
1457     {
1458       off = component_ref_field_offset (base);
1459       return expr_invariant_in_loop_p (loop, off);
1460     }
1461
1462   /* If base is array, first check whether we will be able to move the
1463      reference out of the loop (in order to take its address in strength
1464      reduction).  In order for this to work we need both lower bound
1465      and step to be loop invariants.  */
1466   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1467     {
1468       /* Moreover, for a range, the size needs to be invariant as well.  */
1469       if (TREE_CODE (base) == ARRAY_RANGE_REF
1470           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1471         return false;
1472
1473       step = array_ref_element_size (base);
1474       lbound = array_ref_low_bound (base);
1475
1476       if (!expr_invariant_in_loop_p (loop, step)
1477           || !expr_invariant_in_loop_p (loop, lbound))
1478         return false;
1479     }
1480
1481   if (TREE_CODE (*idx) != SSA_NAME)
1482     return true;
1483
1484   iv = get_iv (dta->ivopts_data, *idx);
1485   if (!iv)
1486     return false;
1487
1488   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1489           *&x[0], which is not folded and does not trigger the
1490           ARRAY_REF path below.  */
1491   *idx = iv->base;
1492
1493   if (integer_zerop (iv->step))
1494     return true;
1495
1496   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1497     {
1498       step = array_ref_element_size (base);
1499
1500       /* We only handle addresses whose step is an integer constant.  */
1501       if (TREE_CODE (step) != INTEGER_CST)
1502         return false;
1503     }
1504   else
1505     /* The step for pointer arithmetics already is 1 byte.  */
1506     step = size_one_node;
1507
1508   iv_base = iv->base;
1509   iv_step = iv->step;
1510   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1511                             sizetype, &iv_base, &iv_step, dta->stmt,
1512                             false))
1513     {
1514       /* The index might wrap.  */
1515       return false;
1516     }
1517
1518   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1519   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1520
1521   return true;
1522 }
1523
1524 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1525    object is passed to it in DATA.  */
1526
1527 static bool
1528 idx_record_use (tree base, tree *idx,
1529                 void *vdata)
1530 {
1531   struct ivopts_data *data = (struct ivopts_data *) vdata;
1532   find_interesting_uses_op (data, *idx);
1533   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1534     {
1535       find_interesting_uses_op (data, array_ref_element_size (base));
1536       find_interesting_uses_op (data, array_ref_low_bound (base));
1537     }
1538   return true;
1539 }
1540
1541 /* If we can prove that TOP = cst * BOT for some constant cst,
1542    store cst to MUL and return true.  Otherwise return false.
1543    The returned value is always sign-extended, regardless of the
1544    signedness of TOP and BOT.  */
1545
1546 static bool
1547 constant_multiple_of (tree top, tree bot, double_int *mul)
1548 {
1549   tree mby;
1550   enum tree_code code;
1551   double_int res, p0, p1;
1552   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1553
1554   STRIP_NOPS (top);
1555   STRIP_NOPS (bot);
1556
1557   if (operand_equal_p (top, bot, 0))
1558     {
1559       *mul = double_int_one;
1560       return true;
1561     }
1562
1563   code = TREE_CODE (top);
1564   switch (code)
1565     {
1566     case MULT_EXPR:
1567       mby = TREE_OPERAND (top, 1);
1568       if (TREE_CODE (mby) != INTEGER_CST)
1569         return false;
1570
1571       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1572         return false;
1573
1574       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1575                               precision);
1576       return true;
1577
1578     case PLUS_EXPR:
1579     case MINUS_EXPR:
1580       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1581           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1582         return false;
1583
1584       if (code == MINUS_EXPR)
1585         p1 = double_int_neg (p1);
1586       *mul = double_int_sext (double_int_add (p0, p1), precision);
1587       return true;
1588
1589     case INTEGER_CST:
1590       if (TREE_CODE (bot) != INTEGER_CST)
1591         return false;
1592
1593       p0 = double_int_sext (tree_to_double_int (top), precision);
1594       p1 = double_int_sext (tree_to_double_int (bot), precision);
1595       if (double_int_zero_p (p1))
1596         return false;
1597       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1598                               precision);
1599       return double_int_zero_p (res);
1600
1601     default:
1602       return false;
1603     }
1604 }
1605
1606 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1607
1608 static bool
1609 may_be_unaligned_p (tree ref, tree step)
1610 {
1611   tree base;
1612   tree base_type;
1613   HOST_WIDE_INT bitsize;
1614   HOST_WIDE_INT bitpos;
1615   tree toffset;
1616   enum machine_mode mode;
1617   int unsignedp, volatilep;
1618   unsigned base_align;
1619
1620   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1621      thus they are not misaligned.  */
1622   if (TREE_CODE (ref) == TARGET_MEM_REF)
1623     return false;
1624
1625   /* The test below is basically copy of what expr.c:normal_inner_ref
1626      does to check whether the object must be loaded by parts when
1627      STRICT_ALIGNMENT is true.  */
1628   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1629                               &unsignedp, &volatilep, true);
1630   base_type = TREE_TYPE (base);
1631   base_align = get_object_alignment (base);
1632   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1633
1634   if (mode != BLKmode)
1635     {
1636       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1637
1638       if (base_align < mode_align
1639           || (bitpos % mode_align) != 0
1640           || (bitpos % BITS_PER_UNIT) != 0)
1641         return true;
1642
1643       if (toffset
1644           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1645         return true;
1646
1647       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1648         return true;
1649     }
1650
1651   return false;
1652 }
1653
1654 /* Return true if EXPR may be non-addressable.   */
1655
1656 bool
1657 may_be_nonaddressable_p (tree expr)
1658 {
1659   switch (TREE_CODE (expr))
1660     {
1661     case TARGET_MEM_REF:
1662       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1663          target, thus they are always addressable.  */
1664       return false;
1665
1666     case COMPONENT_REF:
1667       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1668              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1669
1670     case VIEW_CONVERT_EXPR:
1671       /* This kind of view-conversions may wrap non-addressable objects
1672          and make them look addressable.  After some processing the
1673          non-addressability may be uncovered again, causing ADDR_EXPRs
1674          of inappropriate objects to be built.  */
1675       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1676           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1677         return true;
1678
1679       /* ... fall through ... */
1680
1681     case ARRAY_REF:
1682     case ARRAY_RANGE_REF:
1683       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1684
1685     CASE_CONVERT:
1686       return true;
1687
1688     default:
1689       break;
1690     }
1691
1692   return false;
1693 }
1694
1695 /* Finds addresses in *OP_P inside STMT.  */
1696
1697 static void
1698 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1699 {
1700   tree base = *op_p, step = size_zero_node;
1701   struct iv *civ;
1702   struct ifs_ivopts_data ifs_ivopts_data;
1703
1704   /* Do not play with volatile memory references.  A bit too conservative,
1705      perhaps, but safe.  */
1706   if (gimple_has_volatile_ops (stmt))
1707     goto fail;
1708
1709   /* Ignore bitfields for now.  Not really something terribly complicated
1710      to handle.  TODO.  */
1711   if (TREE_CODE (base) == BIT_FIELD_REF)
1712     goto fail;
1713
1714   base = unshare_expr (base);
1715
1716   if (TREE_CODE (base) == TARGET_MEM_REF)
1717     {
1718       tree type = build_pointer_type (TREE_TYPE (base));
1719       tree astep;
1720
1721       if (TMR_BASE (base)
1722           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1723         {
1724           civ = get_iv (data, TMR_BASE (base));
1725           if (!civ)
1726             goto fail;
1727
1728           TMR_BASE (base) = civ->base;
1729           step = civ->step;
1730         }
1731       if (TMR_INDEX2 (base)
1732           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1733         {
1734           civ = get_iv (data, TMR_INDEX2 (base));
1735           if (!civ)
1736             goto fail;
1737
1738           TMR_INDEX2 (base) = civ->base;
1739           step = civ->step;
1740         }
1741       if (TMR_INDEX (base)
1742           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1743         {
1744           civ = get_iv (data, TMR_INDEX (base));
1745           if (!civ)
1746             goto fail;
1747
1748           TMR_INDEX (base) = civ->base;
1749           astep = civ->step;
1750
1751           if (astep)
1752             {
1753               if (TMR_STEP (base))
1754                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1755
1756               step = fold_build2 (PLUS_EXPR, type, step, astep);
1757             }
1758         }
1759
1760       if (integer_zerop (step))
1761         goto fail;
1762       base = tree_mem_ref_addr (type, base);
1763     }
1764   else
1765     {
1766       ifs_ivopts_data.ivopts_data = data;
1767       ifs_ivopts_data.stmt = stmt;
1768       ifs_ivopts_data.step = size_zero_node;
1769       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1770           || integer_zerop (ifs_ivopts_data.step))
1771         goto fail;
1772       step = ifs_ivopts_data.step;
1773
1774       /* Check that the base expression is addressable.  This needs
1775          to be done after substituting bases of IVs into it.  */
1776       if (may_be_nonaddressable_p (base))
1777         goto fail;
1778
1779       /* Moreover, on strict alignment platforms, check that it is
1780          sufficiently aligned.  */
1781       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1782         goto fail;
1783
1784       base = build_fold_addr_expr (base);
1785
1786       /* Substituting bases of IVs into the base expression might
1787          have caused folding opportunities.  */
1788       if (TREE_CODE (base) == ADDR_EXPR)
1789         {
1790           tree *ref = &TREE_OPERAND (base, 0);
1791           while (handled_component_p (*ref))
1792             ref = &TREE_OPERAND (*ref, 0);
1793           if (TREE_CODE (*ref) == MEM_REF)
1794             {
1795               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1796                                       TREE_OPERAND (*ref, 0),
1797                                       TREE_OPERAND (*ref, 1));
1798               if (tem)
1799                 *ref = tem;
1800             }
1801         }
1802     }
1803
1804   civ = alloc_iv (base, step);
1805   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1806   return;
1807
1808 fail:
1809   for_each_index (op_p, idx_record_use, data);
1810 }
1811
1812 /* Finds and records invariants used in STMT.  */
1813
1814 static void
1815 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1816 {
1817   ssa_op_iter iter;
1818   use_operand_p use_p;
1819   tree op;
1820
1821   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1822     {
1823       op = USE_FROM_PTR (use_p);
1824       record_invariant (data, op, false);
1825     }
1826 }
1827
1828 /* Finds interesting uses of induction variables in the statement STMT.  */
1829
1830 static void
1831 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1832 {
1833   struct iv *iv;
1834   tree op, *lhs, *rhs;
1835   ssa_op_iter iter;
1836   use_operand_p use_p;
1837   enum tree_code code;
1838
1839   find_invariants_stmt (data, stmt);
1840
1841   if (gimple_code (stmt) == GIMPLE_COND)
1842     {
1843       find_interesting_uses_cond (data, stmt);
1844       return;
1845     }
1846
1847   if (is_gimple_assign (stmt))
1848     {
1849       lhs = gimple_assign_lhs_ptr (stmt);
1850       rhs = gimple_assign_rhs1_ptr (stmt);
1851
1852       if (TREE_CODE (*lhs) == SSA_NAME)
1853         {
1854           /* If the statement defines an induction variable, the uses are not
1855              interesting by themselves.  */
1856
1857           iv = get_iv (data, *lhs);
1858
1859           if (iv && !integer_zerop (iv->step))
1860             return;
1861         }
1862
1863       code = gimple_assign_rhs_code (stmt);
1864       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1865           && (REFERENCE_CLASS_P (*rhs)
1866               || is_gimple_val (*rhs)))
1867         {
1868           if (REFERENCE_CLASS_P (*rhs))
1869             find_interesting_uses_address (data, stmt, rhs);
1870           else
1871             find_interesting_uses_op (data, *rhs);
1872
1873           if (REFERENCE_CLASS_P (*lhs))
1874             find_interesting_uses_address (data, stmt, lhs);
1875           return;
1876         }
1877       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1878         {
1879           find_interesting_uses_cond (data, stmt);
1880           return;
1881         }
1882
1883       /* TODO -- we should also handle address uses of type
1884
1885          memory = call (whatever);
1886
1887          and
1888
1889          call (memory).  */
1890     }
1891
1892   if (gimple_code (stmt) == GIMPLE_PHI
1893       && gimple_bb (stmt) == data->current_loop->header)
1894     {
1895       iv = get_iv (data, PHI_RESULT (stmt));
1896
1897       if (iv && !integer_zerop (iv->step))
1898         return;
1899     }
1900
1901   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1902     {
1903       op = USE_FROM_PTR (use_p);
1904
1905       if (TREE_CODE (op) != SSA_NAME)
1906         continue;
1907
1908       iv = get_iv (data, op);
1909       if (!iv)
1910         continue;
1911
1912       find_interesting_uses_op (data, op);
1913     }
1914 }
1915
1916 /* Finds interesting uses of induction variables outside of loops
1917    on loop exit edge EXIT.  */
1918
1919 static void
1920 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1921 {
1922   gimple phi;
1923   gimple_stmt_iterator psi;
1924   tree def;
1925
1926   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1927     {
1928       phi = gsi_stmt (psi);
1929       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1930       if (!virtual_operand_p (def))
1931         find_interesting_uses_op (data, def);
1932     }
1933 }
1934
1935 /* Finds uses of the induction variables that are interesting.  */
1936
1937 static void
1938 find_interesting_uses (struct ivopts_data *data)
1939 {
1940   basic_block bb;
1941   gimple_stmt_iterator bsi;
1942   basic_block *body = get_loop_body (data->current_loop);
1943   unsigned i;
1944   struct version_info *info;
1945   edge e;
1946
1947   if (dump_file && (dump_flags & TDF_DETAILS))
1948     fprintf (dump_file, "Uses:\n\n");
1949
1950   for (i = 0; i < data->current_loop->num_nodes; i++)
1951     {
1952       edge_iterator ei;
1953       bb = body[i];
1954
1955       FOR_EACH_EDGE (e, ei, bb->succs)
1956         if (e->dest != EXIT_BLOCK_PTR
1957             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1958           find_interesting_uses_outside (data, e);
1959
1960       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1961         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1962       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1963         if (!is_gimple_debug (gsi_stmt (bsi)))
1964           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1965     }
1966
1967   if (dump_file && (dump_flags & TDF_DETAILS))
1968     {
1969       bitmap_iterator bi;
1970
1971       fprintf (dump_file, "\n");
1972
1973       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1974         {
1975           info = ver_info (data, i);
1976           if (info->inv_id)
1977             {
1978               fprintf (dump_file, "  ");
1979               print_generic_expr (dump_file, info->name, TDF_SLIM);
1980               fprintf (dump_file, " is invariant (%d)%s\n",
1981                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1982             }
1983         }
1984
1985       fprintf (dump_file, "\n");
1986     }
1987
1988   free (body);
1989 }
1990
1991 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1992    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1993    we are at the top-level of the processed address.  */
1994
1995 static tree
1996 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1997                 unsigned HOST_WIDE_INT *offset)
1998 {
1999   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2000   enum tree_code code;
2001   tree type, orig_type = TREE_TYPE (expr);
2002   unsigned HOST_WIDE_INT off0, off1, st;
2003   tree orig_expr = expr;
2004
2005   STRIP_NOPS (expr);
2006
2007   type = TREE_TYPE (expr);
2008   code = TREE_CODE (expr);
2009   *offset = 0;
2010
2011   switch (code)
2012     {
2013     case INTEGER_CST:
2014       if (!cst_and_fits_in_hwi (expr)
2015           || integer_zerop (expr))
2016         return orig_expr;
2017
2018       *offset = int_cst_value (expr);
2019       return build_int_cst (orig_type, 0);
2020
2021     case POINTER_PLUS_EXPR:
2022     case PLUS_EXPR:
2023     case MINUS_EXPR:
2024       op0 = TREE_OPERAND (expr, 0);
2025       op1 = TREE_OPERAND (expr, 1);
2026
2027       op0 = strip_offset_1 (op0, false, false, &off0);
2028       op1 = strip_offset_1 (op1, false, false, &off1);
2029
2030       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2031       if (op0 == TREE_OPERAND (expr, 0)
2032           && op1 == TREE_OPERAND (expr, 1))
2033         return orig_expr;
2034
2035       if (integer_zerop (op1))
2036         expr = op0;
2037       else if (integer_zerop (op0))
2038         {
2039           if (code == MINUS_EXPR)
2040             expr = fold_build1 (NEGATE_EXPR, type, op1);
2041           else
2042             expr = op1;
2043         }
2044       else
2045         expr = fold_build2 (code, type, op0, op1);
2046
2047       return fold_convert (orig_type, expr);
2048
2049     case MULT_EXPR:
2050       op1 = TREE_OPERAND (expr, 1);
2051       if (!cst_and_fits_in_hwi (op1))
2052         return orig_expr;
2053
2054       op0 = TREE_OPERAND (expr, 0);
2055       op0 = strip_offset_1 (op0, false, false, &off0);
2056       if (op0 == TREE_OPERAND (expr, 0))
2057         return orig_expr;
2058
2059       *offset = off0 * int_cst_value (op1);
2060       if (integer_zerop (op0))
2061         expr = op0;
2062       else
2063         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2064
2065       return fold_convert (orig_type, expr);
2066
2067     case ARRAY_REF:
2068     case ARRAY_RANGE_REF:
2069       if (!inside_addr)
2070         return orig_expr;
2071
2072       step = array_ref_element_size (expr);
2073       if (!cst_and_fits_in_hwi (step))
2074         break;
2075
2076       st = int_cst_value (step);
2077       op1 = TREE_OPERAND (expr, 1);
2078       op1 = strip_offset_1 (op1, false, false, &off1);
2079       *offset = off1 * st;
2080
2081       if (top_compref
2082           && integer_zerop (op1))
2083         {
2084           /* Strip the component reference completely.  */
2085           op0 = TREE_OPERAND (expr, 0);
2086           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2087           *offset += off0;
2088           return op0;
2089         }
2090       break;
2091
2092     case COMPONENT_REF:
2093       if (!inside_addr)
2094         return orig_expr;
2095
2096       tmp = component_ref_field_offset (expr);
2097       if (top_compref
2098           && cst_and_fits_in_hwi (tmp))
2099         {
2100           /* Strip the component reference completely.  */
2101           op0 = TREE_OPERAND (expr, 0);
2102           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2103           *offset = off0 + int_cst_value (tmp);
2104           return op0;
2105         }
2106       break;
2107
2108     case ADDR_EXPR:
2109       op0 = TREE_OPERAND (expr, 0);
2110       op0 = strip_offset_1 (op0, true, true, &off0);
2111       *offset += off0;
2112
2113       if (op0 == TREE_OPERAND (expr, 0))
2114         return orig_expr;
2115
2116       expr = build_fold_addr_expr (op0);
2117       return fold_convert (orig_type, expr);
2118
2119     case MEM_REF:
2120       /* ???  Offset operand?  */
2121       inside_addr = false;
2122       break;
2123
2124     default:
2125       return orig_expr;
2126     }
2127
2128   /* Default handling of expressions for that we want to recurse into
2129      the first operand.  */
2130   op0 = TREE_OPERAND (expr, 0);
2131   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2132   *offset += off0;
2133
2134   if (op0 == TREE_OPERAND (expr, 0)
2135       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2136     return orig_expr;
2137
2138   expr = copy_node (expr);
2139   TREE_OPERAND (expr, 0) = op0;
2140   if (op1)
2141     TREE_OPERAND (expr, 1) = op1;
2142
2143   /* Inside address, we might strip the top level component references,
2144      thus changing type of the expression.  Handling of ADDR_EXPR
2145      will fix that.  */
2146   expr = fold_convert (orig_type, expr);
2147
2148   return expr;
2149 }
2150
2151 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2152
2153 static tree
2154 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2155 {
2156   return strip_offset_1 (expr, false, false, offset);
2157 }
2158
2159 /* Returns variant of TYPE that can be used as base for different uses.
2160    We return unsigned type with the same precision, which avoids problems
2161    with overflows.  */
2162
2163 static tree
2164 generic_type_for (tree type)
2165 {
2166   if (POINTER_TYPE_P (type))
2167     return unsigned_type_for (type);
2168
2169   if (TYPE_UNSIGNED (type))
2170     return type;
2171
2172   return unsigned_type_for (type);
2173 }
2174
2175 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2176    the bitmap to that we should store it.  */
2177
2178 static struct ivopts_data *fd_ivopts_data;
2179 static tree
2180 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2181 {
2182   bitmap *depends_on = (bitmap *) data;
2183   struct version_info *info;
2184
2185   if (TREE_CODE (*expr_p) != SSA_NAME)
2186     return NULL_TREE;
2187   info = name_info (fd_ivopts_data, *expr_p);
2188
2189   if (!info->inv_id || info->has_nonlin_use)
2190     return NULL_TREE;
2191
2192   if (!*depends_on)
2193     *depends_on = BITMAP_ALLOC (NULL);
2194   bitmap_set_bit (*depends_on, info->inv_id);
2195
2196   return NULL_TREE;
2197 }
2198
2199 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2200    position to POS.  If USE is not NULL, the candidate is set as related to
2201    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2202    replacement of the final value of the iv by a direct computation.  */
2203
2204 static struct iv_cand *
2205 add_candidate_1 (struct ivopts_data *data,
2206                  tree base, tree step, bool important, enum iv_position pos,
2207                  struct iv_use *use, gimple incremented_at)
2208 {
2209   unsigned i;
2210   struct iv_cand *cand = NULL;
2211   tree type, orig_type;
2212
2213   /* For non-original variables, make sure their values are computed in a type
2214      that does not invoke undefined behavior on overflows (since in general,
2215      we cannot prove that these induction variables are non-wrapping).  */
2216   if (pos != IP_ORIGINAL)
2217     {
2218       orig_type = TREE_TYPE (base);
2219       type = generic_type_for (orig_type);
2220       if (type != orig_type)
2221         {
2222           base = fold_convert (type, base);
2223           step = fold_convert (type, step);
2224         }
2225     }
2226
2227   for (i = 0; i < n_iv_cands (data); i++)
2228     {
2229       cand = iv_cand (data, i);
2230
2231       if (cand->pos != pos)
2232         continue;
2233
2234       if (cand->incremented_at != incremented_at
2235           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2236               && cand->ainc_use != use))
2237         continue;
2238
2239       if (!cand->iv)
2240         {
2241           if (!base && !step)
2242             break;
2243
2244           continue;
2245         }
2246
2247       if (!base && !step)
2248         continue;
2249
2250       if (operand_equal_p (base, cand->iv->base, 0)
2251           && operand_equal_p (step, cand->iv->step, 0)
2252           && (TYPE_PRECISION (TREE_TYPE (base))
2253               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2254         break;
2255     }
2256
2257   if (i == n_iv_cands (data))
2258     {
2259       cand = XCNEW (struct iv_cand);
2260       cand->id = i;
2261
2262       if (!base && !step)
2263         cand->iv = NULL;
2264       else
2265         cand->iv = alloc_iv (base, step);
2266
2267       cand->pos = pos;
2268       if (pos != IP_ORIGINAL && cand->iv)
2269         {
2270           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2271           cand->var_after = cand->var_before;
2272         }
2273       cand->important = important;
2274       cand->incremented_at = incremented_at;
2275       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2276
2277       if (step
2278           && TREE_CODE (step) != INTEGER_CST)
2279         {
2280           fd_ivopts_data = data;
2281           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2282         }
2283
2284       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2285         cand->ainc_use = use;
2286       else
2287         cand->ainc_use = NULL;
2288
2289       if (dump_file && (dump_flags & TDF_DETAILS))
2290         dump_cand (dump_file, cand);
2291     }
2292
2293   if (important && !cand->important)
2294     {
2295       cand->important = true;
2296       if (dump_file && (dump_flags & TDF_DETAILS))
2297         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2298     }
2299
2300   if (use)
2301     {
2302       bitmap_set_bit (use->related_cands, i);
2303       if (dump_file && (dump_flags & TDF_DETAILS))
2304         fprintf (dump_file, "Candidate %d is related to use %d\n",
2305                  cand->id, use->id);
2306     }
2307
2308   return cand;
2309 }
2310
2311 /* Returns true if incrementing the induction variable at the end of the LOOP
2312    is allowed.
2313
2314    The purpose is to avoid splitting latch edge with a biv increment, thus
2315    creating a jump, possibly confusing other optimization passes and leaving
2316    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2317    is not available (so we do not have a better alternative), or if the latch
2318    edge is already nonempty.  */
2319
2320 static bool
2321 allow_ip_end_pos_p (struct loop *loop)
2322 {
2323   if (!ip_normal_pos (loop))
2324     return true;
2325
2326   if (!empty_block_p (ip_end_pos (loop)))
2327     return true;
2328
2329   return false;
2330 }
2331
2332 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2333    Important field is set to IMPORTANT.  */
2334
2335 static void
2336 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2337                         bool important, struct iv_use *use)
2338 {
2339   basic_block use_bb = gimple_bb (use->stmt);
2340   enum machine_mode mem_mode;
2341   unsigned HOST_WIDE_INT cstepi;
2342
2343   /* If we insert the increment in any position other than the standard
2344      ones, we must ensure that it is incremented once per iteration.
2345      It must not be in an inner nested loop, or one side of an if
2346      statement.  */
2347   if (use_bb->loop_father != data->current_loop
2348       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2349       || stmt_could_throw_p (use->stmt)
2350       || !cst_and_fits_in_hwi (step))
2351     return;
2352
2353   cstepi = int_cst_value (step);
2354
2355   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2356   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2357         || USE_STORE_PRE_INCREMENT (mem_mode))
2358        && GET_MODE_SIZE (mem_mode) == cstepi)
2359       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2360            || USE_STORE_PRE_DECREMENT (mem_mode))
2361           && GET_MODE_SIZE (mem_mode) == -cstepi))
2362     {
2363       enum tree_code code = MINUS_EXPR;
2364       tree new_base;
2365       tree new_step = step;
2366
2367       if (POINTER_TYPE_P (TREE_TYPE (base)))
2368         {
2369           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2370           code = POINTER_PLUS_EXPR;
2371         }
2372       else
2373         new_step = fold_convert (TREE_TYPE (base), new_step);
2374       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2375       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2376                        use->stmt);
2377     }
2378   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2379         || USE_STORE_POST_INCREMENT (mem_mode))
2380        && GET_MODE_SIZE (mem_mode) == cstepi)
2381       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2382            || USE_STORE_POST_DECREMENT (mem_mode))
2383           && GET_MODE_SIZE (mem_mode) == -cstepi))
2384     {
2385       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2386                        use->stmt);
2387     }
2388 }
2389
2390 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2391    position to POS.  If USE is not NULL, the candidate is set as related to
2392    it.  The candidate computation is scheduled on all available positions.  */
2393
2394 static void
2395 add_candidate (struct ivopts_data *data,
2396                tree base, tree step, bool important, struct iv_use *use)
2397 {
2398   if (ip_normal_pos (data->current_loop))
2399     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2400   if (ip_end_pos (data->current_loop)
2401       && allow_ip_end_pos_p (data->current_loop))
2402     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2403
2404   if (use != NULL && use->type == USE_ADDRESS)
2405     add_autoinc_candidates (data, base, step, important, use);
2406 }
2407
2408 /* Adds standard iv candidates.  */
2409
2410 static void
2411 add_standard_iv_candidates (struct ivopts_data *data)
2412 {
2413   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2414
2415   /* The same for a double-integer type if it is still fast enough.  */
2416   if (TYPE_PRECISION
2417         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2418       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2419     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2420                    build_int_cst (long_integer_type_node, 1), true, NULL);
2421
2422   /* The same for a double-integer type if it is still fast enough.  */
2423   if (TYPE_PRECISION
2424         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2425       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2426     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2427                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2428 }
2429
2430
2431 /* Adds candidates bases on the old induction variable IV.  */
2432
2433 static void
2434 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2435 {
2436   gimple phi;
2437   tree def;
2438   struct iv_cand *cand;
2439
2440   add_candidate (data, iv->base, iv->step, true, NULL);
2441
2442   /* The same, but with initial value zero.  */
2443   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2444     add_candidate (data, size_int (0), iv->step, true, NULL);
2445   else
2446     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2447                    iv->step, true, NULL);
2448
2449   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2450   if (gimple_code (phi) == GIMPLE_PHI)
2451     {
2452       /* Additionally record the possibility of leaving the original iv
2453          untouched.  */
2454       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2455       cand = add_candidate_1 (data,
2456                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2457                               SSA_NAME_DEF_STMT (def));
2458       cand->var_before = iv->ssa_name;
2459       cand->var_after = def;
2460     }
2461 }
2462
2463 /* Adds candidates based on the old induction variables.  */
2464
2465 static void
2466 add_old_ivs_candidates (struct ivopts_data *data)
2467 {
2468   unsigned i;
2469   struct iv *iv;
2470   bitmap_iterator bi;
2471
2472   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2473     {
2474       iv = ver_info (data, i)->iv;
2475       if (iv && iv->biv_p && !integer_zerop (iv->step))
2476         add_old_iv_candidates (data, iv);
2477     }
2478 }
2479
2480 /* Adds candidates based on the value of the induction variable IV and USE.  */
2481
2482 static void
2483 add_iv_value_candidates (struct ivopts_data *data,
2484                          struct iv *iv, struct iv_use *use)
2485 {
2486   unsigned HOST_WIDE_INT offset;
2487   tree base;
2488   tree basetype;
2489
2490   add_candidate (data, iv->base, iv->step, false, use);
2491
2492   /* The same, but with initial value zero.  Make such variable important,
2493      since it is generic enough so that possibly many uses may be based
2494      on it.  */
2495   basetype = TREE_TYPE (iv->base);
2496   if (POINTER_TYPE_P (basetype))
2497     basetype = sizetype;
2498   add_candidate (data, build_int_cst (basetype, 0),
2499                  iv->step, true, use);
2500
2501   /* Third, try removing the constant offset.  Make sure to even
2502      add a candidate for &a[0] vs. (T *)&a.  */
2503   base = strip_offset (iv->base, &offset);
2504   if (offset
2505       || base != iv->base)
2506     add_candidate (data, base, iv->step, false, use);
2507 }
2508
2509 /* Adds candidates based on the uses.  */
2510
2511 static void
2512 add_derived_ivs_candidates (struct ivopts_data *data)
2513 {
2514   unsigned i;
2515
2516   for (i = 0; i < n_iv_uses (data); i++)
2517     {
2518       struct iv_use *use = iv_use (data, i);
2519
2520       if (!use)
2521         continue;
2522
2523       switch (use->type)
2524         {
2525         case USE_NONLINEAR_EXPR:
2526         case USE_COMPARE:
2527         case USE_ADDRESS:
2528           /* Just add the ivs based on the value of the iv used here.  */
2529           add_iv_value_candidates (data, use->iv, use);
2530           break;
2531
2532         default:
2533           gcc_unreachable ();
2534         }
2535     }
2536 }
2537
2538 /* Record important candidates and add them to related_cands bitmaps
2539    if needed.  */
2540
2541 static void
2542 record_important_candidates (struct ivopts_data *data)
2543 {
2544   unsigned i;
2545   struct iv_use *use;
2546
2547   for (i = 0; i < n_iv_cands (data); i++)
2548     {
2549       struct iv_cand *cand = iv_cand (data, i);
2550
2551       if (cand->important)
2552         bitmap_set_bit (data->important_candidates, i);
2553     }
2554
2555   data->consider_all_candidates = (n_iv_cands (data)
2556                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2557
2558   if (data->consider_all_candidates)
2559     {
2560       /* We will not need "related_cands" bitmaps in this case,
2561          so release them to decrease peak memory consumption.  */
2562       for (i = 0; i < n_iv_uses (data); i++)
2563         {
2564           use = iv_use (data, i);
2565           BITMAP_FREE (use->related_cands);
2566         }
2567     }
2568   else
2569     {
2570       /* Add important candidates to the related_cands bitmaps.  */
2571       for (i = 0; i < n_iv_uses (data); i++)
2572         bitmap_ior_into (iv_use (data, i)->related_cands,
2573                          data->important_candidates);
2574     }
2575 }
2576
2577 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2578    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2579    we allocate a simple list to every use.  */
2580
2581 static void
2582 alloc_use_cost_map (struct ivopts_data *data)
2583 {
2584   unsigned i, size, s, j;
2585
2586   for (i = 0; i < n_iv_uses (data); i++)
2587     {
2588       struct iv_use *use = iv_use (data, i);
2589       bitmap_iterator bi;
2590
2591       if (data->consider_all_candidates)
2592         size = n_iv_cands (data);
2593       else
2594         {
2595           s = 0;
2596           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2597             {
2598               s++;
2599             }
2600
2601           /* Round up to the power of two, so that moduling by it is fast.  */
2602           for (size = 1; size < s; size <<= 1)
2603             continue;
2604         }
2605
2606       use->n_map_members = size;
2607       use->cost_map = XCNEWVEC (struct cost_pair, size);
2608     }
2609 }
2610
2611 /* Returns description of computation cost of expression whose runtime
2612    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2613
2614 static comp_cost
2615 new_cost (unsigned runtime, unsigned complexity)
2616 {
2617   comp_cost cost;
2618
2619   cost.cost = runtime;
2620   cost.complexity = complexity;
2621
2622   return cost;
2623 }
2624
2625 /* Adds costs COST1 and COST2.  */
2626
2627 static comp_cost
2628 add_costs (comp_cost cost1, comp_cost cost2)
2629 {
2630   cost1.cost += cost2.cost;
2631   cost1.complexity += cost2.complexity;
2632
2633   return cost1;
2634 }
2635 /* Subtracts costs COST1 and COST2.  */
2636
2637 static comp_cost
2638 sub_costs (comp_cost cost1, comp_cost cost2)
2639 {
2640   cost1.cost -= cost2.cost;
2641   cost1.complexity -= cost2.complexity;
2642
2643   return cost1;
2644 }
2645
2646 /* Returns a negative number if COST1 < COST2, a positive number if
2647    COST1 > COST2, and 0 if COST1 = COST2.  */
2648
2649 static int
2650 compare_costs (comp_cost cost1, comp_cost cost2)
2651 {
2652   if (cost1.cost == cost2.cost)
2653     return cost1.complexity - cost2.complexity;
2654
2655   return cost1.cost - cost2.cost;
2656 }
2657
2658 /* Returns true if COST is infinite.  */
2659
2660 static bool
2661 infinite_cost_p (comp_cost cost)
2662 {
2663   return cost.cost == INFTY;
2664 }
2665
2666 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2667    on invariants DEPENDS_ON and that the value used in expressing it
2668    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2669
2670 static void
2671 set_use_iv_cost (struct ivopts_data *data,
2672                  struct iv_use *use, struct iv_cand *cand,
2673                  comp_cost cost, bitmap depends_on, tree value,
2674                  enum tree_code comp, int inv_expr_id)
2675 {
2676   unsigned i, s;
2677
2678   if (infinite_cost_p (cost))
2679     {
2680       BITMAP_FREE (depends_on);
2681       return;
2682     }
2683
2684   if (data->consider_all_candidates)
2685     {
2686       use->cost_map[cand->id].cand = cand;
2687       use->cost_map[cand->id].cost = cost;
2688       use->cost_map[cand->id].depends_on = depends_on;
2689       use->cost_map[cand->id].value = value;
2690       use->cost_map[cand->id].comp = comp;
2691       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2692       return;
2693     }
2694
2695   /* n_map_members is a power of two, so this computes modulo.  */
2696   s = cand->id & (use->n_map_members - 1);
2697   for (i = s; i < use->n_map_members; i++)
2698     if (!use->cost_map[i].cand)
2699       goto found;
2700   for (i = 0; i < s; i++)
2701     if (!use->cost_map[i].cand)
2702       goto found;
2703
2704   gcc_unreachable ();
2705
2706 found:
2707   use->cost_map[i].cand = cand;
2708   use->cost_map[i].cost = cost;
2709   use->cost_map[i].depends_on = depends_on;
2710   use->cost_map[i].value = value;
2711   use->cost_map[i].comp = comp;
2712   use->cost_map[i].inv_expr_id = inv_expr_id;
2713 }
2714
2715 /* Gets cost of (USE, CANDIDATE) pair.  */
2716
2717 static struct cost_pair *
2718 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2719                  struct iv_cand *cand)
2720 {
2721   unsigned i, s;
2722   struct cost_pair *ret;
2723
2724   if (!cand)
2725     return NULL;
2726
2727   if (data->consider_all_candidates)
2728     {
2729       ret = use->cost_map + cand->id;
2730       if (!ret->cand)
2731         return NULL;
2732
2733       return ret;
2734     }
2735
2736   /* n_map_members is a power of two, so this computes modulo.  */
2737   s = cand->id & (use->n_map_members - 1);
2738   for (i = s; i < use->n_map_members; i++)
2739     if (use->cost_map[i].cand == cand)
2740       return use->cost_map + i;
2741
2742   for (i = 0; i < s; i++)
2743     if (use->cost_map[i].cand == cand)
2744       return use->cost_map + i;
2745
2746   return NULL;
2747 }
2748
2749 /* Returns estimate on cost of computing SEQ.  */
2750
2751 static unsigned
2752 seq_cost (rtx seq, bool speed)
2753 {
2754   unsigned cost = 0;
2755   rtx set;
2756
2757   for (; seq; seq = NEXT_INSN (seq))
2758     {
2759       set = single_set (seq);
2760       if (set)
2761         cost += set_src_cost (SET_SRC (set), speed);
2762       else
2763         cost++;
2764     }
2765
2766   return cost;
2767 }
2768
2769 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2770 static rtx
2771 produce_memory_decl_rtl (tree obj, int *regno)
2772 {
2773   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2774   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2775   rtx x;
2776
2777   gcc_assert (obj);
2778   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2779     {
2780       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2781       x = gen_rtx_SYMBOL_REF (address_mode, name);
2782       SET_SYMBOL_REF_DECL (x, obj);
2783       x = gen_rtx_MEM (DECL_MODE (obj), x);
2784       set_mem_addr_space (x, as);
2785       targetm.encode_section_info (obj, x, true);
2786     }
2787   else
2788     {
2789       x = gen_raw_REG (address_mode, (*regno)++);
2790       x = gen_rtx_MEM (DECL_MODE (obj), x);
2791       set_mem_addr_space (x, as);
2792     }
2793
2794   return x;
2795 }
2796
2797 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2798    walk_tree.  DATA contains the actual fake register number.  */
2799
2800 static tree
2801 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2802 {
2803   tree obj = NULL_TREE;
2804   rtx x = NULL_RTX;
2805   int *regno = (int *) data;
2806
2807   switch (TREE_CODE (*expr_p))
2808     {
2809     case ADDR_EXPR:
2810       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2811            handled_component_p (*expr_p);
2812            expr_p = &TREE_OPERAND (*expr_p, 0))
2813         continue;
2814       obj = *expr_p;
2815       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2816         x = produce_memory_decl_rtl (obj, regno);
2817       break;
2818
2819     case SSA_NAME:
2820       *ws = 0;
2821       obj = SSA_NAME_VAR (*expr_p);
2822       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2823       if (!obj)
2824         return NULL_TREE;
2825       if (!DECL_RTL_SET_P (obj))
2826         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2827       break;
2828
2829     case VAR_DECL:
2830     case PARM_DECL:
2831     case RESULT_DECL:
2832       *ws = 0;
2833       obj = *expr_p;
2834
2835       if (DECL_RTL_SET_P (obj))
2836         break;
2837
2838       if (DECL_MODE (obj) == BLKmode)
2839         x = produce_memory_decl_rtl (obj, regno);
2840       else
2841         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2842
2843       break;
2844
2845     default:
2846       break;
2847     }
2848
2849   if (x)
2850     {
2851       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2852       SET_DECL_RTL (obj, x);
2853     }
2854
2855   return NULL_TREE;
2856 }
2857
2858 /* Determines cost of the computation of EXPR.  */
2859
2860 static unsigned
2861 computation_cost (tree expr, bool speed)
2862 {
2863   rtx seq, rslt;
2864   tree type = TREE_TYPE (expr);
2865   unsigned cost;
2866   /* Avoid using hard regs in ways which may be unsupported.  */
2867   int regno = LAST_VIRTUAL_REGISTER + 1;
2868   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2869   enum node_frequency real_frequency = node->frequency;
2870
2871   node->frequency = NODE_FREQUENCY_NORMAL;
2872   crtl->maybe_hot_insn_p = speed;
2873   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2874   start_sequence ();
2875   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2876   seq = get_insns ();
2877   end_sequence ();
2878   default_rtl_profile ();
2879   node->frequency = real_frequency;
2880
2881   cost = seq_cost (seq, speed);
2882   if (MEM_P (rslt))
2883     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2884                           TYPE_ADDR_SPACE (type), speed);
2885   else if (!REG_P (rslt))
2886     cost += set_src_cost (rslt, speed);
2887
2888   return cost;
2889 }
2890
2891 /* Returns variable containing the value of candidate CAND at statement AT.  */
2892
2893 static tree
2894 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2895 {
2896   if (stmt_after_increment (loop, cand, stmt))
2897     return cand->var_after;
2898   else
2899     return cand->var_before;
2900 }
2901
2902 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2903    same precision that is at least as wide as the precision of TYPE, stores
2904    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2905    type of A and B.  */
2906
2907 static tree
2908 determine_common_wider_type (tree *a, tree *b)
2909 {
2910   tree wider_type = NULL;
2911   tree suba, subb;
2912   tree atype = TREE_TYPE (*a);
2913
2914   if (CONVERT_EXPR_P (*a))
2915     {
2916       suba = TREE_OPERAND (*a, 0);
2917       wider_type = TREE_TYPE (suba);
2918       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2919         return atype;
2920     }
2921   else
2922     return atype;
2923
2924   if (CONVERT_EXPR_P (*b))
2925     {
2926       subb = TREE_OPERAND (*b, 0);
2927       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2928         return atype;
2929     }
2930   else
2931     return atype;
2932
2933   *a = suba;
2934   *b = subb;
2935   return wider_type;
2936 }
2937
2938 /* Determines the expression by that USE is expressed from induction variable
2939    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2940    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2941
2942 static bool
2943 get_computation_aff (struct loop *loop,
2944                      struct iv_use *use, struct iv_cand *cand, gimple at,
2945                      struct affine_tree_combination *aff)
2946 {
2947   tree ubase = use->iv->base;
2948   tree ustep = use->iv->step;
2949   tree cbase = cand->iv->base;
2950   tree cstep = cand->iv->step, cstep_common;
2951   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2952   tree common_type, var;
2953   tree uutype;
2954   aff_tree cbase_aff, var_aff;
2955   double_int rat;
2956
2957   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2958     {
2959       /* We do not have a precision to express the values of use.  */
2960       return false;
2961     }
2962
2963   var = var_at_stmt (loop, cand, at);
2964   uutype = unsigned_type_for (utype);
2965
2966   /* If the conversion is not noop, perform it.  */
2967   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2968     {
2969       cstep = fold_convert (uutype, cstep);
2970       cbase = fold_convert (uutype, cbase);
2971       var = fold_convert (uutype, var);
2972     }
2973
2974   if (!constant_multiple_of (ustep, cstep, &rat))
2975     return false;
2976
2977   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2978      type, we achieve better folding by computing their difference in this
2979      wider type, and cast the result to UUTYPE.  We do not need to worry about
2980      overflows, as all the arithmetics will in the end be performed in UUTYPE
2981      anyway.  */
2982   common_type = determine_common_wider_type (&ubase, &cbase);
2983
2984   /* use = ubase - ratio * cbase + ratio * var.  */
2985   tree_to_aff_combination (ubase, common_type, aff);
2986   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2987   tree_to_aff_combination (var, uutype, &var_aff);
2988
2989   /* We need to shift the value if we are after the increment.  */
2990   if (stmt_after_increment (loop, cand, at))
2991     {
2992       aff_tree cstep_aff;
2993
2994       if (common_type != uutype)
2995         cstep_common = fold_convert (common_type, cstep);
2996       else
2997         cstep_common = cstep;
2998
2999       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3000       aff_combination_add (&cbase_aff, &cstep_aff);
3001     }
3002
3003   aff_combination_scale (&cbase_aff, double_int_neg (rat));
3004   aff_combination_add (aff, &cbase_aff);
3005   if (common_type != uutype)
3006     aff_combination_convert (aff, uutype);
3007
3008   aff_combination_scale (&var_aff, rat);
3009   aff_combination_add (aff, &var_aff);
3010
3011   return true;
3012 }
3013
3014 /* Determines the expression by that USE is expressed from induction variable
3015    CAND at statement AT in LOOP.  The computation is unshared.  */
3016
3017 static tree
3018 get_computation_at (struct loop *loop,
3019                     struct iv_use *use, struct iv_cand *cand, gimple at)
3020 {
3021   aff_tree aff;
3022   tree type = TREE_TYPE (use->iv->base);
3023
3024   if (!get_computation_aff (loop, use, cand, at, &aff))
3025     return NULL_TREE;
3026   unshare_aff_combination (&aff);
3027   return fold_convert (type, aff_combination_to_tree (&aff));
3028 }
3029
3030 /* Determines the expression by that USE is expressed from induction variable
3031    CAND in LOOP.  The computation is unshared.  */
3032
3033 static tree
3034 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3035 {
3036   return get_computation_at (loop, use, cand, use->stmt);
3037 }
3038
3039 /* Adjust the cost COST for being in loop setup rather than loop body.
3040    If we're optimizing for space, the loop setup overhead is constant;
3041    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3042 static unsigned
3043 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3044 {
3045   if (cost == INFTY)
3046     return cost;
3047   else if (optimize_loop_for_speed_p (data->current_loop))
3048     return cost / avg_loop_niter (data->current_loop);
3049   else
3050     return cost;
3051 }
3052
3053 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3054    validity for a memory reference accessing memory of mode MODE in
3055    address space AS.  */
3056
3057 DEF_VEC_P (sbitmap);
3058 DEF_VEC_ALLOC_P (sbitmap, heap);
3059
3060 bool
3061 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3062                                  addr_space_t as)
3063 {
3064 #define MAX_RATIO 128
3065   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3066   static VEC (sbitmap, heap) *valid_mult_list;
3067   sbitmap valid_mult;
3068
3069   if (data_index >= VEC_length (sbitmap, valid_mult_list))
3070     VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3071
3072   valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3073   if (!valid_mult)
3074     {
3075       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3076       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3077       rtx addr;
3078       HOST_WIDE_INT i;
3079
3080       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3081       sbitmap_zero (valid_mult);
3082       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3083       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3084         {
3085           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3086           if (memory_address_addr_space_p (mode, addr, as))
3087             SET_BIT (valid_mult, i + MAX_RATIO);
3088         }
3089
3090       if (dump_file && (dump_flags & TDF_DETAILS))
3091         {
3092           fprintf (dump_file, "  allowed multipliers:");
3093           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3094             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3095               fprintf (dump_file, " %d", (int) i);
3096           fprintf (dump_file, "\n");
3097           fprintf (dump_file, "\n");
3098         }
3099
3100       VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3101     }
3102
3103   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3104     return false;
3105
3106   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3107 }
3108
3109 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3110    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3111    variable is omitted.  Compute the cost for a memory reference that accesses
3112    a memory location of mode MEM_MODE in address space AS.
3113
3114    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3115    size of MEM_MODE / RATIO) is available.  To make this determination, we
3116    look at the size of the increment to be made, which is given in CSTEP.
3117    CSTEP may be zero if the step is unknown.
3118    STMT_AFTER_INC is true iff the statement we're looking at is after the
3119    increment of the original biv.
3120
3121    TODO -- there must be some better way.  This all is quite crude.  */
3122
3123 typedef struct address_cost_data_s
3124 {
3125   HOST_WIDE_INT min_offset, max_offset;
3126   unsigned costs[2][2][2][2];
3127 } *address_cost_data;
3128
3129 DEF_VEC_P (address_cost_data);
3130 DEF_VEC_ALLOC_P (address_cost_data, heap);
3131
3132 static comp_cost
3133 get_address_cost (bool symbol_present, bool var_present,
3134                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3135                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3136                   addr_space_t as, bool speed,
3137                   bool stmt_after_inc, bool *may_autoinc)
3138 {
3139   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3140   static VEC(address_cost_data, heap) *address_cost_data_list;
3141   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3142   address_cost_data data;
3143   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3144   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3145   unsigned cost, acost, complexity;
3146   bool offset_p, ratio_p, autoinc;
3147   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3148   unsigned HOST_WIDE_INT mask;
3149   unsigned bits;
3150
3151   if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3152     VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3153                            data_index + 1);
3154
3155   data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3156   if (!data)
3157     {
3158       HOST_WIDE_INT i;
3159       HOST_WIDE_INT rat, off = 0;
3160       int old_cse_not_expected, width;
3161       unsigned sym_p, var_p, off_p, rat_p, add_c;
3162       rtx seq, addr, base;
3163       rtx reg0, reg1;
3164
3165       data = (address_cost_data) xcalloc (1, sizeof (*data));
3166
3167       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3168
3169       width = GET_MODE_BITSIZE (address_mode) - 1;
3170       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3171         width = HOST_BITS_PER_WIDE_INT - 1;
3172       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3173
3174       for (i = width; i >= 0; i--)
3175         {
3176           off = -((unsigned HOST_WIDE_INT) 1 << i);
3177           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3178           if (memory_address_addr_space_p (mem_mode, addr, as))
3179             break;
3180         }
3181       data->min_offset = (i == -1? 0 : off);
3182
3183       for (i = width; i >= 0; i--)
3184         {
3185           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3186           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3187           if (memory_address_addr_space_p (mem_mode, addr, as))
3188             break;
3189         }
3190       if (i == -1)
3191         off = 0;
3192       data->max_offset = off;
3193
3194       if (dump_file && (dump_flags & TDF_DETAILS))
3195         {
3196           fprintf (dump_file, "get_address_cost:\n");
3197           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3198                    GET_MODE_NAME (mem_mode),
3199                    data->min_offset);
3200           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3201                    GET_MODE_NAME (mem_mode),
3202                    data->max_offset);
3203         }
3204
3205       rat = 1;
3206       for (i = 2; i <= MAX_RATIO; i++)
3207         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3208           {
3209             rat = i;
3210             break;
3211           }
3212
3213       /* Compute the cost of various addressing modes.  */
3214       acost = 0;
3215       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3216       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3217
3218       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3219           || USE_STORE_PRE_DECREMENT (mem_mode))
3220         {
3221           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3222           has_predec[mem_mode]
3223             = memory_address_addr_space_p (mem_mode, addr, as);
3224         }
3225       if (USE_LOAD_POST_DECREMENT (mem_mode)
3226           || USE_STORE_POST_DECREMENT (mem_mode))
3227         {
3228           addr = gen_rtx_POST_DEC (address_mode, reg0);
3229           has_postdec[mem_mode]
3230             = memory_address_addr_space_p (mem_mode, addr, as);
3231         }
3232       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3233           || USE_STORE_PRE_DECREMENT (mem_mode))
3234         {
3235           addr = gen_rtx_PRE_INC (address_mode, reg0);
3236           has_preinc[mem_mode]
3237             = memory_address_addr_space_p (mem_mode, addr, as);
3238         }
3239       if (USE_LOAD_POST_INCREMENT (mem_mode)
3240           || USE_STORE_POST_INCREMENT (mem_mode))
3241         {
3242           addr = gen_rtx_POST_INC (address_mode, reg0);
3243           has_postinc[mem_mode]
3244             = memory_address_addr_space_p (mem_mode, addr, as);
3245         }
3246       for (i = 0; i < 16; i++)
3247         {
3248           sym_p = i & 1;
3249           var_p = (i >> 1) & 1;
3250           off_p = (i >> 2) & 1;
3251           rat_p = (i >> 3) & 1;
3252
3253           addr = reg0;
3254           if (rat_p)
3255             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3256                                    gen_int_mode (rat, address_mode));
3257
3258           if (var_p)
3259             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3260
3261           if (sym_p)
3262             {
3263               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3264               /* ??? We can run into trouble with some backends by presenting
3265                  it with symbols which haven't been properly passed through
3266                  targetm.encode_section_info.  By setting the local bit, we
3267                  enhance the probability of things working.  */
3268               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3269
3270               if (off_p)
3271                 base = gen_rtx_fmt_e (CONST, address_mode,
3272                                       gen_rtx_fmt_ee
3273                                         (PLUS, address_mode, base,
3274                                          gen_int_mode (off, address_mode)));
3275             }
3276           else if (off_p)
3277             base = gen_int_mode (off, address_mode);
3278           else
3279             base = NULL_RTX;
3280
3281           if (base)
3282             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3283
3284           start_sequence ();
3285           /* To avoid splitting addressing modes, pretend that no cse will
3286              follow.  */
3287           old_cse_not_expected = cse_not_expected;
3288           cse_not_expected = true;
3289           addr = memory_address_addr_space (mem_mode, addr, as);
3290           cse_not_expected = old_cse_not_expected;
3291           seq = get_insns ();
3292           end_sequence ();
3293
3294           acost = seq_cost (seq, speed);
3295           acost += address_cost (addr, mem_mode, as, speed);
3296
3297           if (!acost)
3298             acost = 1;
3299           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3300         }
3301
3302       /* On some targets, it is quite expensive to load symbol to a register,
3303          which makes addresses that contain symbols look much more expensive.
3304          However, the symbol will have to be loaded in any case before the
3305          loop (and quite likely we have it in register already), so it does not
3306          make much sense to penalize them too heavily.  So make some final
3307          tweaks for the SYMBOL_PRESENT modes:
3308
3309          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3310          var is cheaper, use this mode with small penalty.
3311          If VAR_PRESENT is true, try whether the mode with
3312          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3313          if this is the case, use it.  */
3314       add_c = add_cost (speed, address_mode);
3315       for (i = 0; i < 8; i++)
3316         {
3317           var_p = i & 1;
3318           off_p = (i >> 1) & 1;
3319           rat_p = (i >> 2) & 1;
3320
3321           acost = data->costs[0][1][off_p][rat_p] + 1;
3322           if (var_p)
3323             acost += add_c;
3324
3325           if (acost < data->costs[1][var_p][off_p][rat_p])
3326             data->costs[1][var_p][off_p][rat_p] = acost;
3327         }
3328
3329       if (dump_file && (dump_flags & TDF_DETAILS))
3330         {
3331           fprintf (dump_file, "Address costs:\n");
3332
3333           for (i = 0; i < 16; i++)
3334             {
3335               sym_p = i & 1;
3336               var_p = (i >> 1) & 1;
3337               off_p = (i >> 2) & 1;
3338               rat_p = (i >> 3) & 1;
3339
3340               fprintf (dump_file, "  ");
3341               if (sym_p)
3342                 fprintf (dump_file, "sym + ");
3343               if (var_p)
3344                 fprintf (dump_file, "var + ");
3345               if (off_p)
3346                 fprintf (dump_file, "cst + ");
3347               if (rat_p)
3348                 fprintf (dump_file, "rat * ");
3349
3350               acost = data->costs[sym_p][var_p][off_p][rat_p];
3351               fprintf (dump_file, "index costs %d\n", acost);
3352             }
3353           if (has_predec[mem_mode] || has_postdec[mem_mode]
3354               || has_preinc[mem_mode] || has_postinc[mem_mode])
3355             fprintf (dump_file, "  May include autoinc/dec\n");
3356           fprintf (dump_file, "\n");
3357         }
3358
3359       VEC_replace (address_cost_data, address_cost_data_list,
3360                    data_index, data);
3361     }
3362
3363   bits = GET_MODE_BITSIZE (address_mode);
3364   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3365   offset &= mask;
3366   if ((offset >> (bits - 1) & 1))
3367     offset |= ~mask;
3368   s_offset = offset;
3369
3370   autoinc = false;
3371   msize = GET_MODE_SIZE (mem_mode);
3372   autoinc_offset = offset;
3373   if (stmt_after_inc)
3374     autoinc_offset += ratio * cstep;
3375   if (symbol_present || var_present || ratio != 1)
3376     autoinc = false;
3377   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3378                && msize == cstep)
3379            || (has_postdec[mem_mode] && autoinc_offset == 0
3380                && msize == -cstep)
3381            || (has_preinc[mem_mode] && autoinc_offset == msize
3382                && msize == cstep)
3383            || (has_predec[mem_mode] && autoinc_offset == -msize
3384                && msize == -cstep))
3385     autoinc = true;
3386
3387   cost = 0;
3388   offset_p = (s_offset != 0
3389               && data->min_offset <= s_offset
3390               && s_offset <= data->max_offset);
3391   ratio_p = (ratio != 1
3392              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3393
3394   if (ratio != 1 && !ratio_p)
3395     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3396
3397   if (s_offset && !offset_p && !symbol_present)
3398     cost += add_cost (speed, address_mode);
3399
3400   if (may_autoinc)
3401     *may_autoinc = autoinc;
3402   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3403   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3404   return new_cost (cost + acost, complexity);
3405 }
3406
3407  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3408     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3409     calculating the operands of EXPR.  Returns true if successful, and returns
3410     the cost in COST.  */
3411
3412 static bool
3413 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3414                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3415 {
3416   comp_cost res;
3417   tree op1 = TREE_OPERAND (expr, 1);
3418   tree cst = TREE_OPERAND (mult, 1);
3419   tree multop = TREE_OPERAND (mult, 0);
3420   int m = exact_log2 (int_cst_value (cst));
3421   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3422   int sa_cost;
3423
3424   if (!(m >= 0 && m < maxm))
3425     return false;
3426
3427   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3428              ? shiftadd_cost (speed, mode, m)
3429              : (mult == op1
3430                 ? shiftsub1_cost (speed, mode, m)
3431                 : shiftsub0_cost (speed, mode, m)));
3432   res = new_cost (sa_cost, 0);
3433   res = add_costs (res, mult == op1 ? cost0 : cost1);
3434
3435   STRIP_NOPS (multop);
3436   if (!is_gimple_val (multop))
3437     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3438
3439   *cost = res;
3440   return true;
3441 }
3442
3443 /* Estimates cost of forcing expression EXPR into a variable.  */
3444
3445 static comp_cost
3446 force_expr_to_var_cost (tree expr, bool speed)
3447 {
3448   static bool costs_initialized = false;
3449   static unsigned integer_cost [2];
3450   static unsigned symbol_cost [2];
3451   static unsigned address_cost [2];
3452   tree op0, op1;
3453   comp_cost cost0, cost1, cost;
3454   enum machine_mode mode;
3455
3456   if (!costs_initialized)
3457     {
3458       tree type = build_pointer_type (integer_type_node);
3459       tree var, addr;
3460       rtx x;
3461       int i;
3462
3463       var = create_tmp_var_raw (integer_type_node, "test_var");
3464       TREE_STATIC (var) = 1;
3465       x = produce_memory_decl_rtl (var, NULL);
3466       SET_DECL_RTL (var, x);
3467
3468       addr = build1 (ADDR_EXPR, type, var);
3469
3470
3471       for (i = 0; i < 2; i++)
3472         {
3473           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3474                                                              2000), i);
3475
3476           symbol_cost[i] = computation_cost (addr, i) + 1;
3477
3478           address_cost[i]
3479             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3480           if (dump_file && (dump_flags & TDF_DETAILS))
3481             {
3482               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3483               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3484               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3485               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3486               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3487               fprintf (dump_file, "\n");
3488             }
3489         }
3490
3491       costs_initialized = true;
3492     }
3493
3494   STRIP_NOPS (expr);
3495
3496   if (SSA_VAR_P (expr))
3497     return no_cost;
3498
3499   if (is_gimple_min_invariant (expr))
3500     {
3501       if (TREE_CODE (expr) == INTEGER_CST)
3502         return new_cost (integer_cost [speed], 0);
3503
3504       if (TREE_CODE (expr) == ADDR_EXPR)
3505         {
3506           tree obj = TREE_OPERAND (expr, 0);
3507
3508           if (TREE_CODE (obj) == VAR_DECL
3509               || TREE_CODE (obj) == PARM_DECL
3510               || TREE_CODE (obj) == RESULT_DECL)
3511             return new_cost (symbol_cost [speed], 0);
3512         }
3513
3514       return new_cost (address_cost [speed], 0);
3515     }
3516
3517   switch (TREE_CODE (expr))
3518     {
3519     case POINTER_PLUS_EXPR:
3520     case PLUS_EXPR:
3521     case MINUS_EXPR:
3522     case MULT_EXPR:
3523       op0 = TREE_OPERAND (expr, 0);
3524       op1 = TREE_OPERAND (expr, 1);
3525       STRIP_NOPS (op0);
3526       STRIP_NOPS (op1);
3527
3528       if (is_gimple_val (op0))
3529         cost0 = no_cost;
3530       else
3531         cost0 = force_expr_to_var_cost (op0, speed);
3532
3533       if (is_gimple_val (op1))
3534         cost1 = no_cost;
3535       else
3536         cost1 = force_expr_to_var_cost (op1, speed);
3537
3538       break;
3539
3540     case NEGATE_EXPR:
3541       op0 = TREE_OPERAND (expr, 0);
3542       STRIP_NOPS (op0);
3543       op1 = NULL_TREE;
3544
3545       if (is_gimple_val (op0))
3546         cost0 = no_cost;
3547       else
3548         cost0 = force_expr_to_var_cost (op0, speed);
3549
3550       cost1 = no_cost;
3551       break;
3552
3553     default:
3554       /* Just an arbitrary value, FIXME.  */
3555       return new_cost (target_spill_cost[speed], 0);
3556     }
3557
3558   mode = TYPE_MODE (TREE_TYPE (expr));
3559   switch (TREE_CODE (expr))
3560     {
3561     case POINTER_PLUS_EXPR:
3562     case PLUS_EXPR:
3563     case MINUS_EXPR:
3564     case NEGATE_EXPR:
3565       cost = new_cost (add_cost (speed, mode), 0);
3566       if (TREE_CODE (expr) != NEGATE_EXPR)
3567         {
3568           tree mult = NULL_TREE;
3569           comp_cost sa_cost;
3570           if (TREE_CODE (op1) == MULT_EXPR)
3571             mult = op1;
3572           else if (TREE_CODE (op0) == MULT_EXPR)
3573             mult = op0;
3574
3575           if (mult != NULL_TREE
3576               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3577               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3578                                     speed, &sa_cost))
3579             return sa_cost;
3580         }
3581       break;
3582
3583     case MULT_EXPR:
3584       if (cst_and_fits_in_hwi (op0))
3585         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3586                                              mode, speed), 0);
3587       else if (cst_and_fits_in_hwi (op1))
3588         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3589                                              mode, speed), 0);
3590       else
3591         return new_cost (target_spill_cost [speed], 0);
3592       break;
3593
3594     default:
3595       gcc_unreachable ();
3596     }
3597
3598   cost = add_costs (cost, cost0);
3599   cost = add_costs (cost, cost1);
3600
3601   /* Bound the cost by target_spill_cost.  The parts of complicated
3602      computations often are either loop invariant or at least can
3603      be shared between several iv uses, so letting this grow without
3604      limits would not give reasonable results.  */
3605   if (cost.cost > (int) target_spill_cost [speed])
3606     cost.cost = target_spill_cost [speed];
3607
3608   return cost;
3609 }
3610
3611 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3612    invariants the computation depends on.  */
3613
3614 static comp_cost
3615 force_var_cost (struct ivopts_data *data,
3616                 tree expr, bitmap *depends_on)
3617 {
3618   if (depends_on)
3619     {
3620       fd_ivopts_data = data;
3621       walk_tree (&expr, find_depends, depends_on, NULL);
3622     }
3623
3624   return force_expr_to_var_cost (expr, data->speed);
3625 }
3626
3627 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3628    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3629    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3630    invariants the computation depends on.  */
3631
3632 static comp_cost
3633 split_address_cost (struct ivopts_data *data,
3634                     tree addr, bool *symbol_present, bool *var_present,
3635                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3636 {
3637   tree core;
3638   HOST_WIDE_INT bitsize;
3639   HOST_WIDE_INT bitpos;
3640   tree toffset;
3641   enum machine_mode mode;
3642   int unsignedp, volatilep;
3643
3644   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3645                               &unsignedp, &volatilep, false);
3646
3647   if (toffset != 0
3648       || bitpos % BITS_PER_UNIT != 0
3649       || TREE_CODE (core) != VAR_DECL)
3650     {
3651       *symbol_present = false;
3652       *var_present = true;
3653       fd_ivopts_data = data;
3654       walk_tree (&addr, find_depends, depends_on, NULL);
3655       return new_cost (target_spill_cost[data->speed], 0);
3656     }
3657
3658   *offset += bitpos / BITS_PER_UNIT;
3659   if (TREE_STATIC (core)
3660       || DECL_EXTERNAL (core))
3661     {
3662       *symbol_present = true;
3663       *var_present = false;
3664       return no_cost;
3665     }
3666
3667   *symbol_present = false;
3668   *var_present = true;
3669   return no_cost;
3670 }
3671
3672 /* Estimates cost of expressing difference of addresses E1 - E2 as
3673    var + symbol + offset.  The value of offset is added to OFFSET,
3674    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3675    part is missing.  DEPENDS_ON is a set of the invariants the computation
3676    depends on.  */
3677
3678 static comp_cost
3679 ptr_difference_cost (struct ivopts_data *data,
3680                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3681                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3682 {
3683   HOST_WIDE_INT diff = 0;
3684   aff_tree aff_e1, aff_e2;
3685   tree type;
3686
3687   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3688
3689   if (ptr_difference_const (e1, e2, &diff))
3690     {
3691       *offset += diff;
3692       *symbol_present = false;
3693       *var_present = false;
3694       return no_cost;
3695     }
3696
3697   if (integer_zerop (e2))
3698     return split_address_cost (data, TREE_OPERAND (e1, 0),
3699                                symbol_present, var_present, offset, depends_on);
3700
3701   *symbol_present = false;
3702   *var_present = true;
3703
3704   type = signed_type_for (TREE_TYPE (e1));
3705   tree_to_aff_combination (e1, type, &aff_e1);
3706   tree_to_aff_combination (e2, type, &aff_e2);
3707   aff_combination_scale (&aff_e2, double_int_minus_one);
3708   aff_combination_add (&aff_e1, &aff_e2);
3709
3710   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3711 }
3712
3713 /* Estimates cost of expressing difference E1 - E2 as
3714    var + symbol + offset.  The value of offset is added to OFFSET,
3715    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3716    part is missing.  DEPENDS_ON is a set of the invariants the computation
3717    depends on.  */
3718
3719 static comp_cost
3720 difference_cost (struct ivopts_data *data,
3721                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3722                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3723 {
3724   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3725   unsigned HOST_WIDE_INT off1, off2;
3726   aff_tree aff_e1, aff_e2;
3727   tree type;
3728
3729   e1 = strip_offset (e1, &off1);
3730   e2 = strip_offset (e2, &off2);
3731   *offset += off1 - off2;
3732
3733   STRIP_NOPS (e1);
3734   STRIP_NOPS (e2);
3735
3736   if (TREE_CODE (e1) == ADDR_EXPR)
3737     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3738                                 offset, depends_on);
3739   *symbol_present = false;
3740
3741   if (operand_equal_p (e1, e2, 0))
3742     {
3743       *var_present = false;
3744       return no_cost;
3745     }
3746
3747   *var_present = true;
3748
3749   if (integer_zerop (e2))
3750     return force_var_cost (data, e1, depends_on);
3751
3752   if (integer_zerop (e1))
3753     {
3754       comp_cost cost = force_var_cost (data, e2, depends_on);
3755       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3756       return cost;
3757     }
3758
3759   type = signed_type_for (TREE_TYPE (e1));
3760   tree_to_aff_combination (e1, type, &aff_e1);
3761   tree_to_aff_combination (e2, type, &aff_e2);
3762   aff_combination_scale (&aff_e2, double_int_minus_one);
3763   aff_combination_add (&aff_e1, &aff_e2);
3764
3765   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3766 }
3767
3768 /* Returns true if AFF1 and AFF2 are identical.  */
3769
3770 static bool
3771 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3772 {
3773   unsigned i;
3774
3775   if (aff1->n != aff2->n)
3776     return false;
3777
3778   for (i = 0; i < aff1->n; i++)
3779     {
3780       if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3781         return false;
3782
3783       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3784         return false;
3785     }
3786   return true;
3787 }
3788
3789 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3790
3791 static int
3792 get_expr_id (struct ivopts_data *data, tree expr)
3793 {
3794   struct iv_inv_expr_ent ent;
3795   struct iv_inv_expr_ent **slot;
3796
3797   ent.expr = expr;
3798   ent.hash = iterative_hash_expr (expr, 0);
3799   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3800                                                      &ent, INSERT);
3801   if (*slot)
3802     return (*slot)->id;
3803
3804   *slot = XNEW (struct iv_inv_expr_ent);
3805   (*slot)->expr = expr;
3806   (*slot)->hash = ent.hash;
3807   (*slot)->id = data->inv_expr_id++;
3808   return (*slot)->id;
3809 }
3810
3811 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3812    requires a new compiler generated temporary.  Returns -1 otherwise.
3813    ADDRESS_P is a flag indicating if the expression is for address
3814    computation.  */
3815
3816 static int
3817 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3818                             tree cbase, HOST_WIDE_INT ratio,
3819                             bool address_p)
3820 {
3821   aff_tree ubase_aff, cbase_aff;
3822   tree expr, ub, cb;
3823
3824   STRIP_NOPS (ubase);
3825   STRIP_NOPS (cbase);
3826   ub = ubase;
3827   cb = cbase;
3828
3829   if ((TREE_CODE (ubase) == INTEGER_CST)
3830       && (TREE_CODE (cbase) == INTEGER_CST))
3831     return -1;
3832
3833   /* Strips the constant part. */
3834   if (TREE_CODE (ubase) == PLUS_EXPR
3835       || TREE_CODE (ubase) == MINUS_EXPR
3836       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3837     {
3838       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3839         ubase = TREE_OPERAND (ubase, 0);
3840     }
3841
3842   /* Strips the constant part. */
3843   if (TREE_CODE (cbase) == PLUS_EXPR
3844       || TREE_CODE (cbase) == MINUS_EXPR
3845       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3846     {
3847       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3848         cbase = TREE_OPERAND (cbase, 0);
3849     }
3850
3851   if (address_p)
3852     {
3853       if (((TREE_CODE (ubase) == SSA_NAME)
3854            || (TREE_CODE (ubase) == ADDR_EXPR
3855                && is_gimple_min_invariant (ubase)))
3856           && (TREE_CODE (cbase) == INTEGER_CST))
3857         return -1;
3858
3859       if (((TREE_CODE (cbase) == SSA_NAME)
3860            || (TREE_CODE (cbase) == ADDR_EXPR
3861                && is_gimple_min_invariant (cbase)))
3862           && (TREE_CODE (ubase) == INTEGER_CST))
3863         return -1;
3864     }
3865
3866   if (ratio == 1)
3867     {
3868       if(operand_equal_p (ubase, cbase, 0))
3869         return -1;
3870
3871       if (TREE_CODE (ubase) == ADDR_EXPR
3872           && TREE_CODE (cbase) == ADDR_EXPR)
3873         {
3874           tree usym, csym;
3875
3876           usym = TREE_OPERAND (ubase, 0);
3877           csym = TREE_OPERAND (cbase, 0);
3878           if (TREE_CODE (usym) == ARRAY_REF)
3879             {
3880               tree ind = TREE_OPERAND (usym, 1);
3881               if (TREE_CODE (ind) == INTEGER_CST
3882                   && host_integerp (ind, 0)
3883                   && TREE_INT_CST_LOW (ind) == 0)
3884                 usym = TREE_OPERAND (usym, 0);
3885             }
3886           if (TREE_CODE (csym) == ARRAY_REF)
3887             {
3888               tree ind = TREE_OPERAND (csym, 1);
3889               if (TREE_CODE (ind) == INTEGER_CST
3890                   && host_integerp (ind, 0)
3891                   && TREE_INT_CST_LOW (ind) == 0)
3892                 csym = TREE_OPERAND (csym, 0);
3893             }
3894           if (operand_equal_p (usym, csym, 0))
3895             return -1;
3896         }
3897       /* Now do more complex comparison  */
3898       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3899       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3900       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3901         return -1;
3902     }
3903
3904   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3905   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3906
3907   aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3908   aff_combination_add (&ubase_aff, &cbase_aff);
3909   expr = aff_combination_to_tree (&ubase_aff);
3910   return get_expr_id (data, expr);
3911 }
3912
3913
3914
3915 /* Determines the cost of the computation by that USE is expressed
3916    from induction variable CAND.  If ADDRESS_P is true, we just need
3917    to create an address from it, otherwise we want to get it into
3918    register.  A set of invariants we depend on is stored in
3919    DEPENDS_ON.  AT is the statement at that the value is computed.
3920    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3921    addressing is likely.  */
3922
3923 static comp_cost
3924 get_computation_cost_at (struct ivopts_data *data,
3925                          struct iv_use *use, struct iv_cand *cand,
3926                          bool address_p, bitmap *depends_on, gimple at,
3927                          bool *can_autoinc,
3928                          int *inv_expr_id)
3929 {
3930   tree ubase = use->iv->base, ustep = use->iv->step;
3931   tree cbase, cstep;
3932   tree utype = TREE_TYPE (ubase), ctype;
3933   unsigned HOST_WIDE_INT cstepi, offset = 0;
3934   HOST_WIDE_INT ratio, aratio;
3935   bool var_present, symbol_present, stmt_is_after_inc;
3936   comp_cost cost;
3937   double_int rat;
3938   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3939
3940   *depends_on = NULL;
3941
3942   /* Only consider real candidates.  */
3943   if (!cand->iv)
3944     return infinite_cost;
3945
3946   cbase = cand->iv->base;
3947   cstep = cand->iv->step;
3948   ctype = TREE_TYPE (cbase);
3949
3950   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3951     {
3952       /* We do not have a precision to express the values of use.  */
3953       return infinite_cost;
3954     }
3955
3956   if (address_p
3957       || (use->iv->base_object
3958           && cand->iv->base_object
3959           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3960           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3961     {
3962       /* Do not try to express address of an object with computation based
3963          on address of a different object.  This may cause problems in rtl
3964          level alias analysis (that does not expect this to be happening,
3965          as this is illegal in C), and would be unlikely to be useful
3966          anyway.  */
3967       if (use->iv->base_object
3968           && cand->iv->base_object
3969           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3970         return infinite_cost;
3971     }
3972
3973   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3974     {
3975       /* TODO -- add direct handling of this case.  */
3976       goto fallback;
3977     }
3978
3979   /* CSTEPI is removed from the offset in case statement is after the
3980      increment.  If the step is not constant, we use zero instead.
3981      This is a bit imprecise (there is the extra addition), but
3982      redundancy elimination is likely to transform the code so that
3983      it uses value of the variable before increment anyway,
3984      so it is not that much unrealistic.  */
3985   if (cst_and_fits_in_hwi (cstep))
3986     cstepi = int_cst_value (cstep);
3987   else
3988     cstepi = 0;
3989
3990   if (!constant_multiple_of (ustep, cstep, &rat))
3991     return infinite_cost;
3992
3993   if (double_int_fits_in_shwi_p (rat))
3994     ratio = double_int_to_shwi (rat);
3995   else
3996     return infinite_cost;
3997
3998   STRIP_NOPS (cbase);
3999   ctype = TREE_TYPE (cbase);
4000
4001   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4002
4003   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4004      or ratio == 1, it is better to handle this like
4005
4006      ubase - ratio * cbase + ratio * var
4007
4008      (also holds in the case ratio == -1, TODO.  */
4009
4010   if (cst_and_fits_in_hwi (cbase))
4011     {
4012       offset = - ratio * int_cst_value (cbase);
4013       cost = difference_cost (data,
4014                               ubase, build_int_cst (utype, 0),
4015                               &symbol_present, &var_present, &offset,
4016                               depends_on);
4017       cost.cost /= avg_loop_niter (data->current_loop);
4018     }
4019   else if (ratio == 1)
4020     {
4021       tree real_cbase = cbase;
4022
4023       /* Check to see if any adjustment is needed.  */
4024       if (cstepi == 0 && stmt_is_after_inc)
4025         {
4026           aff_tree real_cbase_aff;
4027           aff_tree cstep_aff;
4028
4029           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4030                                    &real_cbase_aff);
4031           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4032
4033           aff_combination_add (&real_cbase_aff, &cstep_aff);
4034           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4035         }
4036
4037       cost = difference_cost (data,
4038                               ubase, real_cbase,
4039                               &symbol_present, &var_present, &offset,
4040                               depends_on);
4041       cost.cost /= avg_loop_niter (data->current_loop);
4042     }
4043   else if (address_p
4044            && !POINTER_TYPE_P (ctype)
4045            && multiplier_allowed_in_address_p
4046                 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4047                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4048     {
4049       cbase
4050         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4051       cost = difference_cost (data,
4052                               ubase, cbase,
4053                               &symbol_present, &var_present, &offset,
4054                               depends_on);
4055       cost.cost /= avg_loop_niter (data->current_loop);
4056     }
4057   else
4058     {
4059       cost = force_var_cost (data, cbase, depends_on);
4060       cost = add_costs (cost,
4061                         difference_cost (data,
4062                                          ubase, build_int_cst (utype, 0),
4063                                          &symbol_present, &var_present,
4064                                          &offset, depends_on));
4065       cost.cost /= avg_loop_niter (data->current_loop);
4066       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4067     }
4068
4069   if (inv_expr_id)
4070     {
4071       *inv_expr_id =
4072           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4073       /* Clear depends on.  */
4074       if (*inv_expr_id != -1 && depends_on && *depends_on)
4075         bitmap_clear (*depends_on);
4076     }
4077
4078   /* If we are after the increment, the value of the candidate is higher by
4079      one iteration.  */
4080   if (stmt_is_after_inc)
4081     offset -= ratio * cstepi;
4082
4083   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4084      (symbol/var1/const parts may be omitted).  If we are looking for an
4085      address, find the cost of addressing this.  */
4086   if (address_p)
4087     return add_costs (cost,
4088                       get_address_cost (symbol_present, var_present,
4089                                         offset, ratio, cstepi,
4090                                         TYPE_MODE (TREE_TYPE (utype)),
4091                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4092                                         speed, stmt_is_after_inc,
4093                                         can_autoinc));
4094
4095   /* Otherwise estimate the costs for computing the expression.  */
4096   if (!symbol_present && !var_present && !offset)
4097     {
4098       if (ratio != 1)
4099         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4100       return cost;
4101     }
4102
4103   /* Symbol + offset should be compile-time computable so consider that they
4104       are added once to the variable, if present.  */
4105   if (var_present && (symbol_present || offset))
4106     cost.cost += adjust_setup_cost (data,
4107                                     add_cost (speed, TYPE_MODE (ctype)));
4108
4109   /* Having offset does not affect runtime cost in case it is added to
4110      symbol, but it increases complexity.  */
4111   if (offset)
4112     cost.complexity++;
4113
4114   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4115
4116   aratio = ratio > 0 ? ratio : -ratio;
4117   if (aratio != 1)
4118     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4119   return cost;
4120
4121 fallback:
4122   if (can_autoinc)
4123     *can_autoinc = false;
4124
4125   {
4126     /* Just get the expression, expand it and measure the cost.  */
4127     tree comp = get_computation_at (data->current_loop, use, cand, at);
4128
4129     if (!comp)
4130       return infinite_cost;
4131
4132     if (address_p)
4133       comp = build_simple_mem_ref (comp);
4134
4135     return new_cost (computation_cost (comp, speed), 0);
4136   }
4137 }
4138
4139 /* Determines the cost of the computation by that USE is expressed
4140    from induction variable CAND.  If ADDRESS_P is true, we just need
4141    to create an address from it, otherwise we want to get it into
4142    register.  A set of invariants we depend on is stored in
4143    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4144    autoinc addressing is likely.  */
4145
4146 static comp_cost
4147 get_computation_cost (struct ivopts_data *data,
4148                       struct iv_use *use, struct iv_cand *cand,
4149                       bool address_p, bitmap *depends_on,
4150                       bool *can_autoinc, int *inv_expr_id)
4151 {
4152   return get_computation_cost_at (data,
4153                                   use, cand, address_p, depends_on, use->stmt,
4154                                   can_autoinc, inv_expr_id);
4155 }
4156
4157 /* Determines cost of basing replacement of USE on CAND in a generic
4158    expression.  */
4159
4160 static bool
4161 determine_use_iv_cost_generic (struct ivopts_data *data,
4162                                struct iv_use *use, struct iv_cand *cand)
4163 {
4164   bitmap depends_on;
4165   comp_cost cost;
4166   int inv_expr_id = -1;
4167
4168   /* The simple case first -- if we need to express value of the preserved
4169      original biv, the cost is 0.  This also prevents us from counting the
4170      cost of increment twice -- once at this use and once in the cost of
4171      the candidate.  */
4172   if (cand->pos == IP_ORIGINAL
4173       && cand->incremented_at == use->stmt)
4174     {
4175       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4176                        ERROR_MARK, -1);
4177       return true;
4178     }
4179
4180   cost = get_computation_cost (data, use, cand, false, &depends_on,
4181                                NULL, &inv_expr_id);
4182
4183   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4184                    inv_expr_id);
4185
4186   return !infinite_cost_p (cost);
4187 }
4188
4189 /* Determines cost of basing replacement of USE on CAND in an address.  */
4190
4191 static bool
4192 determine_use_iv_cost_address (struct ivopts_data *data,
4193                                struct iv_use *use, struct iv_cand *cand)
4194 {
4195   bitmap depends_on;
4196   bool can_autoinc;
4197   int inv_expr_id = -1;
4198   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4199                                          &can_autoinc, &inv_expr_id);
4200
4201   if (cand->ainc_use == use)
4202     {
4203       if (can_autoinc)
4204         cost.cost -= cand->cost_step;
4205       /* If we generated the candidate solely for exploiting autoincrement
4206          opportunities, and it turns out it can't be used, set the cost to
4207          infinity to make sure we ignore it.  */
4208       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4209         cost = infinite_cost;
4210     }
4211   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4212                    inv_expr_id);
4213
4214   return !infinite_cost_p (cost);
4215 }
4216
4217 /* Computes value of candidate CAND at position AT in iteration NITER, and
4218    stores it to VAL.  */
4219
4220 static void
4221 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4222                aff_tree *val)
4223 {
4224   aff_tree step, delta, nit;
4225   struct iv *iv = cand->iv;
4226   tree type = TREE_TYPE (iv->base);
4227   tree steptype = type;
4228   if (POINTER_TYPE_P (type))
4229     steptype = sizetype;
4230
4231   tree_to_aff_combination (iv->step, steptype, &step);
4232   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4233   aff_combination_convert (&nit, steptype);
4234   aff_combination_mult (&nit, &step, &delta);
4235   if (stmt_after_increment (loop, cand, at))
4236     aff_combination_add (&delta, &step);
4237
4238   tree_to_aff_combination (iv->base, type, val);
4239   aff_combination_add (val, &delta);
4240 }
4241
4242 /* Returns period of induction variable iv.  */
4243
4244 static tree
4245 iv_period (struct iv *iv)
4246 {
4247   tree step = iv->step, period, type;
4248   tree pow2div;
4249
4250   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4251
4252   type = unsigned_type_for (TREE_TYPE (step));
4253   /* Period of the iv is lcm (step, type_range)/step -1,
4254      i.e., N*type_range/step - 1. Since type range is power
4255      of two, N == (step >> num_of_ending_zeros_binary (step),
4256      so the final result is
4257
4258        (type_range >> num_of_ending_zeros_binary (step)) - 1
4259
4260   */
4261   pow2div = num_ending_zeros (step);
4262
4263   period = build_low_bits_mask (type,
4264                                 (TYPE_PRECISION (type)
4265                                  - tree_low_cst (pow2div, 1)));
4266
4267   return period;
4268 }
4269
4270 /* Returns the comparison operator used when eliminating the iv USE.  */
4271
4272 static enum tree_code
4273 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4274 {
4275   struct loop *loop = data->current_loop;
4276   basic_block ex_bb;
4277   edge exit;
4278
4279   ex_bb = gimple_bb (use->stmt);
4280   exit = EDGE_SUCC (ex_bb, 0);
4281   if (flow_bb_inside_loop_p (loop, exit->dest))
4282     exit = EDGE_SUCC (ex_bb, 1);
4283
4284   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4285 }
4286
4287 static tree
4288 strip_wrap_conserving_type_conversions (tree exp)
4289 {
4290   while (tree_ssa_useless_type_conversion (exp)
4291          && (nowrap_type_p (TREE_TYPE (exp))
4292              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4293     exp = TREE_OPERAND (exp, 0);
4294   return exp;
4295 }
4296
4297 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4298    check for an exact match.  */
4299
4300 static bool
4301 expr_equal_p (tree e, tree what)
4302 {
4303   gimple stmt;
4304   enum tree_code code;
4305
4306   e = strip_wrap_conserving_type_conversions (e);
4307   what = strip_wrap_conserving_type_conversions (what);
4308
4309   code = TREE_CODE (what);
4310   if (TREE_TYPE (e) != TREE_TYPE (what))
4311     return false;
4312
4313   if (operand_equal_p (e, what, 0))
4314     return true;
4315
4316   if (TREE_CODE (e) != SSA_NAME)
4317     return false;
4318
4319   stmt = SSA_NAME_DEF_STMT (e);
4320   if (gimple_code (stmt) != GIMPLE_ASSIGN
4321       || gimple_assign_rhs_code (stmt) != code)
4322     return false;
4323
4324   switch (get_gimple_rhs_class (code))
4325     {
4326     case GIMPLE_BINARY_RHS:
4327       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4328         return false;
4329       /* Fallthru.  */
4330
4331     case GIMPLE_UNARY_RHS:
4332     case GIMPLE_SINGLE_RHS:
4333       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4334     default:
4335       return false;
4336     }
4337 }
4338
4339 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4340    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4341    calculation is performed in non-wrapping type.
4342
4343    TODO: More generally, we could test for the situation that
4344          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4345          This would require knowing the sign of OFFSET.
4346
4347          Also, we only look for the first addition in the computation of BASE.
4348          More complex analysis would be better, but introducing it just for
4349          this optimization seems like an overkill.  */
4350
4351 static bool
4352 difference_cannot_overflow_p (tree base, tree offset)
4353 {
4354   enum tree_code code;
4355   tree e1, e2;
4356
4357   if (!nowrap_type_p (TREE_TYPE (base)))
4358     return false;
4359
4360   base = expand_simple_operations (base);
4361
4362   if (TREE_CODE (base) == SSA_NAME)
4363     {
4364       gimple stmt = SSA_NAME_DEF_STMT (base);
4365
4366       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4367         return false;
4368
4369       code = gimple_assign_rhs_code (stmt);
4370       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4371         return false;
4372
4373       e1 = gimple_assign_rhs1 (stmt);
4374       e2 = gimple_assign_rhs2 (stmt);
4375     }
4376   else
4377     {
4378       code = TREE_CODE (base);
4379       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4380         return false;
4381       e1 = TREE_OPERAND (base, 0);
4382       e2 = TREE_OPERAND (base, 1);
4383     }
4384
4385   /* TODO: deeper inspection may be necessary to prove the equality.  */
4386   switch (code)
4387     {
4388     case PLUS_EXPR:
4389       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4390     case POINTER_PLUS_EXPR:
4391       return expr_equal_p (e2, offset);
4392
4393     default:
4394       return false;
4395     }
4396 }
4397
4398 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4399    comparison with CAND.  NITER describes the number of iterations of
4400    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4401
4402    We aim to handle the following situation:
4403
4404    sometype *base, *p;
4405    int a, b, i;
4406
4407    i = a;
4408    p = p_0 = base + a;
4409
4410    do
4411      {
4412        bla (*p);
4413        p++;
4414        i++;
4415      }
4416    while (i < b);
4417
4418    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4419    We aim to optimize this to
4420
4421    p = p_0 = base + a;
4422    do
4423      {
4424        bla (*p);
4425        p++;
4426      }
4427    while (p < p_0 - a + b);
4428
4429    This preserves the correctness, since the pointer arithmetics does not
4430    overflow.  More precisely:
4431
4432    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4433       overflow in computing it or the values of p.
4434    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4435       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4436
4437 static bool
4438 iv_elimination_compare_lt (struct ivopts_data *data,
4439                            struct iv_cand *cand, enum tree_code *comp_p,
4440                            struct tree_niter_desc *niter)
4441 {
4442   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4443   struct affine_tree_combination nit, tmpa, tmpb;
4444   enum tree_code comp;
4445   HOST_WIDE_INT step;
4446
4447   /* We need to know that the candidate induction variable does not overflow.
4448      While more complex analysis may be used to prove this, for now just
4449      check that the variable appears in the original program and that it
4450      is computed in a type that guarantees no overflows.  */
4451   cand_type = TREE_TYPE (cand->iv->base);
4452   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4453     return false;
4454
4455   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4456      the calculation of the BOUND could overflow, making the comparison
4457      invalid.  */
4458   if (!data->loop_single_exit_p)
4459     return false;
4460
4461   /* We need to be able to decide whether candidate is increasing or decreasing
4462      in order to choose the right comparison operator.  */
4463   if (!cst_and_fits_in_hwi (cand->iv->step))
4464     return false;
4465   step = int_cst_value (cand->iv->step);
4466
4467   /* Check that the number of iterations matches the expected pattern:
4468      a + 1 > b ? 0 : b - a - 1.  */
4469   mbz = niter->may_be_zero;
4470   if (TREE_CODE (mbz) == GT_EXPR)
4471     {
4472       /* Handle a + 1 > b.  */
4473       tree op0 = TREE_OPERAND (mbz, 0);
4474       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4475         {
4476           a = TREE_OPERAND (op0, 0);
4477           b = TREE_OPERAND (mbz, 1);
4478         }
4479       else
4480         return false;
4481     }
4482   else if (TREE_CODE (mbz) == LT_EXPR)
4483     {
4484       tree op1 = TREE_OPERAND (mbz, 1);
4485
4486       /* Handle b < a + 1.  */
4487       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4488         {
4489           a = TREE_OPERAND (op1, 0);
4490           b = TREE_OPERAND (mbz, 0);
4491         }
4492       else
4493         return false;
4494     }
4495   else
4496     return false;
4497
4498   /* Expected number of iterations is B - A - 1.  Check that it matches
4499      the actual number, i.e., that B - A - NITER = 1.  */
4500   tree_to_aff_combination (niter->niter, nit_type, &nit);
4501   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4502   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4503   aff_combination_scale (&nit, double_int_minus_one);
4504   aff_combination_scale (&tmpa, double_int_minus_one);
4505   aff_combination_add (&tmpb, &tmpa);
4506   aff_combination_add (&tmpb, &nit);
4507   if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4508     return false;
4509
4510   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4511      overflow.  */
4512   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4513                         cand->iv->step,
4514                         fold_convert (TREE_TYPE (cand->iv->step), a));
4515   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4516     return false;
4517
4518   /* Determine the new comparison operator.  */
4519   comp = step < 0 ? GT_EXPR : LT_EXPR;
4520   if (*comp_p == NE_EXPR)
4521     *comp_p = comp;
4522   else if (*comp_p == EQ_EXPR)
4523     *comp_p = invert_tree_comparison (comp, false);
4524   else
4525     gcc_unreachable ();
4526
4527   return true;
4528 }
4529
4530 /* Check whether it is possible to express the condition in USE by comparison
4531    of candidate CAND.  If so, store the value compared with to BOUND, and the
4532    comparison operator to COMP.  */
4533
4534 static bool
4535 may_eliminate_iv (struct ivopts_data *data,
4536                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4537                   enum tree_code *comp)
4538 {
4539   basic_block ex_bb;
4540   edge exit;
4541   tree period;
4542   struct loop *loop = data->current_loop;
4543   aff_tree bnd;
4544   struct tree_niter_desc *desc = NULL;
4545
4546   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4547     return false;
4548
4549   /* For now works only for exits that dominate the loop latch.
4550      TODO: extend to other conditions inside loop body.  */
4551   ex_bb = gimple_bb (use->stmt);
4552   if (use->stmt != last_stmt (ex_bb)
4553       || gimple_code (use->stmt) != GIMPLE_COND
4554       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4555     return false;
4556
4557   exit = EDGE_SUCC (ex_bb, 0);
4558   if (flow_bb_inside_loop_p (loop, exit->dest))
4559     exit = EDGE_SUCC (ex_bb, 1);
4560   if (flow_bb_inside_loop_p (loop, exit->dest))
4561     return false;
4562
4563   desc = niter_for_exit (data, exit);
4564   if (!desc)
4565     return false;
4566
4567   /* Determine whether we can use the variable to test the exit condition.
4568      This is the case iff the period of the induction variable is greater
4569      than the number of iterations for which the exit condition is true.  */
4570   period = iv_period (cand->iv);
4571
4572   /* If the number of iterations is constant, compare against it directly.  */
4573   if (TREE_CODE (desc->niter) == INTEGER_CST)
4574     {
4575       /* See cand_value_at.  */
4576       if (stmt_after_increment (loop, cand, use->stmt))
4577         {
4578           if (!tree_int_cst_lt (desc->niter, period))
4579             return false;
4580         }
4581       else
4582         {
4583           if (tree_int_cst_lt (period, desc->niter))
4584             return false;
4585         }
4586     }
4587
4588   /* If not, and if this is the only possible exit of the loop, see whether
4589      we can get a conservative estimate on the number of iterations of the
4590      entire loop and compare against that instead.  */
4591   else
4592     {
4593       double_int period_value, max_niter;
4594
4595       max_niter = desc->max;
4596       if (stmt_after_increment (loop, cand, use->stmt))
4597         max_niter = double_int_add (max_niter, double_int_one);
4598       period_value = tree_to_double_int (period);
4599       if (double_int_ucmp (max_niter, period_value) > 0)
4600         {
4601           /* See if we can take advantage of inferred loop bound information.  */
4602           if (data->loop_single_exit_p)
4603             {
4604               if (!max_loop_iterations (loop, &max_niter))
4605                 return false;
4606               /* The loop bound is already adjusted by adding 1.  */
4607               if (double_int_ucmp (max_niter, period_value) > 0)
4608                 return false;
4609             }
4610           else
4611             return false;
4612         }
4613     }
4614
4615   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4616
4617   *bound = aff_combination_to_tree (&bnd);
4618   *comp = iv_elimination_compare (data, use);
4619
4620   /* It is unlikely that computing the number of iterations using division
4621      would be more profitable than keeping the original induction variable.  */
4622   if (expression_expensive_p (*bound))
4623     return false;
4624
4625   /* Sometimes, it is possible to handle the situation that the number of
4626      iterations may be zero unless additional assumtions by using <
4627      instead of != in the exit condition.
4628
4629      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4630            base the exit condition on it.  However, that is often too
4631            expensive.  */
4632   if (!integer_zerop (desc->may_be_zero))
4633     return iv_elimination_compare_lt (data, cand, comp, desc);
4634
4635   return true;
4636 }
4637
4638  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4639     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4640
4641 static int
4642 parm_decl_cost (struct ivopts_data *data, tree bound)
4643 {
4644   tree sbound = bound;
4645   STRIP_NOPS (sbound);
4646
4647   if (TREE_CODE (sbound) == SSA_NAME
4648       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4649       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4650       && data->body_includes_call)
4651     return COSTS_N_INSNS (1);
4652
4653   return 0;
4654 }
4655
4656 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4657
4658 static bool
4659 determine_use_iv_cost_condition (struct ivopts_data *data,
4660                                  struct iv_use *use, struct iv_cand *cand)
4661 {
4662   tree bound = NULL_TREE;
4663   struct iv *cmp_iv;
4664   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4665   comp_cost elim_cost, express_cost, cost, bound_cost;
4666   bool ok;
4667   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4668   tree *control_var, *bound_cst;
4669   enum tree_code comp = ERROR_MARK;
4670
4671   /* Only consider real candidates.  */
4672   if (!cand->iv)
4673     {
4674       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4675                        ERROR_MARK, -1);
4676       return false;
4677     }
4678
4679   /* Try iv elimination.  */
4680   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4681     {
4682       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4683       if (elim_cost.cost == 0)
4684         elim_cost.cost = parm_decl_cost (data, bound);
4685       else if (TREE_CODE (bound) == INTEGER_CST)
4686         elim_cost.cost = 0;
4687       /* If we replace a loop condition 'i < n' with 'p < base + n',
4688          depends_on_elim will have 'base' and 'n' set, which implies
4689          that both 'base' and 'n' will be live during the loop.  More likely,
4690          'base + n' will be loop invariant, resulting in only one live value
4691          during the loop.  So in that case we clear depends_on_elim and set
4692         elim_inv_expr_id instead.  */
4693       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4694         {
4695           elim_inv_expr_id = get_expr_id (data, bound);
4696           bitmap_clear (depends_on_elim);
4697         }
4698       /* The bound is a loop invariant, so it will be only computed
4699          once.  */
4700       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4701     }
4702   else
4703     elim_cost = infinite_cost;
4704
4705   /* Try expressing the original giv.  If it is compared with an invariant,
4706      note that we cannot get rid of it.  */
4707   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4708                               NULL, &cmp_iv);
4709   gcc_assert (ok);
4710
4711   /* When the condition is a comparison of the candidate IV against
4712      zero, prefer this IV.
4713
4714      TODO: The constant that we're subtracting from the cost should
4715      be target-dependent.  This information should be added to the
4716      target costs for each backend.  */
4717   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4718       && integer_zerop (*bound_cst)
4719       && (operand_equal_p (*control_var, cand->var_after, 0)
4720           || operand_equal_p (*control_var, cand->var_before, 0)))
4721     elim_cost.cost -= 1;
4722
4723   express_cost = get_computation_cost (data, use, cand, false,
4724                                        &depends_on_express, NULL,
4725                                        &express_inv_expr_id);
4726   fd_ivopts_data = data;
4727   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4728
4729   /* Count the cost of the original bound as well.  */
4730   bound_cost = force_var_cost (data, *bound_cst, NULL);
4731   if (bound_cost.cost == 0)
4732     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4733   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4734     bound_cost.cost = 0;
4735   express_cost.cost += bound_cost.cost;
4736
4737   /* Choose the better approach, preferring the eliminated IV. */
4738   if (compare_costs (elim_cost, express_cost) <= 0)
4739     {
4740       cost = elim_cost;
4741       depends_on = depends_on_elim;
4742       depends_on_elim = NULL;
4743       inv_expr_id = elim_inv_expr_id;
4744     }
4745   else
4746     {
4747       cost = express_cost;
4748       depends_on = depends_on_express;
4749       depends_on_express = NULL;
4750       bound = NULL_TREE;
4751       comp = ERROR_MARK;
4752       inv_expr_id = express_inv_expr_id;
4753     }
4754
4755   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4756
4757   if (depends_on_elim)
4758     BITMAP_FREE (depends_on_elim);
4759   if (depends_on_express)
4760     BITMAP_FREE (depends_on_express);
4761
4762   return !infinite_cost_p (cost);
4763 }
4764
4765 /* Determines cost of basing replacement of USE on CAND.  Returns false
4766    if USE cannot be based on CAND.  */
4767
4768 static bool
4769 determine_use_iv_cost (struct ivopts_data *data,
4770                        struct iv_use *use, struct iv_cand *cand)
4771 {
4772   switch (use->type)
4773     {
4774     case USE_NONLINEAR_EXPR:
4775       return determine_use_iv_cost_generic (data, use, cand);
4776
4777     case USE_ADDRESS:
4778       return determine_use_iv_cost_address (data, use, cand);
4779
4780     case USE_COMPARE:
4781       return determine_use_iv_cost_condition (data, use, cand);
4782
4783     default:
4784       gcc_unreachable ();
4785     }
4786 }
4787
4788 /* Return true if get_computation_cost indicates that autoincrement is
4789    a possibility for the pair of USE and CAND, false otherwise.  */
4790
4791 static bool
4792 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4793                            struct iv_cand *cand)
4794 {
4795   bitmap depends_on;
4796   bool can_autoinc;
4797   comp_cost cost;
4798
4799   if (use->type != USE_ADDRESS)
4800     return false;
4801
4802   cost = get_computation_cost (data, use, cand, true, &depends_on,
4803                                &can_autoinc, NULL);
4804
4805   BITMAP_FREE (depends_on);
4806
4807   return !infinite_cost_p (cost) && can_autoinc;
4808 }
4809
4810 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4811    use that allows autoincrement, and set their AINC_USE if possible.  */
4812
4813 static void
4814 set_autoinc_for_original_candidates (struct ivopts_data *data)
4815 {
4816   unsigned i, j;
4817
4818   for (i = 0; i < n_iv_cands (data); i++)
4819     {
4820       struct iv_cand *cand = iv_cand (data, i);
4821       struct iv_use *closest = NULL;
4822       if (cand->pos != IP_ORIGINAL)
4823         continue;
4824       for (j = 0; j < n_iv_uses (data); j++)
4825         {
4826           struct iv_use *use = iv_use (data, j);
4827           unsigned uid = gimple_uid (use->stmt);
4828           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4829               || uid > gimple_uid (cand->incremented_at))
4830             continue;
4831           if (closest == NULL || uid > gimple_uid (closest->stmt))
4832             closest = use;
4833         }
4834       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4835         continue;
4836       cand->ainc_use = closest;
4837     }
4838 }
4839
4840 /* Finds the candidates for the induction variables.  */
4841
4842 static void
4843 find_iv_candidates (struct ivopts_data *data)
4844 {
4845   /* Add commonly used ivs.  */
4846   add_standard_iv_candidates (data);
4847
4848   /* Add old induction variables.  */
4849   add_old_ivs_candidates (data);
4850
4851   /* Add induction variables derived from uses.  */
4852   add_derived_ivs_candidates (data);
4853
4854   set_autoinc_for_original_candidates (data);
4855
4856   /* Record the important candidates.  */
4857   record_important_candidates (data);
4858 }
4859
4860 /* Determines costs of basing the use of the iv on an iv candidate.  */
4861
4862 static void
4863 determine_use_iv_costs (struct ivopts_data *data)
4864 {
4865   unsigned i, j;
4866   struct iv_use *use;
4867   struct iv_cand *cand;
4868   bitmap to_clear = BITMAP_ALLOC (NULL);
4869
4870   alloc_use_cost_map (data);
4871
4872   for (i = 0; i < n_iv_uses (data); i++)
4873     {
4874       use = iv_use (data, i);
4875
4876       if (data->consider_all_candidates)
4877         {
4878           for (j = 0; j < n_iv_cands (data); j++)
4879             {
4880               cand = iv_cand (data, j);
4881               determine_use_iv_cost (data, use, cand);
4882             }
4883         }
4884       else
4885         {
4886           bitmap_iterator bi;
4887
4888           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4889             {
4890               cand = iv_cand (data, j);
4891               if (!determine_use_iv_cost (data, use, cand))
4892                 bitmap_set_bit (to_clear, j);
4893             }
4894
4895           /* Remove the candidates for that the cost is infinite from
4896              the list of related candidates.  */
4897           bitmap_and_compl_into (use->related_cands, to_clear);
4898           bitmap_clear (to_clear);
4899         }
4900     }
4901
4902   BITMAP_FREE (to_clear);
4903
4904   if (dump_file && (dump_flags & TDF_DETAILS))
4905     {
4906       fprintf (dump_file, "Use-candidate costs:\n");
4907
4908       for (i = 0; i < n_iv_uses (data); i++)
4909         {
4910           use = iv_use (data, i);
4911
4912           fprintf (dump_file, "Use %d:\n", i);
4913           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4914           for (j = 0; j < use->n_map_members; j++)
4915             {
4916               if (!use->cost_map[j].cand
4917                   || infinite_cost_p (use->cost_map[j].cost))
4918                 continue;
4919
4920               fprintf (dump_file, "  %d\t%d\t%d\t",
4921                        use->cost_map[j].cand->id,
4922                        use->cost_map[j].cost.cost,
4923                        use->cost_map[j].cost.complexity);
4924               if (use->cost_map[j].depends_on)
4925                 bitmap_print (dump_file,
4926                               use->cost_map[j].depends_on, "","");
4927               if (use->cost_map[j].inv_expr_id != -1)
4928                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4929               fprintf (dump_file, "\n");
4930             }
4931
4932           fprintf (dump_file, "\n");
4933         }
4934       fprintf (dump_file, "\n");
4935     }
4936 }
4937
4938 /* Determines cost of the candidate CAND.  */
4939
4940 static void
4941 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4942 {
4943   comp_cost cost_base;
4944   unsigned cost, cost_step;
4945   tree base;
4946
4947   if (!cand->iv)
4948     {
4949       cand->cost = 0;
4950       return;
4951     }
4952
4953   /* There are two costs associated with the candidate -- its increment
4954      and its initialization.  The second is almost negligible for any loop
4955      that rolls enough, so we take it just very little into account.  */
4956
4957   base = cand->iv->base;
4958   cost_base = force_var_cost (data, base, NULL);
4959   /* It will be exceptional that the iv register happens to be initialized with
4960      the proper value at no cost.  In general, there will at least be a regcopy
4961      or a const set.  */
4962   if (cost_base.cost == 0)
4963     cost_base.cost = COSTS_N_INSNS (1);
4964   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4965
4966   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4967
4968   /* Prefer the original ivs unless we may gain something by replacing it.
4969      The reason is to make debugging simpler; so this is not relevant for
4970      artificial ivs created by other optimization passes.  */
4971   if (cand->pos != IP_ORIGINAL
4972       || !SSA_NAME_VAR (cand->var_before)
4973       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4974     cost++;
4975
4976   /* Prefer not to insert statements into latch unless there are some
4977      already (so that we do not create unnecessary jumps).  */
4978   if (cand->pos == IP_END
4979       && empty_block_p (ip_end_pos (data->current_loop)))
4980     cost++;
4981
4982   cand->cost = cost;
4983   cand->cost_step = cost_step;
4984 }
4985
4986 /* Determines costs of computation of the candidates.  */
4987
4988 static void
4989 determine_iv_costs (struct ivopts_data *data)
4990 {
4991   unsigned i;
4992
4993   if (dump_file && (dump_flags & TDF_DETAILS))
4994     {
4995       fprintf (dump_file, "Candidate costs:\n");
4996       fprintf (dump_file, "  cand\tcost\n");
4997     }
4998
4999   for (i = 0; i < n_iv_cands (data); i++)
5000     {
5001       struct iv_cand *cand = iv_cand (data, i);
5002
5003       determine_iv_cost (data, cand);
5004
5005       if (dump_file && (dump_flags & TDF_DETAILS))
5006         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5007     }
5008
5009   if (dump_file && (dump_flags & TDF_DETAILS))
5010     fprintf (dump_file, "\n");
5011 }
5012
5013 /* Calculates cost for having SIZE induction variables.  */
5014
5015 static unsigned
5016 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5017 {
5018   /* We add size to the cost, so that we prefer eliminating ivs
5019      if possible.  */
5020   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5021                                             data->body_includes_call);
5022 }
5023
5024 /* For each size of the induction variable set determine the penalty.  */
5025
5026 static void
5027 determine_set_costs (struct ivopts_data *data)
5028 {
5029   unsigned j, n;
5030   gimple phi;
5031   gimple_stmt_iterator psi;
5032   tree op;
5033   struct loop *loop = data->current_loop;
5034   bitmap_iterator bi;
5035
5036   if (dump_file && (dump_flags & TDF_DETAILS))
5037     {
5038       fprintf (dump_file, "Global costs:\n");
5039       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5040       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5041       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5042       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5043     }
5044
5045   n = 0;
5046   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5047     {
5048       phi = gsi_stmt (psi);
5049       op = PHI_RESULT (phi);
5050
5051       if (virtual_operand_p (op))
5052         continue;
5053
5054       if (get_iv (data, op))
5055         continue;
5056
5057       n++;
5058     }
5059
5060   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5061     {
5062       struct version_info *info = ver_info (data, j);
5063
5064       if (info->inv_id && info->has_nonlin_use)
5065         n++;
5066     }
5067
5068   data->regs_used = n;
5069   if (dump_file && (dump_flags & TDF_DETAILS))
5070     fprintf (dump_file, "  regs_used %d\n", n);
5071
5072   if (dump_file && (dump_flags & TDF_DETAILS))
5073     {
5074       fprintf (dump_file, "  cost for size:\n");
5075       fprintf (dump_file, "  ivs\tcost\n");
5076       for (j = 0; j <= 2 * target_avail_regs; j++)
5077         fprintf (dump_file, "  %d\t%d\n", j,
5078                  ivopts_global_cost_for_size (data, j));
5079       fprintf (dump_file, "\n");
5080     }
5081 }
5082
5083 /* Returns true if A is a cheaper cost pair than B.  */
5084
5085 static bool
5086 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5087 {
5088   int cmp;
5089
5090   if (!a)
5091     return false;
5092
5093   if (!b)
5094     return true;
5095
5096   cmp = compare_costs (a->cost, b->cost);
5097   if (cmp < 0)
5098     return true;
5099
5100   if (cmp > 0)
5101     return false;
5102
5103   /* In case the costs are the same, prefer the cheaper candidate.  */
5104   if (a->cand->cost < b->cand->cost)
5105     return true;
5106
5107   return false;
5108 }
5109
5110
5111 /* Returns candidate by that USE is expressed in IVS.  */
5112
5113 static struct cost_pair *
5114 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5115 {
5116   return ivs->cand_for_use[use->id];
5117 }
5118
5119 /* Computes the cost field of IVS structure.  */
5120
5121 static void
5122 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5123 {
5124   comp_cost cost = ivs->cand_use_cost;
5125
5126   cost.cost += ivs->cand_cost;
5127
5128   cost.cost += ivopts_global_cost_for_size (data,
5129                                             ivs->n_regs + ivs->num_used_inv_expr);
5130
5131   ivs->cost = cost;
5132 }
5133
5134 /* Remove invariants in set INVS to set IVS.  */
5135
5136 static void
5137 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5138 {
5139   bitmap_iterator bi;
5140   unsigned iid;
5141
5142   if (!invs)
5143     return;
5144
5145   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5146     {
5147       ivs->n_invariant_uses[iid]--;
5148       if (ivs->n_invariant_uses[iid] == 0)
5149         ivs->n_regs--;
5150     }
5151 }
5152
5153 /* Set USE not to be expressed by any candidate in IVS.  */
5154
5155 static void
5156 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5157                  struct iv_use *use)
5158 {
5159   unsigned uid = use->id, cid;
5160   struct cost_pair *cp;
5161
5162   cp = ivs->cand_for_use[uid];
5163   if (!cp)
5164     return;
5165   cid = cp->cand->id;
5166
5167   ivs->bad_uses++;
5168   ivs->cand_for_use[uid] = NULL;
5169   ivs->n_cand_uses[cid]--;
5170
5171   if (ivs->n_cand_uses[cid] == 0)
5172     {
5173       bitmap_clear_bit (ivs->cands, cid);
5174       /* Do not count the pseudocandidates.  */
5175       if (cp->cand->iv)
5176         ivs->n_regs--;
5177       ivs->n_cands--;
5178       ivs->cand_cost -= cp->cand->cost;
5179
5180       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5181     }
5182
5183   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5184
5185   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5186
5187   if (cp->inv_expr_id != -1)
5188     {
5189       ivs->used_inv_expr[cp->inv_expr_id]--;
5190       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5191         ivs->num_used_inv_expr--;
5192     }
5193   iv_ca_recount_cost (data, ivs);
5194 }
5195
5196 /* Add invariants in set INVS to set IVS.  */
5197
5198 static void
5199 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5200 {
5201   bitmap_iterator bi;
5202   unsigned iid;
5203
5204   if (!invs)
5205     return;
5206
5207   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5208     {
5209       ivs->n_invariant_uses[iid]++;
5210       if (ivs->n_invariant_uses[iid] == 1)
5211         ivs->n_regs++;
5212     }
5213 }
5214
5215 /* Set cost pair for USE in set IVS to CP.  */
5216
5217 static void
5218 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5219               struct iv_use *use, struct cost_pair *cp)
5220 {
5221   unsigned uid = use->id, cid;
5222
5223   if (ivs->cand_for_use[uid] == cp)
5224     return;
5225
5226   if (ivs->cand_for_use[uid])
5227     iv_ca_set_no_cp (data, ivs, use);
5228
5229   if (cp)
5230     {
5231       cid = cp->cand->id;
5232
5233       ivs->bad_uses--;
5234       ivs->cand_for_use[uid] = cp;
5235       ivs->n_cand_uses[cid]++;
5236       if (ivs->n_cand_uses[cid] == 1)
5237         {
5238           bitmap_set_bit (ivs->cands, cid);
5239           /* Do not count the pseudocandidates.  */
5240           if (cp->cand->iv)
5241             ivs->n_regs++;
5242           ivs->n_cands++;
5243           ivs->cand_cost += cp->cand->cost;
5244
5245           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5246         }
5247
5248       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5249       iv_ca_set_add_invariants (ivs, cp->depends_on);
5250
5251       if (cp->inv_expr_id != -1)
5252         {
5253           ivs->used_inv_expr[cp->inv_expr_id]++;
5254           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5255             ivs->num_used_inv_expr++;
5256         }
5257       iv_ca_recount_cost (data, ivs);
5258     }
5259 }
5260
5261 /* Extend set IVS by expressing USE by some of the candidates in it
5262    if possible. All important candidates will be considered
5263    if IMPORTANT_CANDIDATES is true.  */
5264
5265 static void
5266 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5267                struct iv_use *use, bool important_candidates)
5268 {
5269   struct cost_pair *best_cp = NULL, *cp;
5270   bitmap_iterator bi;
5271   bitmap cands;
5272   unsigned i;
5273
5274   gcc_assert (ivs->upto >= use->id);
5275
5276   if (ivs->upto == use->id)
5277     {
5278       ivs->upto++;
5279       ivs->bad_uses++;
5280     }
5281
5282   cands = (important_candidates ? data->important_candidates : ivs->cands);
5283   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5284     {
5285       struct iv_cand *cand = iv_cand (data, i);
5286
5287       cp = get_use_iv_cost (data, use, cand);
5288
5289       if (cheaper_cost_pair (cp, best_cp))
5290         best_cp = cp;
5291     }
5292
5293   iv_ca_set_cp (data, ivs, use, best_cp);
5294 }
5295
5296 /* Get cost for assignment IVS.  */
5297
5298 static comp_cost
5299 iv_ca_cost (struct iv_ca *ivs)
5300 {
5301   /* This was a conditional expression but it triggered a bug in
5302      Sun C 5.5.  */
5303   if (ivs->bad_uses)
5304     return infinite_cost;
5305   else
5306     return ivs->cost;
5307 }
5308
5309 /* Returns true if all dependences of CP are among invariants in IVS.  */
5310
5311 static bool
5312 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5313 {
5314   unsigned i;
5315   bitmap_iterator bi;
5316
5317   if (!cp->depends_on)
5318     return true;
5319
5320   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5321     {
5322       if (ivs->n_invariant_uses[i] == 0)
5323         return false;
5324     }
5325
5326   return true;
5327 }
5328
5329 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5330    it before NEXT_CHANGE.  */
5331
5332 static struct iv_ca_delta *
5333 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5334                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5335 {
5336   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5337
5338   change->use = use;
5339   change->old_cp = old_cp;
5340   change->new_cp = new_cp;
5341   change->next_change = next_change;
5342
5343   return change;
5344 }
5345
5346 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5347    are rewritten.  */
5348
5349 static struct iv_ca_delta *
5350 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5351 {
5352   struct iv_ca_delta *last;
5353
5354   if (!l2)
5355     return l1;
5356
5357   if (!l1)
5358     return l2;
5359
5360   for (last = l1; last->next_change; last = last->next_change)
5361     continue;
5362   last->next_change = l2;
5363
5364   return l1;
5365 }
5366
5367 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5368
5369 static struct iv_ca_delta *
5370 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5371 {
5372   struct iv_ca_delta *act, *next, *prev = NULL;
5373   struct cost_pair *tmp;
5374
5375   for (act = delta; act; act = next)
5376     {
5377       next = act->next_change;
5378       act->next_change = prev;
5379       prev = act;
5380
5381       tmp = act->old_cp;
5382       act->old_cp = act->new_cp;
5383       act->new_cp = tmp;
5384     }
5385
5386   return prev;
5387 }
5388
5389 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5390    reverted instead.  */
5391
5392 static void
5393 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5394                     struct iv_ca_delta *delta, bool forward)
5395 {
5396   struct cost_pair *from, *to;
5397   struct iv_ca_delta *act;
5398
5399   if (!forward)
5400     delta = iv_ca_delta_reverse (delta);
5401
5402   for (act = delta; act; act = act->next_change)
5403     {
5404       from = act->old_cp;
5405       to = act->new_cp;
5406       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5407       iv_ca_set_cp (data, ivs, act->use, to);
5408     }
5409
5410   if (!forward)
5411     iv_ca_delta_reverse (delta);
5412 }
5413
5414 /* Returns true if CAND is used in IVS.  */
5415
5416 static bool
5417 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5418 {
5419   return ivs->n_cand_uses[cand->id] > 0;
5420 }
5421
5422 /* Returns number of induction variable candidates in the set IVS.  */
5423
5424 static unsigned
5425 iv_ca_n_cands (struct iv_ca *ivs)
5426 {
5427   return ivs->n_cands;
5428 }
5429
5430 /* Free the list of changes DELTA.  */
5431
5432 static void
5433 iv_ca_delta_free (struct iv_ca_delta **delta)
5434 {
5435   struct iv_ca_delta *act, *next;
5436
5437   for (act = *delta; act; act = next)
5438     {
5439       next = act->next_change;
5440       free (act);
5441     }
5442
5443   *delta = NULL;
5444 }
5445
5446 /* Allocates new iv candidates assignment.  */
5447
5448 static struct iv_ca *
5449 iv_ca_new (struct ivopts_data *data)
5450 {
5451   struct iv_ca *nw = XNEW (struct iv_ca);
5452
5453   nw->upto = 0;
5454   nw->bad_uses = 0;
5455   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5456   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5457   nw->cands = BITMAP_ALLOC (NULL);
5458   nw->n_cands = 0;
5459   nw->n_regs = 0;
5460   nw->cand_use_cost = no_cost;
5461   nw->cand_cost = 0;
5462   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5463   nw->cost = no_cost;
5464   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5465   nw->num_used_inv_expr = 0;
5466
5467   return nw;
5468 }
5469
5470 /* Free memory occupied by the set IVS.  */
5471
5472 static void
5473 iv_ca_free (struct iv_ca **ivs)
5474 {
5475   free ((*ivs)->cand_for_use);
5476   free ((*ivs)->n_cand_uses);
5477   BITMAP_FREE ((*ivs)->cands);
5478   free ((*ivs)->n_invariant_uses);
5479   free ((*ivs)->used_inv_expr);
5480   free (*ivs);
5481   *ivs = NULL;
5482 }
5483
5484 /* Dumps IVS to FILE.  */
5485
5486 static void
5487 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5488 {
5489   const char *pref = "  invariants ";
5490   unsigned i;
5491   comp_cost cost = iv_ca_cost (ivs);
5492
5493   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5494   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5495            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5496   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5497
5498    for (i = 0; i < ivs->upto; i++)
5499     {
5500       struct iv_use *use = iv_use (data, i);
5501       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5502       if (cp)
5503         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5504                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5505       else
5506         fprintf (file, "   use:%d --> ??\n", use->id);
5507     }
5508
5509   for (i = 1; i <= data->max_inv_id; i++)
5510     if (ivs->n_invariant_uses[i])
5511       {
5512         fprintf (file, "%s%d", pref, i);
5513         pref = ", ";
5514       }
5515   fprintf (file, "\n\n");
5516 }
5517
5518 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5519    new set, and store differences in DELTA.  Number of induction variables
5520    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5521    the function will try to find a solution with mimimal iv candidates.  */
5522
5523 static comp_cost
5524 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5525               struct iv_cand *cand, struct iv_ca_delta **delta,
5526               unsigned *n_ivs, bool min_ncand)
5527 {
5528   unsigned i;
5529   comp_cost cost;
5530   struct iv_use *use;
5531   struct cost_pair *old_cp, *new_cp;
5532
5533   *delta = NULL;
5534   for (i = 0; i < ivs->upto; i++)
5535     {
5536       use = iv_use (data, i);
5537       old_cp = iv_ca_cand_for_use (ivs, use);
5538
5539       if (old_cp
5540           && old_cp->cand == cand)
5541         continue;
5542
5543       new_cp = get_use_iv_cost (data, use, cand);
5544       if (!new_cp)
5545         continue;
5546
5547       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5548         continue;
5549
5550       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5551         continue;
5552
5553       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5554     }
5555
5556   iv_ca_delta_commit (data, ivs, *delta, true);
5557   cost = iv_ca_cost (ivs);
5558   if (n_ivs)
5559     *n_ivs = iv_ca_n_cands (ivs);
5560   iv_ca_delta_commit (data, ivs, *delta, false);
5561
5562   return cost;
5563 }
5564
5565 /* Try narrowing set IVS by removing CAND.  Return the cost of
5566    the new set and store the differences in DELTA.  */
5567
5568 static comp_cost
5569 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5570               struct iv_cand *cand, struct iv_ca_delta **delta)
5571 {
5572   unsigned i, ci;
5573   struct iv_use *use;
5574   struct cost_pair *old_cp, *new_cp, *cp;
5575   bitmap_iterator bi;
5576   struct iv_cand *cnd;
5577   comp_cost cost;
5578
5579   *delta = NULL;
5580   for (i = 0; i < n_iv_uses (data); i++)
5581     {
5582       use = iv_use (data, i);
5583
5584       old_cp = iv_ca_cand_for_use (ivs, use);
5585       if (old_cp->cand != cand)
5586         continue;
5587
5588       new_cp = NULL;
5589
5590       if (data->consider_all_candidates)
5591         {
5592           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5593             {
5594               if (ci == cand->id)
5595                 continue;
5596
5597               cnd = iv_cand (data, ci);
5598
5599               cp = get_use_iv_cost (data, use, cnd);
5600               if (!cp)
5601                 continue;
5602
5603               if (!iv_ca_has_deps (ivs, cp))
5604                 continue;
5605
5606               if (!cheaper_cost_pair (cp, new_cp))
5607                 continue;
5608
5609               new_cp = cp;
5610             }
5611         }
5612       else
5613         {
5614           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5615             {
5616               if (ci == cand->id)
5617                 continue;
5618
5619               cnd = iv_cand (data, ci);
5620
5621               cp = get_use_iv_cost (data, use, cnd);
5622               if (!cp)
5623                 continue;
5624               if (!iv_ca_has_deps (ivs, cp))
5625                 continue;
5626
5627               if (!cheaper_cost_pair (cp, new_cp))
5628                 continue;
5629
5630               new_cp = cp;
5631             }
5632         }
5633
5634       if (!new_cp)
5635         {
5636           iv_ca_delta_free (delta);
5637           return infinite_cost;
5638         }
5639
5640       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5641     }
5642
5643   iv_ca_delta_commit (data, ivs, *delta, true);
5644   cost = iv_ca_cost (ivs);
5645   iv_ca_delta_commit (data, ivs, *delta, false);
5646
5647   return cost;
5648 }
5649
5650 /* Try optimizing the set of candidates IVS by removing candidates different
5651    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5652    differences in DELTA.  */
5653
5654 static comp_cost
5655 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5656              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5657 {
5658   bitmap_iterator bi;
5659   struct iv_ca_delta *act_delta, *best_delta;
5660   unsigned i;
5661   comp_cost best_cost, acost;
5662   struct iv_cand *cand;
5663
5664   best_delta = NULL;
5665   best_cost = iv_ca_cost (ivs);
5666
5667   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5668     {
5669       cand = iv_cand (data, i);
5670
5671       if (cand == except_cand)
5672         continue;
5673
5674       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5675
5676       if (compare_costs (acost, best_cost) < 0)
5677         {
5678           best_cost = acost;
5679           iv_ca_delta_free (&best_delta);
5680           best_delta = act_delta;
5681         }
5682       else
5683         iv_ca_delta_free (&act_delta);
5684     }
5685
5686   if (!best_delta)
5687     {
5688       *delta = NULL;
5689       return best_cost;
5690     }
5691
5692   /* Recurse to possibly remove other unnecessary ivs.  */
5693   iv_ca_delta_commit (data, ivs, best_delta, true);
5694   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5695   iv_ca_delta_commit (data, ivs, best_delta, false);
5696   *delta = iv_ca_delta_join (best_delta, *delta);
5697   return best_cost;
5698 }
5699
5700 /* Tries to extend the sets IVS in the best possible way in order
5701    to express the USE.  If ORIGINALP is true, prefer candidates from
5702    the original set of IVs, otherwise favor important candidates not
5703    based on any memory object.  */
5704
5705 static bool
5706 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5707                   struct iv_use *use, bool originalp)
5708 {
5709   comp_cost best_cost, act_cost;
5710   unsigned i;
5711   bitmap_iterator bi;
5712   struct iv_cand *cand;
5713   struct iv_ca_delta *best_delta = NULL, *act_delta;
5714   struct cost_pair *cp;
5715
5716   iv_ca_add_use (data, ivs, use, false);
5717   best_cost = iv_ca_cost (ivs);
5718
5719   cp = iv_ca_cand_for_use (ivs, use);
5720   if (!cp)
5721     {
5722       ivs->upto--;
5723       ivs->bad_uses--;
5724       iv_ca_add_use (data, ivs, use, true);
5725       best_cost = iv_ca_cost (ivs);
5726       cp = iv_ca_cand_for_use (ivs, use);
5727     }
5728   if (cp)
5729     {
5730       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5731       iv_ca_set_no_cp (data, ivs, use);
5732     }
5733
5734   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5735      first try important candidates not based on any memory object.  Only if
5736      this fails, try the specific ones.  Rationale -- in loops with many
5737      variables the best choice often is to use just one generic biv.  If we
5738      added here many ivs specific to the uses, the optimization algorithm later
5739      would be likely to get stuck in a local minimum, thus causing us to create
5740      too many ivs.  The approach from few ivs to more seems more likely to be
5741      successful -- starting from few ivs, replacing an expensive use by a
5742      specific iv should always be a win.  */
5743   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5744     {
5745       cand = iv_cand (data, i);
5746
5747       if (originalp && cand->pos !=IP_ORIGINAL)
5748         continue;
5749
5750       if (!originalp && cand->iv->base_object != NULL_TREE)
5751         continue;
5752
5753       if (iv_ca_cand_used_p (ivs, cand))
5754         continue;
5755
5756       cp = get_use_iv_cost (data, use, cand);
5757       if (!cp)
5758         continue;
5759
5760       iv_ca_set_cp (data, ivs, use, cp);
5761       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5762                                true);
5763       iv_ca_set_no_cp (data, ivs, use);
5764       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5765
5766       if (compare_costs (act_cost, best_cost) < 0)
5767         {
5768           best_cost = act_cost;
5769
5770           iv_ca_delta_free (&best_delta);
5771           best_delta = act_delta;
5772         }
5773       else
5774         iv_ca_delta_free (&act_delta);
5775     }
5776
5777   if (infinite_cost_p (best_cost))
5778     {
5779       for (i = 0; i < use->n_map_members; i++)
5780         {
5781           cp = use->cost_map + i;
5782           cand = cp->cand;
5783           if (!cand)
5784             continue;
5785
5786           /* Already tried this.  */
5787           if (cand->important)
5788             {
5789               if (originalp && cand->pos == IP_ORIGINAL)
5790                 continue;
5791               if (!originalp && cand->iv->base_object == NULL_TREE)
5792                 continue;
5793             }
5794
5795           if (iv_ca_cand_used_p (ivs, cand))
5796             continue;
5797
5798           act_delta = NULL;
5799           iv_ca_set_cp (data, ivs, use, cp);
5800           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5801           iv_ca_set_no_cp (data, ivs, use);
5802           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5803                                        cp, act_delta);
5804
5805           if (compare_costs (act_cost, best_cost) < 0)
5806             {
5807               best_cost = act_cost;
5808
5809               if (best_delta)
5810                 iv_ca_delta_free (&best_delta);
5811               best_delta = act_delta;
5812             }
5813           else
5814             iv_ca_delta_free (&act_delta);
5815         }
5816     }
5817
5818   iv_ca_delta_commit (data, ivs, best_delta, true);
5819   iv_ca_delta_free (&best_delta);
5820
5821   return !infinite_cost_p (best_cost);
5822 }
5823
5824 /* Finds an initial assignment of candidates to uses.  */
5825
5826 static struct iv_ca *
5827 get_initial_solution (struct ivopts_data *data, bool originalp)
5828 {
5829   struct iv_ca *ivs = iv_ca_new (data);
5830   unsigned i;
5831
5832   for (i = 0; i < n_iv_uses (data); i++)
5833     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5834       {
5835         iv_ca_free (&ivs);
5836         return NULL;
5837       }
5838
5839   return ivs;
5840 }
5841
5842 /* Tries to improve set of induction variables IVS.  */
5843
5844 static bool
5845 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5846 {
5847   unsigned i, n_ivs;
5848   comp_cost acost, best_cost = iv_ca_cost (ivs);
5849   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5850   struct iv_cand *cand;
5851
5852   /* Try extending the set of induction variables by one.  */
5853   for (i = 0; i < n_iv_cands (data); i++)
5854     {
5855       cand = iv_cand (data, i);
5856
5857       if (iv_ca_cand_used_p (ivs, cand))
5858         continue;
5859
5860       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5861       if (!act_delta)
5862         continue;
5863
5864       /* If we successfully added the candidate and the set is small enough,
5865          try optimizing it by removing other candidates.  */
5866       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5867         {
5868           iv_ca_delta_commit (data, ivs, act_delta, true);
5869           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5870           iv_ca_delta_commit (data, ivs, act_delta, false);
5871           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5872         }
5873
5874       if (compare_costs (acost, best_cost) < 0)
5875         {
5876           best_cost = acost;
5877           iv_ca_delta_free (&best_delta);
5878           best_delta = act_delta;
5879         }
5880       else
5881         iv_ca_delta_free (&act_delta);
5882     }
5883
5884   if (!best_delta)
5885     {
5886       /* Try removing the candidates from the set instead.  */
5887       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5888
5889       /* Nothing more we can do.  */
5890       if (!best_delta)
5891         return false;
5892     }
5893
5894   iv_ca_delta_commit (data, ivs, best_delta, true);
5895   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5896   iv_ca_delta_free (&best_delta);
5897   return true;
5898 }
5899
5900 /* Attempts to find the optimal set of induction variables.  We do simple
5901    greedy heuristic -- we try to replace at most one candidate in the selected
5902    solution and remove the unused ivs while this improves the cost.  */
5903
5904 static struct iv_ca *
5905 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5906 {
5907   struct iv_ca *set;
5908
5909   /* Get the initial solution.  */
5910   set = get_initial_solution (data, originalp);
5911   if (!set)
5912     {
5913       if (dump_file && (dump_flags & TDF_DETAILS))
5914         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5915       return NULL;
5916     }
5917
5918   if (dump_file && (dump_flags & TDF_DETAILS))
5919     {
5920       fprintf (dump_file, "Initial set of candidates:\n");
5921       iv_ca_dump (data, dump_file, set);
5922     }
5923
5924   while (try_improve_iv_set (data, set))
5925     {
5926       if (dump_file && (dump_flags & TDF_DETAILS))
5927         {
5928           fprintf (dump_file, "Improved to:\n");
5929           iv_ca_dump (data, dump_file, set);
5930         }
5931     }
5932
5933   return set;
5934 }
5935
5936 static struct iv_ca *
5937 find_optimal_iv_set (struct ivopts_data *data)
5938 {
5939   unsigned i;
5940   struct iv_ca *set, *origset;
5941   struct iv_use *use;
5942   comp_cost cost, origcost;
5943
5944   /* Determine the cost based on a strategy that starts with original IVs,
5945      and try again using a strategy that prefers candidates not based
5946      on any IVs.  */
5947   origset = find_optimal_iv_set_1 (data, true);
5948   set = find_optimal_iv_set_1 (data, false);
5949
5950   if (!origset && !set)
5951     return NULL;
5952
5953   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5954   cost = set ? iv_ca_cost (set) : infinite_cost;
5955
5956   if (dump_file && (dump_flags & TDF_DETAILS))
5957     {
5958       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5959                origcost.cost, origcost.complexity);
5960       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5961                cost.cost, cost.complexity);
5962     }
5963
5964   /* Choose the one with the best cost.  */
5965   if (compare_costs (origcost, cost) <= 0)
5966     {
5967       if (set)
5968         iv_ca_free (&set);
5969       set = origset;
5970     }
5971   else if (origset)
5972     iv_ca_free (&origset);
5973
5974   for (i = 0; i < n_iv_uses (data); i++)
5975     {
5976       use = iv_use (data, i);
5977       use->selected = iv_ca_cand_for_use (set, use)->cand;
5978     }
5979
5980   return set;
5981 }
5982
5983 /* Creates a new induction variable corresponding to CAND.  */
5984
5985 static void
5986 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5987 {
5988   gimple_stmt_iterator incr_pos;
5989   tree base;
5990   bool after = false;
5991
5992   if (!cand->iv)
5993     return;
5994
5995   switch (cand->pos)
5996     {
5997     case IP_NORMAL:
5998       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5999       break;
6000
6001     case IP_END:
6002       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6003       after = true;
6004       break;
6005
6006     case IP_AFTER_USE:
6007       after = true;
6008       /* fall through */
6009     case IP_BEFORE_USE:
6010       incr_pos = gsi_for_stmt (cand->incremented_at);
6011       break;
6012
6013     case IP_ORIGINAL:
6014       /* Mark that the iv is preserved.  */
6015       name_info (data, cand->var_before)->preserve_biv = true;
6016       name_info (data, cand->var_after)->preserve_biv = true;
6017
6018       /* Rewrite the increment so that it uses var_before directly.  */
6019       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6020       return;
6021     }
6022
6023   gimple_add_tmp_var (cand->var_before);
6024
6025   base = unshare_expr (cand->iv->base);
6026
6027   create_iv (base, unshare_expr (cand->iv->step),
6028              cand->var_before, data->current_loop,
6029              &incr_pos, after, &cand->var_before, &cand->var_after);
6030 }
6031
6032 /* Creates new induction variables described in SET.  */
6033
6034 static void
6035 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6036 {
6037   unsigned i;
6038   struct iv_cand *cand;
6039   bitmap_iterator bi;
6040
6041   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6042     {
6043       cand = iv_cand (data, i);
6044       create_new_iv (data, cand);
6045     }
6046
6047   if (dump_file && (dump_flags & TDF_DETAILS))
6048     {
6049       fprintf (dump_file, "\nSelected IV set: \n");
6050       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6051         {
6052           cand = iv_cand (data, i);
6053           dump_cand (dump_file, cand);
6054         }
6055       fprintf (dump_file, "\n");
6056     }
6057 }
6058
6059 /* Rewrites USE (definition of iv used in a nonlinear expression)
6060    using candidate CAND.  */
6061
6062 static void
6063 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6064                             struct iv_use *use, struct iv_cand *cand)
6065 {
6066   tree comp;
6067   tree op, tgt;
6068   gimple ass;
6069   gimple_stmt_iterator bsi;
6070
6071   /* An important special case -- if we are asked to express value of
6072      the original iv by itself, just exit; there is no need to
6073      introduce a new computation (that might also need casting the
6074      variable to unsigned and back).  */
6075   if (cand->pos == IP_ORIGINAL
6076       && cand->incremented_at == use->stmt)
6077     {
6078       tree step, ctype, utype;
6079       enum tree_code incr_code = PLUS_EXPR, old_code;
6080
6081       gcc_assert (is_gimple_assign (use->stmt));
6082       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6083
6084       step = cand->iv->step;
6085       ctype = TREE_TYPE (step);
6086       utype = TREE_TYPE (cand->var_after);
6087       if (TREE_CODE (step) == NEGATE_EXPR)
6088         {
6089           incr_code = MINUS_EXPR;
6090           step = TREE_OPERAND (step, 0);
6091         }
6092
6093       /* Check whether we may leave the computation unchanged.
6094          This is the case only if it does not rely on other
6095          computations in the loop -- otherwise, the computation
6096          we rely upon may be removed in remove_unused_ivs,
6097          thus leading to ICE.  */
6098       old_code = gimple_assign_rhs_code (use->stmt);
6099       if (old_code == PLUS_EXPR
6100           || old_code == MINUS_EXPR
6101           || old_code == POINTER_PLUS_EXPR)
6102         {
6103           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6104             op = gimple_assign_rhs2 (use->stmt);
6105           else if (old_code != MINUS_EXPR
6106                    && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6107             op = gimple_assign_rhs1 (use->stmt);
6108           else
6109             op = NULL_TREE;
6110         }
6111       else
6112         op = NULL_TREE;
6113
6114       if (op
6115           && (TREE_CODE (op) == INTEGER_CST
6116               || operand_equal_p (op, step, 0)))
6117         return;
6118
6119       /* Otherwise, add the necessary computations to express
6120          the iv.  */
6121       op = fold_convert (ctype, cand->var_before);
6122       comp = fold_convert (utype,
6123                            build2 (incr_code, ctype, op,
6124                                    unshare_expr (step)));
6125     }
6126   else
6127     {
6128       comp = get_computation (data->current_loop, use, cand);
6129       gcc_assert (comp != NULL_TREE);
6130     }
6131
6132   switch (gimple_code (use->stmt))
6133     {
6134     case GIMPLE_PHI:
6135       tgt = PHI_RESULT (use->stmt);
6136
6137       /* If we should keep the biv, do not replace it.  */
6138       if (name_info (data, tgt)->preserve_biv)
6139         return;
6140
6141       bsi = gsi_after_labels (gimple_bb (use->stmt));
6142       break;
6143
6144     case GIMPLE_ASSIGN:
6145       tgt = gimple_assign_lhs (use->stmt);
6146       bsi = gsi_for_stmt (use->stmt);
6147       break;
6148
6149     default:
6150       gcc_unreachable ();
6151     }
6152
6153   if (!valid_gimple_rhs_p (comp)
6154       || (gimple_code (use->stmt) != GIMPLE_PHI
6155           /* We can't allow re-allocating the stmt as it might be pointed
6156              to still.  */
6157           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6158               >= gimple_num_ops (gsi_stmt (bsi)))))
6159     {
6160       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6161                                        true, GSI_SAME_STMT);
6162       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6163         {
6164           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6165           /* As this isn't a plain copy we have to reset alignment
6166              information.  */
6167           if (SSA_NAME_PTR_INFO (comp))
6168             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6169         }
6170     }
6171
6172   if (gimple_code (use->stmt) == GIMPLE_PHI)
6173     {
6174       ass = gimple_build_assign (tgt, comp);
6175       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6176
6177       bsi = gsi_for_stmt (use->stmt);
6178       remove_phi_node (&bsi, false);
6179     }
6180   else
6181     {
6182       gimple_assign_set_rhs_from_tree (&bsi, comp);
6183       use->stmt = gsi_stmt (bsi);
6184     }
6185 }
6186
6187 /* Performs a peephole optimization to reorder the iv update statement with
6188    a mem ref to enable instruction combining in later phases. The mem ref uses
6189    the iv value before the update, so the reordering transformation requires
6190    adjustment of the offset. CAND is the selected IV_CAND.
6191
6192    Example:
6193
6194    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6195    iv2 = iv1 + 1;
6196
6197    if (t < val)      (1)
6198      goto L;
6199    goto Head;
6200
6201
6202    directly propagating t over to (1) will introduce overlapping live range
6203    thus increase register pressure. This peephole transform it into:
6204
6205
6206    iv2 = iv1 + 1;
6207    t = MEM_REF (base, iv2, 8, 8);
6208    if (t < val)
6209      goto L;
6210    goto Head;
6211 */
6212
6213 static void
6214 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6215 {
6216   tree var_after;
6217   gimple iv_update, stmt;
6218   basic_block bb;
6219   gimple_stmt_iterator gsi, gsi_iv;
6220
6221   if (cand->pos != IP_NORMAL)
6222     return;
6223
6224   var_after = cand->var_after;
6225   iv_update = SSA_NAME_DEF_STMT (var_after);
6226
6227   bb = gimple_bb (iv_update);
6228   gsi = gsi_last_nondebug_bb (bb);
6229   stmt = gsi_stmt (gsi);
6230
6231   /* Only handle conditional statement for now.  */
6232   if (gimple_code (stmt) != GIMPLE_COND)
6233     return;
6234
6235   gsi_prev_nondebug (&gsi);
6236   stmt = gsi_stmt (gsi);
6237   if (stmt != iv_update)
6238     return;
6239
6240   gsi_prev_nondebug (&gsi);
6241   if (gsi_end_p (gsi))
6242     return;
6243
6244   stmt = gsi_stmt (gsi);
6245   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6246     return;
6247
6248   if (stmt != use->stmt)
6249     return;
6250
6251   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6252     return;
6253
6254   if (dump_file && (dump_flags & TDF_DETAILS))
6255     {
6256       fprintf (dump_file, "Reordering \n");
6257       print_gimple_stmt (dump_file, iv_update, 0, 0);
6258       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6259       fprintf (dump_file, "\n");
6260     }
6261
6262   gsi = gsi_for_stmt (use->stmt);
6263   gsi_iv = gsi_for_stmt (iv_update);
6264   gsi_move_before (&gsi_iv, &gsi);
6265
6266   cand->pos = IP_BEFORE_USE;
6267   cand->incremented_at = use->stmt;
6268 }
6269
6270 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6271
6272 static void
6273 rewrite_use_address (struct ivopts_data *data,
6274                      struct iv_use *use, struct iv_cand *cand)
6275 {
6276   aff_tree aff;
6277   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6278   tree base_hint = NULL_TREE;
6279   tree ref, iv;
6280   bool ok;
6281
6282   adjust_iv_update_pos (cand, use);
6283   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6284   gcc_assert (ok);
6285   unshare_aff_combination (&aff);
6286
6287   /* To avoid undefined overflow problems, all IV candidates use unsigned
6288      integer types.  The drawback is that this makes it impossible for
6289      create_mem_ref to distinguish an IV that is based on a memory object
6290      from one that represents simply an offset.
6291
6292      To work around this problem, we pass a hint to create_mem_ref that
6293      indicates which variable (if any) in aff is an IV based on a memory
6294      object.  Note that we only consider the candidate.  If this is not
6295      based on an object, the base of the reference is in some subexpression
6296      of the use -- but these will use pointer types, so they are recognized
6297      by the create_mem_ref heuristics anyway.  */
6298   if (cand->iv->base_object)
6299     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6300
6301   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6302   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6303                         reference_alias_ptr_type (*use->op_p),
6304                         iv, base_hint, data->speed);
6305   copy_ref_info (ref, *use->op_p);
6306   *use->op_p = ref;
6307 }
6308
6309 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6310    candidate CAND.  */
6311
6312 static void
6313 rewrite_use_compare (struct ivopts_data *data,
6314                      struct iv_use *use, struct iv_cand *cand)
6315 {
6316   tree comp, *var_p, op, bound;
6317   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6318   enum tree_code compare;
6319   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6320   bool ok;
6321
6322   bound = cp->value;
6323   if (bound)
6324     {
6325       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6326       tree var_type = TREE_TYPE (var);
6327       gimple_seq stmts;
6328
6329       if (dump_file && (dump_flags & TDF_DETAILS))
6330         {
6331           fprintf (dump_file, "Replacing exit test: ");
6332           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6333         }
6334       compare = cp->comp;
6335       bound = unshare_expr (fold_convert (var_type, bound));
6336       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6337       if (stmts)
6338         gsi_insert_seq_on_edge_immediate (
6339                 loop_preheader_edge (data->current_loop),
6340                 stmts);
6341
6342       gimple_cond_set_lhs (use->stmt, var);
6343       gimple_cond_set_code (use->stmt, compare);
6344       gimple_cond_set_rhs (use->stmt, op);
6345       return;
6346     }
6347
6348   /* The induction variable elimination failed; just express the original
6349      giv.  */
6350   comp = get_computation (data->current_loop, use, cand);
6351   gcc_assert (comp != NULL_TREE);
6352
6353   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6354   gcc_assert (ok);
6355
6356   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6357                                      true, GSI_SAME_STMT);
6358 }
6359
6360 /* Rewrites USE using candidate CAND.  */
6361
6362 static void
6363 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6364 {
6365   switch (use->type)
6366     {
6367       case USE_NONLINEAR_EXPR:
6368         rewrite_use_nonlinear_expr (data, use, cand);
6369         break;
6370
6371       case USE_ADDRESS:
6372         rewrite_use_address (data, use, cand);
6373         break;
6374
6375       case USE_COMPARE:
6376         rewrite_use_compare (data, use, cand);
6377         break;
6378
6379       default:
6380         gcc_unreachable ();
6381     }
6382
6383   update_stmt (use->stmt);
6384 }
6385
6386 /* Rewrite the uses using the selected induction variables.  */
6387
6388 static void
6389 rewrite_uses (struct ivopts_data *data)
6390 {
6391   unsigned i;
6392   struct iv_cand *cand;
6393   struct iv_use *use;
6394
6395   for (i = 0; i < n_iv_uses (data); i++)
6396     {
6397       use = iv_use (data, i);
6398       cand = use->selected;
6399       gcc_assert (cand);
6400
6401       rewrite_use (data, use, cand);
6402     }
6403 }
6404
6405 /* Removes the ivs that are not used after rewriting.  */
6406
6407 static void
6408 remove_unused_ivs (struct ivopts_data *data)
6409 {
6410   unsigned j;
6411   bitmap_iterator bi;
6412   bitmap toremove = BITMAP_ALLOC (NULL);
6413
6414   /* Figure out an order in which to release SSA DEFs so that we don't
6415      release something that we'd have to propagate into a debug stmt
6416      afterwards.  */
6417   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6418     {
6419       struct version_info *info;
6420
6421       info = ver_info (data, j);
6422       if (info->iv
6423           && !integer_zerop (info->iv->step)
6424           && !info->inv_id
6425           && !info->iv->have_use_for
6426           && !info->preserve_biv)
6427         bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6428     }
6429
6430   release_defs_bitset (toremove);
6431
6432   BITMAP_FREE (toremove);
6433 }
6434
6435 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6436    for pointer_map_traverse.  */
6437
6438 static bool
6439 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6440                       void *data ATTRIBUTE_UNUSED)
6441 {
6442   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6443
6444   free (niter);
6445   return true;
6446 }
6447
6448 /* Frees data allocated by the optimization of a single loop.  */
6449
6450 static void
6451 free_loop_data (struct ivopts_data *data)
6452 {
6453   unsigned i, j;
6454   bitmap_iterator bi;
6455   tree obj;
6456
6457   if (data->niters)
6458     {
6459       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6460       pointer_map_destroy (data->niters);
6461       data->niters = NULL;
6462     }
6463
6464   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6465     {
6466       struct version_info *info;
6467
6468       info = ver_info (data, i);
6469       free (info->iv);
6470       info->iv = NULL;
6471       info->has_nonlin_use = false;
6472       info->preserve_biv = false;
6473       info->inv_id = 0;
6474     }
6475   bitmap_clear (data->relevant);
6476   bitmap_clear (data->important_candidates);
6477
6478   for (i = 0; i < n_iv_uses (data); i++)
6479     {
6480       struct iv_use *use = iv_use (data, i);
6481
6482       free (use->iv);
6483       BITMAP_FREE (use->related_cands);
6484       for (j = 0; j < use->n_map_members; j++)
6485         if (use->cost_map[j].depends_on)
6486           BITMAP_FREE (use->cost_map[j].depends_on);
6487       free (use->cost_map);
6488       free (use);
6489     }
6490   VEC_truncate (iv_use_p, data->iv_uses, 0);
6491
6492   for (i = 0; i < n_iv_cands (data); i++)
6493     {
6494       struct iv_cand *cand = iv_cand (data, i);
6495
6496       free (cand->iv);
6497       if (cand->depends_on)
6498         BITMAP_FREE (cand->depends_on);
6499       free (cand);
6500     }
6501   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6502
6503   if (data->version_info_size < num_ssa_names)
6504     {
6505       data->version_info_size = 2 * num_ssa_names;
6506       free (data->version_info);
6507       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6508     }
6509
6510   data->max_inv_id = 0;
6511
6512   FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6513     SET_DECL_RTL (obj, NULL_RTX);
6514
6515   VEC_truncate (tree, decl_rtl_to_reset, 0);
6516
6517   htab_empty (data->inv_expr_tab);
6518   data->inv_expr_id = 0;
6519 }
6520
6521 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6522    loop tree.  */
6523
6524 static void
6525 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6526 {
6527   free_loop_data (data);
6528   free (data->version_info);
6529   BITMAP_FREE (data->relevant);
6530   BITMAP_FREE (data->important_candidates);
6531
6532   VEC_free (tree, heap, decl_rtl_to_reset);
6533   VEC_free (iv_use_p, heap, data->iv_uses);
6534   VEC_free (iv_cand_p, heap, data->iv_candidates);
6535   htab_delete (data->inv_expr_tab);
6536 }
6537
6538 /* Returns true if the loop body BODY includes any function calls.  */
6539
6540 static bool
6541 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6542 {
6543   gimple_stmt_iterator gsi;
6544   unsigned i;
6545
6546   for (i = 0; i < num_nodes; i++)
6547     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6548       {
6549         gimple stmt = gsi_stmt (gsi);
6550         if (is_gimple_call (stmt)
6551             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6552           return true;
6553       }
6554   return false;
6555 }
6556
6557 /* Optimizes the LOOP.  Returns true if anything changed.  */
6558
6559 static bool
6560 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6561 {
6562   bool changed = false;
6563   struct iv_ca *iv_ca;
6564   edge exit = single_dom_exit (loop);
6565   basic_block *body;
6566
6567   gcc_assert (!data->niters);
6568   data->current_loop = loop;
6569   data->speed = optimize_loop_for_speed_p (loop);
6570
6571   if (dump_file && (dump_flags & TDF_DETAILS))
6572     {
6573       fprintf (dump_file, "Processing loop %d\n", loop->num);
6574
6575       if (exit)
6576         {
6577           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6578                    exit->src->index, exit->dest->index);
6579           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6580           fprintf (dump_file, "\n");
6581         }
6582
6583       fprintf (dump_file, "\n");
6584     }
6585
6586   body = get_loop_body (loop);
6587   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6588   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6589   free (body);
6590
6591   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6592
6593   /* For each ssa name determines whether it behaves as an induction variable
6594      in some loop.  */
6595   if (!find_induction_variables (data))
6596     goto finish;
6597
6598   /* Finds interesting uses (item 1).  */
6599   find_interesting_uses (data);
6600   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6601     goto finish;
6602
6603   /* Finds candidates for the induction variables (item 2).  */
6604   find_iv_candidates (data);
6605
6606   /* Calculates the costs (item 3, part 1).  */
6607   determine_iv_costs (data);
6608   determine_use_iv_costs (data);
6609   determine_set_costs (data);
6610
6611   /* Find the optimal set of induction variables (item 3, part 2).  */
6612   iv_ca = find_optimal_iv_set (data);
6613   if (!iv_ca)
6614     goto finish;
6615   changed = true;
6616
6617   /* Create the new induction variables (item 4, part 1).  */
6618   create_new_ivs (data, iv_ca);
6619   iv_ca_free (&iv_ca);
6620
6621   /* Rewrite the uses (item 4, part 2).  */
6622   rewrite_uses (data);
6623
6624   /* Remove the ivs that are unused after rewriting.  */
6625   remove_unused_ivs (data);
6626
6627   /* We have changed the structure of induction variables; it might happen
6628      that definitions in the scev database refer to some of them that were
6629      eliminated.  */
6630   scev_reset ();
6631
6632 finish:
6633   free_loop_data (data);
6634
6635   return changed;
6636 }
6637
6638 /* Main entry point.  Optimizes induction variables in loops.  */
6639
6640 void
6641 tree_ssa_iv_optimize (void)
6642 {
6643   struct loop *loop;
6644   struct ivopts_data data;
6645   loop_iterator li;
6646
6647   tree_ssa_iv_optimize_init (&data);
6648
6649   /* Optimize the loops starting with the innermost ones.  */
6650   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6651     {
6652       if (dump_file && (dump_flags & TDF_DETAILS))
6653         flow_loop_dump (loop, dump_file, NULL, 1);
6654
6655       tree_ssa_iv_optimize_loop (&data, loop);
6656     }
6657
6658   tree_ssa_iv_optimize_finalize (&data);
6659 }