gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "gimple-pretty-print.h"
  73 #include "tree-flow.h"
  74 #include "cfgloop.h"
  75 #include "tree-pass.h"
  76 #include "ggc.h"
  77 #include "insn-config.h"
  78 #include "pointer-set.h"
  79 #include "hashtab.h"
  80 #include "tree-chrec.h"
  81 #include "tree-scalar-evolution.h"
  82 #include "cfgloop.h"
  83 #include "params.h"
  84 #include "langhooks.h"
  85 #include "tree-affine.h"
  86 #include "target.h"
  87 #include "tree-inline.h"
  88 #include "tree-ssa-propagate.h"
  89 #include "expmed.h"
  90
  91 /* FIXME: Expressions are expanded to RTL in this pass to determine the
  92    cost of different addressing modes.  This should be moved to a TBD
  93    interface between the GIMPLE and RTL worlds.  */
  94 #include "expr.h"
  95 #include "recog.h"
  96
  97 /* The infinite cost.  */
  98 #define INFTY 10000000
  99
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102 /* Returns the expected number of loop iterations for LOOP.
 103    The average trip count is computed from profile data if it
 104    exists. */
 105
 106 static inline HOST_WIDE_INT
 107 avg_loop_niter (struct loop *loop)
 108 {
 109   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 110   if (niter == -1)
 111     return AVG_LOOP_NITER (loop);
 112
 113   return niter;
 114 }
 115
 116 /* Representation of the induction variable.  */
 117 struct iv
 118 {
 119   tree base;            /* Initial value of the iv.  */
 120   tree base_object;     /* A memory object to that the induction variable points.  */
 121   tree step;            /* Step of the iv (constant only).  */
 122   tree ssa_name;        /* The ssa name with the value.  */
 123   bool biv_p;           /* Is it a biv?  */
 124   bool have_use_for;    /* Do we already have a use for it?  */
 125   unsigned use_id;      /* The identifier in the use if it is the case.  */
 126 };
 127
 128 /* Per-ssa version information (induction variable descriptions, etc.).  */
 129 struct version_info
 130 {
 131   tree name;            /* The ssa name.  */
 132   struct iv *iv;        /* Induction variable description.  */
 133   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 134                            an expression that is not an induction variable.  */
 135   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 136   unsigned inv_id;      /* Id of an invariant.  */
 137 };
 138
 139 /* Types of uses.  */
 140 enum use_type
 141 {
 142   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 143   USE_ADDRESS,          /* Use in an address.  */
 144   USE_COMPARE           /* Use is a compare.  */
 145 };
 146
 147 /* Cost of a computation.  */
 148 typedef struct
 149 {
 150   int cost;             /* The runtime cost.  */
 151   unsigned complexity;  /* The estimate of the complexity of the code for
 152                            the computation (in no concrete units --
 153                            complexity field should be larger for more
 154                            complex expressions and addressing modes).  */
 155 } comp_cost;
 156
 157 static const comp_cost no_cost = {0, 0};
 158 static const comp_cost infinite_cost = {INFTY, INFTY};
 159
 160 /* The candidate - cost pair.  */
 161 struct cost_pair
 162 {
 163   struct iv_cand *cand; /* The candidate.  */
 164   comp_cost cost;       /* The cost.  */
 165   bitmap depends_on;    /* The list of invariants that have to be
 166                            preserved.  */
 167   tree value;           /* For final value elimination, the expression for
 168                            the final value of the iv.  For iv elimination,
 169                            the new bound to compare with.  */
 170   enum tree_code comp;  /* For iv elimination, the comparison.  */
 171   int inv_expr_id;      /* Loop invariant expression id.  */
 172 };
 173
 174 /* Use.  */
 175 struct iv_use
 176 {
 177   unsigned id;          /* The id of the use.  */
 178   enum use_type type;   /* Type of the use.  */
 179   struct iv *iv;        /* The induction variable it is based on.  */
 180   gimple stmt;          /* Statement in that it occurs.  */
 181   tree *op_p;           /* The place where it occurs.  */
 182   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 183                            important ones.  */
 184
 185   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 186   struct cost_pair *cost_map;
 187                         /* The costs wrto the iv candidates.  */
 188
 189   struct iv_cand *selected;
 190                         /* The selected candidate.  */
 191 };
 192
 193 /* The position where the iv is computed.  */
 194 enum iv_position
 195 {
 196   IP_NORMAL,            /* At the end, just before the exit condition.  */
 197   IP_END,               /* At the end of the latch block.  */
 198   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 199   IP_AFTER_USE,         /* Immediately after a specific use.  */
 200   IP_ORIGINAL           /* The original biv.  */
 201 };
 202
 203 /* The induction variable candidate.  */
 204 struct iv_cand
 205 {
 206   unsigned id;          /* The number of the candidate.  */
 207   bool important;       /* Whether this is an "important" candidate, i.e. such
 208                            that it should be considered by all uses.  */
 209   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 210   gimple incremented_at;/* For original biv, the statement where it is
 211                            incremented.  */
 212   tree var_before;      /* The variable used for it before increment.  */
 213   tree var_after;       /* The variable used for it after increment.  */
 214   struct iv *iv;        /* The value of the candidate.  NULL for
 215                            "pseudocandidate" used to indicate the possibility
 216                            to replace the final value of an iv by direct
 217                            computation of the value.  */
 218   unsigned cost;        /* Cost of the candidate.  */
 219   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 220   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 221                               where it is incremented.  */
 222   bitmap depends_on;    /* The list of invariants that are used in step of the
 223                            biv.  */
 224 };
 225
 226 /* Loop invariant expression hashtable entry.  */
 227 struct iv_inv_expr_ent
 228 {
 229   tree expr;
 230   int id;
 231   hashval_t hash;
 232 };
 233
 234 /* The data used by the induction variable optimizations.  */
 235
 236 typedef struct iv_use *iv_use_p;
 237 DEF_VEC_P(iv_use_p);
 238 DEF_VEC_ALLOC_P(iv_use_p,heap);
 239
 240 typedef struct iv_cand *iv_cand_p;
 241 DEF_VEC_P(iv_cand_p);
 242 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 243
 244 struct ivopts_data
 245 {
 246   /* The currently optimized loop.  */
 247   struct loop *current_loop;
 248
 249   /* Numbers of iterations for all exits of the current loop.  */
 250   struct pointer_map_t *niters;
 251
 252   /* Number of registers used in it.  */
 253   unsigned regs_used;
 254
 255   /* The size of version_info array allocated.  */
 256   unsigned version_info_size;
 257
 258   /* The array of information for the ssa names.  */
 259   struct version_info *version_info;
 260
 261   /* The hashtable of loop invariant expressions created
 262      by ivopt.  */
 263   htab_t inv_expr_tab;
 264
 265   /* Loop invariant expression id.  */
 266   int inv_expr_id;
 267
 268   /* The bitmap of indices in version_info whose value was changed.  */
 269   bitmap relevant;
 270
 271   /* The uses of induction variables.  */
 272   VEC(iv_use_p,heap) *iv_uses;
 273
 274   /* The candidates.  */
 275   VEC(iv_cand_p,heap) *iv_candidates;
 276
 277   /* A bitmap of important candidates.  */
 278   bitmap important_candidates;
 279
 280   /* The maximum invariant id.  */
 281   unsigned max_inv_id;
 282
 283   /* Whether to consider just related and important candidates when replacing a
 284      use.  */
 285   bool consider_all_candidates;
 286
 287   /* Are we optimizing for speed?  */
 288   bool speed;
 289
 290   /* Whether the loop body includes any function calls.  */
 291   bool body_includes_call;
 292
 293   /* Whether the loop body can only be exited via single exit.  */
 294   bool loop_single_exit_p;
 295 };
 296
 297 /* An assignment of iv candidates to uses.  */
 298
 299 struct iv_ca
 300 {
 301   /* The number of uses covered by the assignment.  */
 302   unsigned upto;
 303
 304   /* Number of uses that cannot be expressed by the candidates in the set.  */
 305   unsigned bad_uses;
 306
 307   /* Candidate assigned to a use, together with the related costs.  */
 308   struct cost_pair **cand_for_use;
 309
 310   /* Number of times each candidate is used.  */
 311   unsigned *n_cand_uses;
 312
 313   /* The candidates used.  */
 314   bitmap cands;
 315
 316   /* The number of candidates in the set.  */
 317   unsigned n_cands;
 318
 319   /* Total number of registers needed.  */
 320   unsigned n_regs;
 321
 322   /* Total cost of expressing uses.  */
 323   comp_cost cand_use_cost;
 324
 325   /* Total cost of candidates.  */
 326   unsigned cand_cost;
 327
 328   /* Number of times each invariant is used.  */
 329   unsigned *n_invariant_uses;
 330
 331   /* The array holding the number of uses of each loop
 332      invariant expressions created by ivopt.  */
 333   unsigned *used_inv_expr;
 334
 335   /* The number of created loop invariants.  */
 336   unsigned num_used_inv_expr;
 337
 338   /* Total cost of the assignment.  */
 339   comp_cost cost;
 340 };
 341
 342 /* Difference of two iv candidate assignments.  */
 343
 344 struct iv_ca_delta
 345 {
 346   /* Changed use.  */
 347   struct iv_use *use;
 348
 349   /* An old assignment (for rollback purposes).  */
 350   struct cost_pair *old_cp;
 351
 352   /* A new assignment.  */
 353   struct cost_pair *new_cp;
 354
 355   /* Next change in the list.  */
 356   struct iv_ca_delta *next_change;
 357 };
 358
 359 /* Bound on number of candidates below that all candidates are considered.  */
 360
 361 #define CONSIDER_ALL_CANDIDATES_BOUND \
 362   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 363
 364 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 365    optimizing such a loop would help, and it would take ages).  */
 366
 367 #define MAX_CONSIDERED_USES \
 368   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 369
 370 /* If there are at most this number of ivs in the set, try removing unnecessary
 371    ivs from the set always.  */
 372
 373 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 374   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 375
 376 /* The list of trees for that the decl_rtl field must be reset is stored
 377    here.  */
 378
 379 static VEC(tree,heap) *decl_rtl_to_reset;
 380
 381 static comp_cost force_expr_to_var_cost (tree, bool);
 382
 383 /* Number of uses recorded in DATA.  */
 384
 385 static inline unsigned
 386 n_iv_uses (struct ivopts_data *data)
 387 {
 388   return VEC_length (iv_use_p, data->iv_uses);
 389 }
 390
 391 /* Ith use recorded in DATA.  */
 392
 393 static inline struct iv_use *
 394 iv_use (struct ivopts_data *data, unsigned i)
 395 {
 396   return VEC_index (iv_use_p, data->iv_uses, i);
 397 }
 398
 399 /* Number of candidates recorded in DATA.  */
 400
 401 static inline unsigned
 402 n_iv_cands (struct ivopts_data *data)
 403 {
 404   return VEC_length (iv_cand_p, data->iv_candidates);
 405 }
 406
 407 /* Ith candidate recorded in DATA.  */
 408
 409 static inline struct iv_cand *
 410 iv_cand (struct ivopts_data *data, unsigned i)
 411 {
 412   return VEC_index (iv_cand_p, data->iv_candidates, i);
 413 }
 414
 415 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 416
 417 edge
 418 single_dom_exit (struct loop *loop)
 419 {
 420   edge exit = single_exit (loop);
 421
 422   if (!exit)
 423     return NULL;
 424
 425   if (!just_once_each_iteration_p (loop, exit->src))
 426     return NULL;
 427
 428   return exit;
 429 }
 430
 431 /* Dumps information about the induction variable IV to FILE.  */
 432
 433 extern void dump_iv (FILE *, struct iv *);
 434 void
 435 dump_iv (FILE *file, struct iv *iv)
 436 {
 437   if (iv->ssa_name)
 438     {
 439       fprintf (file, "ssa name ");
 440       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 441       fprintf (file, "\n");
 442     }
 443
 444   fprintf (file, "  type ");
 445   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 446   fprintf (file, "\n");
 447
 448   if (iv->step)
 449     {
 450       fprintf (file, "  base ");
 451       print_generic_expr (file, iv->base, TDF_SLIM);
 452       fprintf (file, "\n");
 453
 454       fprintf (file, "  step ");
 455       print_generic_expr (file, iv->step, TDF_SLIM);
 456       fprintf (file, "\n");
 457     }
 458   else
 459     {
 460       fprintf (file, "  invariant ");
 461       print_generic_expr (file, iv->base, TDF_SLIM);
 462       fprintf (file, "\n");
 463     }
 464
 465   if (iv->base_object)
 466     {
 467       fprintf (file, "  base object ");
 468       print_generic_expr (file, iv->base_object, TDF_SLIM);
 469       fprintf (file, "\n");
 470     }
 471
 472   if (iv->biv_p)
 473     fprintf (file, "  is a biv\n");
 474 }
 475
 476 /* Dumps information about the USE to FILE.  */
 477
 478 extern void dump_use (FILE *, struct iv_use *);
 479 void
 480 dump_use (FILE *file, struct iv_use *use)
 481 {
 482   fprintf (file, "use %d\n", use->id);
 483
 484   switch (use->type)
 485     {
 486     case USE_NONLINEAR_EXPR:
 487       fprintf (file, "  generic\n");
 488       break;
 489
 490     case USE_ADDRESS:
 491       fprintf (file, "  address\n");
 492       break;
 493
 494     case USE_COMPARE:
 495       fprintf (file, "  compare\n");
 496       break;
 497
 498     default:
 499       gcc_unreachable ();
 500     }
 501
 502   fprintf (file, "  in statement ");
 503   print_gimple_stmt (file, use->stmt, 0, 0);
 504   fprintf (file, "\n");
 505
 506   fprintf (file, "  at position ");
 507   if (use->op_p)
 508     print_generic_expr (file, *use->op_p, TDF_SLIM);
 509   fprintf (file, "\n");
 510
 511   dump_iv (file, use->iv);
 512
 513   if (use->related_cands)
 514     {
 515       fprintf (file, "  related candidates ");
 516       dump_bitmap (file, use->related_cands);
 517     }
 518 }
 519
 520 /* Dumps information about the uses to FILE.  */
 521
 522 extern void dump_uses (FILE *, struct ivopts_data *);
 523 void
 524 dump_uses (FILE *file, struct ivopts_data *data)
 525 {
 526   unsigned i;
 527   struct iv_use *use;
 528
 529   for (i = 0; i < n_iv_uses (data); i++)
 530     {
 531       use = iv_use (data, i);
 532
 533       dump_use (file, use);
 534       fprintf (file, "\n");
 535     }
 536 }
 537
 538 /* Dumps information about induction variable candidate CAND to FILE.  */
 539
 540 extern void dump_cand (FILE *, struct iv_cand *);
 541 void
 542 dump_cand (FILE *file, struct iv_cand *cand)
 543 {
 544   struct iv *iv = cand->iv;
 545
 546   fprintf (file, "candidate %d%s\n",
 547            cand->id, cand->important ? " (important)" : "");
 548
 549   if (cand->depends_on)
 550     {
 551       fprintf (file, "  depends on ");
 552       dump_bitmap (file, cand->depends_on);
 553     }
 554
 555   if (!iv)
 556     {
 557       fprintf (file, "  final value replacement\n");
 558       return;
 559     }
 560
 561   if (cand->var_before)
 562     {
 563       fprintf (file, "  var_before ");
 564       print_generic_expr (file, cand->var_before, TDF_SLIM);
 565       fprintf (file, "\n");
 566     }
 567   if (cand->var_after)
 568     {
 569       fprintf (file, "  var_after ");
 570       print_generic_expr (file, cand->var_after, TDF_SLIM);
 571       fprintf (file, "\n");
 572     }
 573
 574   switch (cand->pos)
 575     {
 576     case IP_NORMAL:
 577       fprintf (file, "  incremented before exit test\n");
 578       break;
 579
 580     case IP_BEFORE_USE:
 581       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 582       break;
 583
 584     case IP_AFTER_USE:
 585       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 586       break;
 587
 588     case IP_END:
 589       fprintf (file, "  incremented at end\n");
 590       break;
 591
 592     case IP_ORIGINAL:
 593       fprintf (file, "  original biv\n");
 594       break;
 595     }
 596
 597   dump_iv (file, iv);
 598 }
 599
 600 /* Returns the info for ssa version VER.  */
 601
 602 static inline struct version_info *
 603 ver_info (struct ivopts_data *data, unsigned ver)
 604 {
 605   return data->version_info + ver;
 606 }
 607
 608 /* Returns the info for ssa name NAME.  */
 609
 610 static inline struct version_info *
 611 name_info (struct ivopts_data *data, tree name)
 612 {
 613   return ver_info (data, SSA_NAME_VERSION (name));
 614 }
 615
 616 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 617    emitted in LOOP.  */
 618
 619 static bool
 620 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 621 {
 622   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 623
 624   gcc_assert (bb);
 625
 626   if (sbb == loop->latch)
 627     return true;
 628
 629   if (sbb != bb)
 630     return false;
 631
 632   return stmt == last_stmt (bb);
 633 }
 634
 635 /* Returns true if STMT if after the place where the original induction
 636    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 637    if the positions are identical.  */
 638
 639 static bool
 640 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 641 {
 642   basic_block cand_bb = gimple_bb (cand->incremented_at);
 643   basic_block stmt_bb = gimple_bb (stmt);
 644
 645   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 646     return false;
 647
 648   if (stmt_bb != cand_bb)
 649     return true;
 650
 651   if (true_if_equal
 652       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 653     return true;
 654   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 655 }
 656
 657 /* Returns true if STMT if after the place where the induction variable
 658    CAND is incremented in LOOP.  */
 659
 660 static bool
 661 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 662 {
 663   switch (cand->pos)
 664     {
 665     case IP_END:
 666       return false;
 667
 668     case IP_NORMAL:
 669       return stmt_after_ip_normal_pos (loop, stmt);
 670
 671     case IP_ORIGINAL:
 672     case IP_AFTER_USE:
 673       return stmt_after_inc_pos (cand, stmt, false);
 674
 675     case IP_BEFORE_USE:
 676       return stmt_after_inc_pos (cand, stmt, true);
 677
 678     default:
 679       gcc_unreachable ();
 680     }
 681 }
 682
 683 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 684
 685 static bool
 686 abnormal_ssa_name_p (tree exp)
 687 {
 688   if (!exp)
 689     return false;
 690
 691   if (TREE_CODE (exp) != SSA_NAME)
 692     return false;
 693
 694   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 695 }
 696
 697 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 698    abnormal phi node.  Callback for for_each_index.  */
 699
 700 static bool
 701 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 702                                   void *data ATTRIBUTE_UNUSED)
 703 {
 704   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 705     {
 706       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 707         return false;
 708       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 709         return false;
 710     }
 711
 712   return !abnormal_ssa_name_p (*index);
 713 }
 714
 715 /* Returns true if EXPR contains a ssa name that occurs in an
 716    abnormal phi node.  */
 717
 718 bool
 719 contains_abnormal_ssa_name_p (tree expr)
 720 {
 721   enum tree_code code;
 722   enum tree_code_class codeclass;
 723
 724   if (!expr)
 725     return false;
 726
 727   code = TREE_CODE (expr);
 728   codeclass = TREE_CODE_CLASS (code);
 729
 730   if (code == SSA_NAME)
 731     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 732
 733   if (code == INTEGER_CST
 734       || is_gimple_min_invariant (expr))
 735     return false;
 736
 737   if (code == ADDR_EXPR)
 738     return !for_each_index (&TREE_OPERAND (expr, 0),
 739                             idx_contains_abnormal_ssa_name_p,
 740                             NULL);
 741
 742   if (code == COND_EXPR)
 743     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 744       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 745       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 746
 747   switch (codeclass)
 748     {
 749     case tcc_binary:
 750     case tcc_comparison:
 751       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 752         return true;
 753
 754       /* Fallthru.  */
 755     case tcc_unary:
 756       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 757         return true;
 758
 759       break;
 760
 761     default:
 762       gcc_unreachable ();
 763     }
 764
 765   return false;
 766 }
 767
 768 /*  Returns the structure describing number of iterations determined from
 769     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 770
 771 static struct tree_niter_desc *
 772 niter_for_exit (struct ivopts_data *data, edge exit)
 773 {
 774   struct tree_niter_desc *desc;
 775   void **slot;
 776
 777   if (!data->niters)
 778     {
 779       data->niters = pointer_map_create ();
 780       slot = NULL;
 781     }
 782   else
 783     slot = pointer_map_contains (data->niters, exit);
 784
 785   if (!slot)
 786     {
 787       /* Try to determine number of iterations.  We cannot safely work with ssa
 788          names that appear in phi nodes on abnormal edges, so that we do not
 789          create overlapping life ranges for them (PR 27283).  */
 790       desc = XNEW (struct tree_niter_desc);
 791       if (!number_of_iterations_exit (data->current_loop,
 792                                       exit, desc, true)
 793           || contains_abnormal_ssa_name_p (desc->niter))
 794         {
 795           XDELETE (desc);
 796           desc = NULL;
 797         }
 798       slot = pointer_map_insert (data->niters, exit);
 799       *slot = desc;
 800     }
 801   else
 802     desc = (struct tree_niter_desc *) *slot;
 803
 804   return desc;
 805 }
 806
 807 /* Returns the structure describing number of iterations determined from
 808    single dominating exit of DATA->current_loop, or NULL if something
 809    goes wrong.  */
 810
 811 static struct tree_niter_desc *
 812 niter_for_single_dom_exit (struct ivopts_data *data)
 813 {
 814   edge exit = single_dom_exit (data->current_loop);
 815
 816   if (!exit)
 817     return NULL;
 818
 819   return niter_for_exit (data, exit);
 820 }
 821
 822 /* Hash table equality function for expressions.  */
 823
 824 static int
 825 htab_inv_expr_eq (const void *ent1, const void *ent2)
 826 {
 827   const struct iv_inv_expr_ent *expr1 =
 828       (const struct iv_inv_expr_ent *)ent1;
 829   const struct iv_inv_expr_ent *expr2 =
 830       (const struct iv_inv_expr_ent *)ent2;
 831
 832   return expr1->hash == expr2->hash
 833          && operand_equal_p (expr1->expr, expr2->expr, 0);
 834 }
 835
 836 /* Hash function for loop invariant expressions.  */
 837
 838 static hashval_t
 839 htab_inv_expr_hash (const void *ent)
 840 {
 841   const struct iv_inv_expr_ent *expr =
 842       (const struct iv_inv_expr_ent *)ent;
 843   return expr->hash;
 844 }
 845
 846 /* Initializes data structures used by the iv optimization pass, stored
 847    in DATA.  */
 848
 849 static void
 850 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 851 {
 852   data->version_info_size = 2 * num_ssa_names;
 853   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 854   data->relevant = BITMAP_ALLOC (NULL);
 855   data->important_candidates = BITMAP_ALLOC (NULL);
 856   data->max_inv_id = 0;
 857   data->niters = NULL;
 858   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 859   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 860   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
 861                                     htab_inv_expr_eq, free);
 862   data->inv_expr_id = 0;
 863   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 864 }
 865
 866 /* Returns a memory object to that EXPR points.  In case we are able to
 867    determine that it does not point to any such object, NULL is returned.  */
 868
 869 static tree
 870 determine_base_object (tree expr)
 871 {
 872   enum tree_code code = TREE_CODE (expr);
 873   tree base, obj;
 874
 875   /* If this is a pointer casted to any type, we need to determine
 876      the base object for the pointer; so handle conversions before
 877      throwing away non-pointer expressions.  */
 878   if (CONVERT_EXPR_P (expr))
 879     return determine_base_object (TREE_OPERAND (expr, 0));
 880
 881   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 882     return NULL_TREE;
 883
 884   switch (code)
 885     {
 886     case INTEGER_CST:
 887       return NULL_TREE;
 888
 889     case ADDR_EXPR:
 890       obj = TREE_OPERAND (expr, 0);
 891       base = get_base_address (obj);
 892
 893       if (!base)
 894         return expr;
 895
 896       if (TREE_CODE (base) == MEM_REF)
 897         return determine_base_object (TREE_OPERAND (base, 0));
 898
 899       return fold_convert (ptr_type_node,
 900                            build_fold_addr_expr (base));
 901
 902     case POINTER_PLUS_EXPR:
 903       return determine_base_object (TREE_OPERAND (expr, 0));
 904
 905     case PLUS_EXPR:
 906     case MINUS_EXPR:
 907       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 908       gcc_unreachable ();
 909
 910     default:
 911       return fold_convert (ptr_type_node, expr);
 912     }
 913 }
 914
 915 /* Allocates an induction variable with given initial value BASE and step STEP
 916    for loop LOOP.  */
 917
 918 static struct iv *
 919 alloc_iv (tree base, tree step)
 920 {
 921   struct iv *iv = XCNEW (struct iv);
 922   gcc_assert (step != NULL_TREE);
 923
 924   iv->base = base;
 925   iv->base_object = determine_base_object (base);
 926   iv->step = step;
 927   iv->biv_p = false;
 928   iv->have_use_for = false;
 929   iv->use_id = 0;
 930   iv->ssa_name = NULL_TREE;
 931
 932   return iv;
 933 }
 934
 935 /* Sets STEP and BASE for induction variable IV.  */
 936
 937 static void
 938 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 939 {
 940   struct version_info *info = name_info (data, iv);
 941
 942   gcc_assert (!info->iv);
 943
 944   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 945   info->iv = alloc_iv (base, step);
 946   info->iv->ssa_name = iv;
 947 }
 948
 949 /* Finds induction variable declaration for VAR.  */
 950
 951 static struct iv *
 952 get_iv (struct ivopts_data *data, tree var)
 953 {
 954   basic_block bb;
 955   tree type = TREE_TYPE (var);
 956
 957   if (!POINTER_TYPE_P (type)
 958       && !INTEGRAL_TYPE_P (type))
 959     return NULL;
 960
 961   if (!name_info (data, var)->iv)
 962     {
 963       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 964
 965       if (!bb
 966           || !flow_bb_inside_loop_p (data->current_loop, bb))
 967         set_iv (data, var, var, build_int_cst (type, 0));
 968     }
 969
 970   return name_info (data, var)->iv;
 971 }
 972
 973 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 974    not define a simple affine biv with nonzero step.  */
 975
 976 static tree
 977 determine_biv_step (gimple phi)
 978 {
 979   struct loop *loop = gimple_bb (phi)->loop_father;
 980   tree name = PHI_RESULT (phi);
 981   affine_iv iv;
 982
 983   if (virtual_operand_p (name))
 984     return NULL_TREE;
 985
 986   if (!simple_iv (loop, loop, name, &iv, true))
 987     return NULL_TREE;
 988
 989   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 990 }
 991
 992 /* Finds basic ivs.  */
 993
 994 static bool
 995 find_bivs (struct ivopts_data *data)
 996 {
 997   gimple phi;
 998   tree step, type, base;
 999   bool found = false;
1000   struct loop *loop = data->current_loop;
1001   gimple_stmt_iterator psi;
1002
1003   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1004     {
1005       phi = gsi_stmt (psi);
1006
1007       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1008         continue;
1009
1010       step = determine_biv_step (phi);
1011       if (!step)
1012         continue;
1013
1014       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1015       base = expand_simple_operations (base);
1016       if (contains_abnormal_ssa_name_p (base)
1017           || contains_abnormal_ssa_name_p (step))
1018         continue;
1019
1020       type = TREE_TYPE (PHI_RESULT (phi));
1021       base = fold_convert (type, base);
1022       if (step)
1023         {
1024           if (POINTER_TYPE_P (type))
1025             step = convert_to_ptrofftype (step);
1026           else
1027             step = fold_convert (type, step);
1028         }
1029
1030       set_iv (data, PHI_RESULT (phi), base, step);
1031       found = true;
1032     }
1033
1034   return found;
1035 }
1036
1037 /* Marks basic ivs.  */
1038
1039 static void
1040 mark_bivs (struct ivopts_data *data)
1041 {
1042   gimple phi;
1043   tree var;
1044   struct iv *iv, *incr_iv;
1045   struct loop *loop = data->current_loop;
1046   basic_block incr_bb;
1047   gimple_stmt_iterator psi;
1048
1049   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1050     {
1051       phi = gsi_stmt (psi);
1052
1053       iv = get_iv (data, PHI_RESULT (phi));
1054       if (!iv)
1055         continue;
1056
1057       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1058       incr_iv = get_iv (data, var);
1059       if (!incr_iv)
1060         continue;
1061
1062       /* If the increment is in the subloop, ignore it.  */
1063       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1064       if (incr_bb->loop_father != data->current_loop
1065           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1066         continue;
1067
1068       iv->biv_p = true;
1069       incr_iv->biv_p = true;
1070     }
1071 }
1072
1073 /* Checks whether STMT defines a linear induction variable and stores its
1074    parameters to IV.  */
1075
1076 static bool
1077 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1078 {
1079   tree lhs;
1080   struct loop *loop = data->current_loop;
1081
1082   iv->base = NULL_TREE;
1083   iv->step = NULL_TREE;
1084
1085   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1086     return false;
1087
1088   lhs = gimple_assign_lhs (stmt);
1089   if (TREE_CODE (lhs) != SSA_NAME)
1090     return false;
1091
1092   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1093     return false;
1094   iv->base = expand_simple_operations (iv->base);
1095
1096   if (contains_abnormal_ssa_name_p (iv->base)
1097       || contains_abnormal_ssa_name_p (iv->step))
1098     return false;
1099
1100   /* If STMT could throw, then do not consider STMT as defining a GIV.
1101      While this will suppress optimizations, we can not safely delete this
1102      GIV and associated statements, even if it appears it is not used.  */
1103   if (stmt_could_throw_p (stmt))
1104     return false;
1105
1106   return true;
1107 }
1108
1109 /* Finds general ivs in statement STMT.  */
1110
1111 static void
1112 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1113 {
1114   affine_iv iv;
1115
1116   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1117     return;
1118
1119   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1120 }
1121
1122 /* Finds general ivs in basic block BB.  */
1123
1124 static void
1125 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1126 {
1127   gimple_stmt_iterator bsi;
1128
1129   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1130     find_givs_in_stmt (data, gsi_stmt (bsi));
1131 }
1132
1133 /* Finds general ivs.  */
1134
1135 static void
1136 find_givs (struct ivopts_data *data)
1137 {
1138   struct loop *loop = data->current_loop;
1139   basic_block *body = get_loop_body_in_dom_order (loop);
1140   unsigned i;
1141
1142   for (i = 0; i < loop->num_nodes; i++)
1143     find_givs_in_bb (data, body[i]);
1144   free (body);
1145 }
1146
1147 /* For each ssa name defined in LOOP determines whether it is an induction
1148    variable and if so, its initial value and step.  */
1149
1150 static bool
1151 find_induction_variables (struct ivopts_data *data)
1152 {
1153   unsigned i;
1154   bitmap_iterator bi;
1155
1156   if (!find_bivs (data))
1157     return false;
1158
1159   find_givs (data);
1160   mark_bivs (data);
1161
1162   if (dump_file && (dump_flags & TDF_DETAILS))
1163     {
1164       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1165
1166       if (niter)
1167         {
1168           fprintf (dump_file, "  number of iterations ");
1169           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1170           if (!integer_zerop (niter->may_be_zero))
1171             {
1172               fprintf (dump_file, "; zero if ");
1173               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1174             }
1175           fprintf (dump_file, "\n\n");
1176         };
1177
1178       fprintf (dump_file, "Induction variables:\n\n");
1179
1180       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1181         {
1182           if (ver_info (data, i)->iv)
1183             dump_iv (dump_file, ver_info (data, i)->iv);
1184         }
1185     }
1186
1187   return true;
1188 }
1189
1190 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1191
1192 static struct iv_use *
1193 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1194             gimple stmt, enum use_type use_type)
1195 {
1196   struct iv_use *use = XCNEW (struct iv_use);
1197
1198   use->id = n_iv_uses (data);
1199   use->type = use_type;
1200   use->iv = iv;
1201   use->stmt = stmt;
1202   use->op_p = use_p;
1203   use->related_cands = BITMAP_ALLOC (NULL);
1204
1205   /* To avoid showing ssa name in the dumps, if it was not reset by the
1206      caller.  */
1207   iv->ssa_name = NULL_TREE;
1208
1209   if (dump_file && (dump_flags & TDF_DETAILS))
1210     dump_use (dump_file, use);
1211
1212   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1213
1214   return use;
1215 }
1216
1217 /* Checks whether OP is a loop-level invariant and if so, records it.
1218    NONLINEAR_USE is true if the invariant is used in a way we do not
1219    handle specially.  */
1220
1221 static void
1222 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1223 {
1224   basic_block bb;
1225   struct version_info *info;
1226
1227   if (TREE_CODE (op) != SSA_NAME
1228       || virtual_operand_p (op))
1229     return;
1230
1231   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1232   if (bb
1233       && flow_bb_inside_loop_p (data->current_loop, bb))
1234     return;
1235
1236   info = name_info (data, op);
1237   info->name = op;
1238   info->has_nonlin_use |= nonlinear_use;
1239   if (!info->inv_id)
1240     info->inv_id = ++data->max_inv_id;
1241   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1242 }
1243
1244 /* Checks whether the use OP is interesting and if so, records it.  */
1245
1246 static struct iv_use *
1247 find_interesting_uses_op (struct ivopts_data *data, tree op)
1248 {
1249   struct iv *iv;
1250   struct iv *civ;
1251   gimple stmt;
1252   struct iv_use *use;
1253
1254   if (TREE_CODE (op) != SSA_NAME)
1255     return NULL;
1256
1257   iv = get_iv (data, op);
1258   if (!iv)
1259     return NULL;
1260
1261   if (iv->have_use_for)
1262     {
1263       use = iv_use (data, iv->use_id);
1264
1265       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1266       return use;
1267     }
1268
1269   if (integer_zerop (iv->step))
1270     {
1271       record_invariant (data, op, true);
1272       return NULL;
1273     }
1274   iv->have_use_for = true;
1275
1276   civ = XNEW (struct iv);
1277   *civ = *iv;
1278
1279   stmt = SSA_NAME_DEF_STMT (op);
1280   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1281               || is_gimple_assign (stmt));
1282
1283   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1284   iv->use_id = use->id;
1285
1286   return use;
1287 }
1288
1289 /* Given a condition in statement STMT, checks whether it is a compare
1290    of an induction variable and an invariant.  If this is the case,
1291    CONTROL_VAR is set to location of the iv, BOUND to the location of
1292    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1293    induction variable descriptions, and true is returned.  If this is not
1294    the case, CONTROL_VAR and BOUND are set to the arguments of the
1295    condition and false is returned.  */
1296
1297 static bool
1298 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1299                        tree **control_var, tree **bound,
1300                        struct iv **iv_var, struct iv **iv_bound)
1301 {
1302   /* The objects returned when COND has constant operands.  */
1303   static struct iv const_iv;
1304   static tree zero;
1305   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1306   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1307   bool ret = false;
1308
1309   if (gimple_code (stmt) == GIMPLE_COND)
1310     {
1311       op0 = gimple_cond_lhs_ptr (stmt);
1312       op1 = gimple_cond_rhs_ptr (stmt);
1313     }
1314   else
1315     {
1316       op0 = gimple_assign_rhs1_ptr (stmt);
1317       op1 = gimple_assign_rhs2_ptr (stmt);
1318     }
1319
1320   zero = integer_zero_node;
1321   const_iv.step = integer_zero_node;
1322
1323   if (TREE_CODE (*op0) == SSA_NAME)
1324     iv0 = get_iv (data, *op0);
1325   if (TREE_CODE (*op1) == SSA_NAME)
1326     iv1 = get_iv (data, *op1);
1327
1328   /* Exactly one of the compared values must be an iv, and the other one must
1329      be an invariant.  */
1330   if (!iv0 || !iv1)
1331     goto end;
1332
1333   if (integer_zerop (iv0->step))
1334     {
1335       /* Control variable may be on the other side.  */
1336       tmp_op = op0; op0 = op1; op1 = tmp_op;
1337       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1338     }
1339   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1340
1341 end:
1342   if (control_var)
1343     *control_var = op0;;
1344   if (iv_var)
1345     *iv_var = iv0;;
1346   if (bound)
1347     *bound = op1;
1348   if (iv_bound)
1349     *iv_bound = iv1;
1350
1351   return ret;
1352 }
1353
1354 /* Checks whether the condition in STMT is interesting and if so,
1355    records it.  */
1356
1357 static void
1358 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1359 {
1360   tree *var_p, *bound_p;
1361   struct iv *var_iv, *civ;
1362
1363   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1364     {
1365       find_interesting_uses_op (data, *var_p);
1366       find_interesting_uses_op (data, *bound_p);
1367       return;
1368     }
1369
1370   civ = XNEW (struct iv);
1371   *civ = *var_iv;
1372   record_use (data, NULL, civ, stmt, USE_COMPARE);
1373 }
1374
1375 /* Returns true if expression EXPR is obviously invariant in LOOP,
1376    i.e. if all its operands are defined outside of the LOOP.  LOOP
1377    should not be the function body.  */
1378
1379 bool
1380 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1381 {
1382   basic_block def_bb;
1383   unsigned i, len;
1384
1385   gcc_assert (loop_depth (loop) > 0);
1386
1387   if (is_gimple_min_invariant (expr))
1388     return true;
1389
1390   if (TREE_CODE (expr) == SSA_NAME)
1391     {
1392       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1393       if (def_bb
1394           && flow_bb_inside_loop_p (loop, def_bb))
1395         return false;
1396
1397       return true;
1398     }
1399
1400   if (!EXPR_P (expr))
1401     return false;
1402
1403   len = TREE_OPERAND_LENGTH (expr);
1404   for (i = 0; i < len; i++)
1405     if (TREE_OPERAND (expr, i)
1406         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1407       return false;
1408
1409   return true;
1410 }
1411
1412 /* Returns true if statement STMT is obviously invariant in LOOP,
1413    i.e. if all its operands on the RHS are defined outside of the LOOP.
1414    LOOP should not be the function body.  */
1415
1416 bool
1417 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1418 {
1419   unsigned i;
1420   tree lhs;
1421
1422   gcc_assert (loop_depth (loop) > 0);
1423
1424   lhs = gimple_get_lhs (stmt);
1425   for (i = 0; i < gimple_num_ops (stmt); i++)
1426     {
1427       tree op = gimple_op (stmt, i);
1428       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1429         return false;
1430     }
1431
1432   return true;
1433 }
1434
1435 /* Cumulates the steps of indices into DATA and replaces their values with the
1436    initial ones.  Returns false when the value of the index cannot be determined.
1437    Callback for for_each_index.  */
1438
1439 struct ifs_ivopts_data
1440 {
1441   struct ivopts_data *ivopts_data;
1442   gimple stmt;
1443   tree step;
1444 };
1445
1446 static bool
1447 idx_find_step (tree base, tree *idx, void *data)
1448 {
1449   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1450   struct iv *iv;
1451   tree step, iv_base, iv_step, lbound, off;
1452   struct loop *loop = dta->ivopts_data->current_loop;
1453
1454   /* If base is a component ref, require that the offset of the reference
1455      be invariant.  */
1456   if (TREE_CODE (base) == COMPONENT_REF)
1457     {
1458       off = component_ref_field_offset (base);
1459       return expr_invariant_in_loop_p (loop, off);
1460     }
1461
1462   /* If base is array, first check whether we will be able to move the
1463      reference out of the loop (in order to take its address in strength
1464      reduction).  In order for this to work we need both lower bound
1465      and step to be loop invariants.  */
1466   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1467     {
1468       /* Moreover, for a range, the size needs to be invariant as well.  */
1469       if (TREE_CODE (base) == ARRAY_RANGE_REF
1470           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1471         return false;
1472
1473       step = array_ref_element_size (base);
1474       lbound = array_ref_low_bound (base);
1475
1476       if (!expr_invariant_in_loop_p (loop, step)
1477           || !expr_invariant_in_loop_p (loop, lbound))
1478         return false;
1479     }
1480
1481   if (TREE_CODE (*idx) != SSA_NAME)
1482     return true;
1483
1484   iv = get_iv (dta->ivopts_data, *idx);
1485   if (!iv)
1486     return false;
1487
1488   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1489           *&x[0], which is not folded and does not trigger the
1490           ARRAY_REF path below.  */
1491   *idx = iv->base;
1492
1493   if (integer_zerop (iv->step))
1494     return true;
1495
1496   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1497     {
1498       step = array_ref_element_size (base);
1499
1500       /* We only handle addresses whose step is an integer constant.  */
1501       if (TREE_CODE (step) != INTEGER_CST)
1502         return false;
1503     }
1504   else
1505     /* The step for pointer arithmetics already is 1 byte.  */
1506     step = size_one_node;
1507
1508   iv_base = iv->base;
1509   iv_step = iv->step;
1510   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1511                             sizetype, &iv_base, &iv_step, dta->stmt,
1512                             false))
1513     {
1514       /* The index might wrap.  */
1515       return false;
1516     }
1517
1518   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1519   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1520
1521   return true;
1522 }
1523
1524 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1525    object is passed to it in DATA.  */
1526
1527 static bool
1528 idx_record_use (tree base, tree *idx,
1529                 void *vdata)
1530 {
1531   struct ivopts_data *data = (struct ivopts_data *) vdata;
1532   find_interesting_uses_op (data, *idx);
1533   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1534     {
1535       find_interesting_uses_op (data, array_ref_element_size (base));
1536       find_interesting_uses_op (data, array_ref_low_bound (base));
1537     }
1538   return true;
1539 }
1540
1541 /* If we can prove that TOP = cst * BOT for some constant cst,
1542    store cst to MUL and return true.  Otherwise return false.
1543    The returned value is always sign-extended, regardless of the
1544    signedness of TOP and BOT.  */
1545
1546 static bool
1547 constant_multiple_of (tree top, tree bot, double_int *mul)
1548 {
1549   tree mby;
1550   enum tree_code code;
1551   double_int res, p0, p1;
1552   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1553
1554   STRIP_NOPS (top);
1555   STRIP_NOPS (bot);
1556
1557   if (operand_equal_p (top, bot, 0))
1558     {
1559       *mul = double_int_one;
1560       return true;
1561     }
1562
1563   code = TREE_CODE (top);
1564   switch (code)
1565     {
1566     case MULT_EXPR:
1567       mby = TREE_OPERAND (top, 1);
1568       if (TREE_CODE (mby) != INTEGER_CST)
1569         return false;
1570
1571       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1572         return false;
1573
1574       *mul = (res * tree_to_double_int (mby)).sext (precision);
1575       return true;
1576
1577     case PLUS_EXPR:
1578     case MINUS_EXPR:
1579       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1580           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1581         return false;
1582
1583       if (code == MINUS_EXPR)
1584         p1 = -p1;
1585       *mul = (p0 + p1).sext (precision);
1586       return true;
1587
1588     case INTEGER_CST:
1589       if (TREE_CODE (bot) != INTEGER_CST)
1590         return false;
1591
1592       p0 = tree_to_double_int (top).sext (precision);
1593       p1 = tree_to_double_int (bot).sext (precision);
1594       if (p1.is_zero ())
1595         return false;
1596       *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
1597       return res.is_zero ();
1598
1599     default:
1600       return false;
1601     }
1602 }
1603
1604 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1605
1606 static bool
1607 may_be_unaligned_p (tree ref, tree step)
1608 {
1609   tree base;
1610   tree base_type;
1611   HOST_WIDE_INT bitsize;
1612   HOST_WIDE_INT bitpos;
1613   tree toffset;
1614   enum machine_mode mode;
1615   int unsignedp, volatilep;
1616   unsigned base_align;
1617
1618   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1619      thus they are not misaligned.  */
1620   if (TREE_CODE (ref) == TARGET_MEM_REF)
1621     return false;
1622
1623   /* The test below is basically copy of what expr.c:normal_inner_ref
1624      does to check whether the object must be loaded by parts when
1625      STRICT_ALIGNMENT is true.  */
1626   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1627                               &unsignedp, &volatilep, true);
1628   base_type = TREE_TYPE (base);
1629   base_align = get_object_alignment (base);
1630   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1631
1632   if (mode != BLKmode)
1633     {
1634       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1635
1636       if (base_align < mode_align
1637           || (bitpos % mode_align) != 0
1638           || (bitpos % BITS_PER_UNIT) != 0)
1639         return true;
1640
1641       if (toffset
1642           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1643         return true;
1644
1645       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1646         return true;
1647     }
1648
1649   return false;
1650 }
1651
1652 /* Return true if EXPR may be non-addressable.   */
1653
1654 bool
1655 may_be_nonaddressable_p (tree expr)
1656 {
1657   switch (TREE_CODE (expr))
1658     {
1659     case TARGET_MEM_REF:
1660       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1661          target, thus they are always addressable.  */
1662       return false;
1663
1664     case COMPONENT_REF:
1665       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1666              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1667
1668     case VIEW_CONVERT_EXPR:
1669       /* This kind of view-conversions may wrap non-addressable objects
1670          and make them look addressable.  After some processing the
1671          non-addressability may be uncovered again, causing ADDR_EXPRs
1672          of inappropriate objects to be built.  */
1673       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1674           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1675         return true;
1676
1677       /* ... fall through ... */
1678
1679     case ARRAY_REF:
1680     case ARRAY_RANGE_REF:
1681       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1682
1683     CASE_CONVERT:
1684       return true;
1685
1686     default:
1687       break;
1688     }
1689
1690   return false;
1691 }
1692
1693 /* Finds addresses in *OP_P inside STMT.  */
1694
1695 static void
1696 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1697 {
1698   tree base = *op_p, step = size_zero_node;
1699   struct iv *civ;
1700   struct ifs_ivopts_data ifs_ivopts_data;
1701
1702   /* Do not play with volatile memory references.  A bit too conservative,
1703      perhaps, but safe.  */
1704   if (gimple_has_volatile_ops (stmt))
1705     goto fail;
1706
1707   /* Ignore bitfields for now.  Not really something terribly complicated
1708      to handle.  TODO.  */
1709   if (TREE_CODE (base) == BIT_FIELD_REF)
1710     goto fail;
1711
1712   base = unshare_expr (base);
1713
1714   if (TREE_CODE (base) == TARGET_MEM_REF)
1715     {
1716       tree type = build_pointer_type (TREE_TYPE (base));
1717       tree astep;
1718
1719       if (TMR_BASE (base)
1720           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1721         {
1722           civ = get_iv (data, TMR_BASE (base));
1723           if (!civ)
1724             goto fail;
1725
1726           TMR_BASE (base) = civ->base;
1727           step = civ->step;
1728         }
1729       if (TMR_INDEX2 (base)
1730           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1731         {
1732           civ = get_iv (data, TMR_INDEX2 (base));
1733           if (!civ)
1734             goto fail;
1735
1736           TMR_INDEX2 (base) = civ->base;
1737           step = civ->step;
1738         }
1739       if (TMR_INDEX (base)
1740           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1741         {
1742           civ = get_iv (data, TMR_INDEX (base));
1743           if (!civ)
1744             goto fail;
1745
1746           TMR_INDEX (base) = civ->base;
1747           astep = civ->step;
1748
1749           if (astep)
1750             {
1751               if (TMR_STEP (base))
1752                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1753
1754               step = fold_build2 (PLUS_EXPR, type, step, astep);
1755             }
1756         }
1757
1758       if (integer_zerop (step))
1759         goto fail;
1760       base = tree_mem_ref_addr (type, base);
1761     }
1762   else
1763     {
1764       ifs_ivopts_data.ivopts_data = data;
1765       ifs_ivopts_data.stmt = stmt;
1766       ifs_ivopts_data.step = size_zero_node;
1767       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1768           || integer_zerop (ifs_ivopts_data.step))
1769         goto fail;
1770       step = ifs_ivopts_data.step;
1771
1772       /* Check that the base expression is addressable.  This needs
1773          to be done after substituting bases of IVs into it.  */
1774       if (may_be_nonaddressable_p (base))
1775         goto fail;
1776
1777       /* Moreover, on strict alignment platforms, check that it is
1778          sufficiently aligned.  */
1779       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1780         goto fail;
1781
1782       base = build_fold_addr_expr (base);
1783
1784       /* Substituting bases of IVs into the base expression might
1785          have caused folding opportunities.  */
1786       if (TREE_CODE (base) == ADDR_EXPR)
1787         {
1788           tree *ref = &TREE_OPERAND (base, 0);
1789           while (handled_component_p (*ref))
1790             ref = &TREE_OPERAND (*ref, 0);
1791           if (TREE_CODE (*ref) == MEM_REF)
1792             {
1793               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1794                                       TREE_OPERAND (*ref, 0),
1795                                       TREE_OPERAND (*ref, 1));
1796               if (tem)
1797                 *ref = tem;
1798             }
1799         }
1800     }
1801
1802   civ = alloc_iv (base, step);
1803   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1804   return;
1805
1806 fail:
1807   for_each_index (op_p, idx_record_use, data);
1808 }
1809
1810 /* Finds and records invariants used in STMT.  */
1811
1812 static void
1813 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1814 {
1815   ssa_op_iter iter;
1816   use_operand_p use_p;
1817   tree op;
1818
1819   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1820     {
1821       op = USE_FROM_PTR (use_p);
1822       record_invariant (data, op, false);
1823     }
1824 }
1825
1826 /* Finds interesting uses of induction variables in the statement STMT.  */
1827
1828 static void
1829 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1830 {
1831   struct iv *iv;
1832   tree op, *lhs, *rhs;
1833   ssa_op_iter iter;
1834   use_operand_p use_p;
1835   enum tree_code code;
1836
1837   find_invariants_stmt (data, stmt);
1838
1839   if (gimple_code (stmt) == GIMPLE_COND)
1840     {
1841       find_interesting_uses_cond (data, stmt);
1842       return;
1843     }
1844
1845   if (is_gimple_assign (stmt))
1846     {
1847       lhs = gimple_assign_lhs_ptr (stmt);
1848       rhs = gimple_assign_rhs1_ptr (stmt);
1849
1850       if (TREE_CODE (*lhs) == SSA_NAME)
1851         {
1852           /* If the statement defines an induction variable, the uses are not
1853              interesting by themselves.  */
1854
1855           iv = get_iv (data, *lhs);
1856
1857           if (iv && !integer_zerop (iv->step))
1858             return;
1859         }
1860
1861       code = gimple_assign_rhs_code (stmt);
1862       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1863           && (REFERENCE_CLASS_P (*rhs)
1864               || is_gimple_val (*rhs)))
1865         {
1866           if (REFERENCE_CLASS_P (*rhs))
1867             find_interesting_uses_address (data, stmt, rhs);
1868           else
1869             find_interesting_uses_op (data, *rhs);
1870
1871           if (REFERENCE_CLASS_P (*lhs))
1872             find_interesting_uses_address (data, stmt, lhs);
1873           return;
1874         }
1875       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1876         {
1877           find_interesting_uses_cond (data, stmt);
1878           return;
1879         }
1880
1881       /* TODO -- we should also handle address uses of type
1882
1883          memory = call (whatever);
1884
1885          and
1886
1887          call (memory).  */
1888     }
1889
1890   if (gimple_code (stmt) == GIMPLE_PHI
1891       && gimple_bb (stmt) == data->current_loop->header)
1892     {
1893       iv = get_iv (data, PHI_RESULT (stmt));
1894
1895       if (iv && !integer_zerop (iv->step))
1896         return;
1897     }
1898
1899   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1900     {
1901       op = USE_FROM_PTR (use_p);
1902
1903       if (TREE_CODE (op) != SSA_NAME)
1904         continue;
1905
1906       iv = get_iv (data, op);
1907       if (!iv)
1908         continue;
1909
1910       find_interesting_uses_op (data, op);
1911     }
1912 }
1913
1914 /* Finds interesting uses of induction variables outside of loops
1915    on loop exit edge EXIT.  */
1916
1917 static void
1918 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1919 {
1920   gimple phi;
1921   gimple_stmt_iterator psi;
1922   tree def;
1923
1924   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1925     {
1926       phi = gsi_stmt (psi);
1927       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1928       if (!virtual_operand_p (def))
1929         find_interesting_uses_op (data, def);
1930     }
1931 }
1932
1933 /* Finds uses of the induction variables that are interesting.  */
1934
1935 static void
1936 find_interesting_uses (struct ivopts_data *data)
1937 {
1938   basic_block bb;
1939   gimple_stmt_iterator bsi;
1940   basic_block *body = get_loop_body (data->current_loop);
1941   unsigned i;
1942   struct version_info *info;
1943   edge e;
1944
1945   if (dump_file && (dump_flags & TDF_DETAILS))
1946     fprintf (dump_file, "Uses:\n\n");
1947
1948   for (i = 0; i < data->current_loop->num_nodes; i++)
1949     {
1950       edge_iterator ei;
1951       bb = body[i];
1952
1953       FOR_EACH_EDGE (e, ei, bb->succs)
1954         if (e->dest != EXIT_BLOCK_PTR
1955             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1956           find_interesting_uses_outside (data, e);
1957
1958       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1959         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1960       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1961         if (!is_gimple_debug (gsi_stmt (bsi)))
1962           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1963     }
1964
1965   if (dump_file && (dump_flags & TDF_DETAILS))
1966     {
1967       bitmap_iterator bi;
1968
1969       fprintf (dump_file, "\n");
1970
1971       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1972         {
1973           info = ver_info (data, i);
1974           if (info->inv_id)
1975             {
1976               fprintf (dump_file, "  ");
1977               print_generic_expr (dump_file, info->name, TDF_SLIM);
1978               fprintf (dump_file, " is invariant (%d)%s\n",
1979                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1980             }
1981         }
1982
1983       fprintf (dump_file, "\n");
1984     }
1985
1986   free (body);
1987 }
1988
1989 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1990    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1991    we are at the top-level of the processed address.  */
1992
1993 static tree
1994 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1995                 unsigned HOST_WIDE_INT *offset)
1996 {
1997   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1998   enum tree_code code;
1999   tree type, orig_type = TREE_TYPE (expr);
2000   unsigned HOST_WIDE_INT off0, off1, st;
2001   tree orig_expr = expr;
2002
2003   STRIP_NOPS (expr);
2004
2005   type = TREE_TYPE (expr);
2006   code = TREE_CODE (expr);
2007   *offset = 0;
2008
2009   switch (code)
2010     {
2011     case INTEGER_CST:
2012       if (!cst_and_fits_in_hwi (expr)
2013           || integer_zerop (expr))
2014         return orig_expr;
2015
2016       *offset = int_cst_value (expr);
2017       return build_int_cst (orig_type, 0);
2018
2019     case POINTER_PLUS_EXPR:
2020     case PLUS_EXPR:
2021     case MINUS_EXPR:
2022       op0 = TREE_OPERAND (expr, 0);
2023       op1 = TREE_OPERAND (expr, 1);
2024
2025       op0 = strip_offset_1 (op0, false, false, &off0);
2026       op1 = strip_offset_1 (op1, false, false, &off1);
2027
2028       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2029       if (op0 == TREE_OPERAND (expr, 0)
2030           && op1 == TREE_OPERAND (expr, 1))
2031         return orig_expr;
2032
2033       if (integer_zerop (op1))
2034         expr = op0;
2035       else if (integer_zerop (op0))
2036         {
2037           if (code == MINUS_EXPR)
2038             expr = fold_build1 (NEGATE_EXPR, type, op1);
2039           else
2040             expr = op1;
2041         }
2042       else
2043         expr = fold_build2 (code, type, op0, op1);
2044
2045       return fold_convert (orig_type, expr);
2046
2047     case MULT_EXPR:
2048       op1 = TREE_OPERAND (expr, 1);
2049       if (!cst_and_fits_in_hwi (op1))
2050         return orig_expr;
2051
2052       op0 = TREE_OPERAND (expr, 0);
2053       op0 = strip_offset_1 (op0, false, false, &off0);
2054       if (op0 == TREE_OPERAND (expr, 0))
2055         return orig_expr;
2056
2057       *offset = off0 * int_cst_value (op1);
2058       if (integer_zerop (op0))
2059         expr = op0;
2060       else
2061         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2062
2063       return fold_convert (orig_type, expr);
2064
2065     case ARRAY_REF:
2066     case ARRAY_RANGE_REF:
2067       if (!inside_addr)
2068         return orig_expr;
2069
2070       step = array_ref_element_size (expr);
2071       if (!cst_and_fits_in_hwi (step))
2072         break;
2073
2074       st = int_cst_value (step);
2075       op1 = TREE_OPERAND (expr, 1);
2076       op1 = strip_offset_1 (op1, false, false, &off1);
2077       *offset = off1 * st;
2078
2079       if (top_compref
2080           && integer_zerop (op1))
2081         {
2082           /* Strip the component reference completely.  */
2083           op0 = TREE_OPERAND (expr, 0);
2084           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2085           *offset += off0;
2086           return op0;
2087         }
2088       break;
2089
2090     case COMPONENT_REF:
2091       if (!inside_addr)
2092         return orig_expr;
2093
2094       tmp = component_ref_field_offset (expr);
2095       if (top_compref
2096           && cst_and_fits_in_hwi (tmp))
2097         {
2098           /* Strip the component reference completely.  */
2099           op0 = TREE_OPERAND (expr, 0);
2100           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2101           *offset = off0 + int_cst_value (tmp);
2102           return op0;
2103         }
2104       break;
2105
2106     case ADDR_EXPR:
2107       op0 = TREE_OPERAND (expr, 0);
2108       op0 = strip_offset_1 (op0, true, true, &off0);
2109       *offset += off0;
2110
2111       if (op0 == TREE_OPERAND (expr, 0))
2112         return orig_expr;
2113
2114       expr = build_fold_addr_expr (op0);
2115       return fold_convert (orig_type, expr);
2116
2117     case MEM_REF:
2118       /* ???  Offset operand?  */
2119       inside_addr = false;
2120       break;
2121
2122     default:
2123       return orig_expr;
2124     }
2125
2126   /* Default handling of expressions for that we want to recurse into
2127      the first operand.  */
2128   op0 = TREE_OPERAND (expr, 0);
2129   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2130   *offset += off0;
2131
2132   if (op0 == TREE_OPERAND (expr, 0)
2133       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2134     return orig_expr;
2135
2136   expr = copy_node (expr);
2137   TREE_OPERAND (expr, 0) = op0;
2138   if (op1)
2139     TREE_OPERAND (expr, 1) = op1;
2140
2141   /* Inside address, we might strip the top level component references,
2142      thus changing type of the expression.  Handling of ADDR_EXPR
2143      will fix that.  */
2144   expr = fold_convert (orig_type, expr);
2145
2146   return expr;
2147 }
2148
2149 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2150
2151 static tree
2152 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2153 {
2154   return strip_offset_1 (expr, false, false, offset);
2155 }
2156
2157 /* Returns variant of TYPE that can be used as base for different uses.
2158    We return unsigned type with the same precision, which avoids problems
2159    with overflows.  */
2160
2161 static tree
2162 generic_type_for (tree type)
2163 {
2164   if (POINTER_TYPE_P (type))
2165     return unsigned_type_for (type);
2166
2167   if (TYPE_UNSIGNED (type))
2168     return type;
2169
2170   return unsigned_type_for (type);
2171 }
2172
2173 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2174    the bitmap to that we should store it.  */
2175
2176 static struct ivopts_data *fd_ivopts_data;
2177 static tree
2178 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2179 {
2180   bitmap *depends_on = (bitmap *) data;
2181   struct version_info *info;
2182
2183   if (TREE_CODE (*expr_p) != SSA_NAME)
2184     return NULL_TREE;
2185   info = name_info (fd_ivopts_data, *expr_p);
2186
2187   if (!info->inv_id || info->has_nonlin_use)
2188     return NULL_TREE;
2189
2190   if (!*depends_on)
2191     *depends_on = BITMAP_ALLOC (NULL);
2192   bitmap_set_bit (*depends_on, info->inv_id);
2193
2194   return NULL_TREE;
2195 }
2196
2197 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2198    position to POS.  If USE is not NULL, the candidate is set as related to
2199    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2200    replacement of the final value of the iv by a direct computation.  */
2201
2202 static struct iv_cand *
2203 add_candidate_1 (struct ivopts_data *data,
2204                  tree base, tree step, bool important, enum iv_position pos,
2205                  struct iv_use *use, gimple incremented_at)
2206 {
2207   unsigned i;
2208   struct iv_cand *cand = NULL;
2209   tree type, orig_type;
2210
2211   /* For non-original variables, make sure their values are computed in a type
2212      that does not invoke undefined behavior on overflows (since in general,
2213      we cannot prove that these induction variables are non-wrapping).  */
2214   if (pos != IP_ORIGINAL)
2215     {
2216       orig_type = TREE_TYPE (base);
2217       type = generic_type_for (orig_type);
2218       if (type != orig_type)
2219         {
2220           base = fold_convert (type, base);
2221           step = fold_convert (type, step);
2222         }
2223     }
2224
2225   for (i = 0; i < n_iv_cands (data); i++)
2226     {
2227       cand = iv_cand (data, i);
2228
2229       if (cand->pos != pos)
2230         continue;
2231
2232       if (cand->incremented_at != incremented_at
2233           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2234               && cand->ainc_use != use))
2235         continue;
2236
2237       if (!cand->iv)
2238         {
2239           if (!base && !step)
2240             break;
2241
2242           continue;
2243         }
2244
2245       if (!base && !step)
2246         continue;
2247
2248       if (operand_equal_p (base, cand->iv->base, 0)
2249           && operand_equal_p (step, cand->iv->step, 0)
2250           && (TYPE_PRECISION (TREE_TYPE (base))
2251               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2252         break;
2253     }
2254
2255   if (i == n_iv_cands (data))
2256     {
2257       cand = XCNEW (struct iv_cand);
2258       cand->id = i;
2259
2260       if (!base && !step)
2261         cand->iv = NULL;
2262       else
2263         cand->iv = alloc_iv (base, step);
2264
2265       cand->pos = pos;
2266       if (pos != IP_ORIGINAL && cand->iv)
2267         {
2268           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2269           cand->var_after = cand->var_before;
2270         }
2271       cand->important = important;
2272       cand->incremented_at = incremented_at;
2273       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2274
2275       if (step
2276           && TREE_CODE (step) != INTEGER_CST)
2277         {
2278           fd_ivopts_data = data;
2279           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2280         }
2281
2282       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2283         cand->ainc_use = use;
2284       else
2285         cand->ainc_use = NULL;
2286
2287       if (dump_file && (dump_flags & TDF_DETAILS))
2288         dump_cand (dump_file, cand);
2289     }
2290
2291   if (important && !cand->important)
2292     {
2293       cand->important = true;
2294       if (dump_file && (dump_flags & TDF_DETAILS))
2295         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2296     }
2297
2298   if (use)
2299     {
2300       bitmap_set_bit (use->related_cands, i);
2301       if (dump_file && (dump_flags & TDF_DETAILS))
2302         fprintf (dump_file, "Candidate %d is related to use %d\n",
2303                  cand->id, use->id);
2304     }
2305
2306   return cand;
2307 }
2308
2309 /* Returns true if incrementing the induction variable at the end of the LOOP
2310    is allowed.
2311
2312    The purpose is to avoid splitting latch edge with a biv increment, thus
2313    creating a jump, possibly confusing other optimization passes and leaving
2314    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2315    is not available (so we do not have a better alternative), or if the latch
2316    edge is already nonempty.  */
2317
2318 static bool
2319 allow_ip_end_pos_p (struct loop *loop)
2320 {
2321   if (!ip_normal_pos (loop))
2322     return true;
2323
2324   if (!empty_block_p (ip_end_pos (loop)))
2325     return true;
2326
2327   return false;
2328 }
2329
2330 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2331    Important field is set to IMPORTANT.  */
2332
2333 static void
2334 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2335                         bool important, struct iv_use *use)
2336 {
2337   basic_block use_bb = gimple_bb (use->stmt);
2338   enum machine_mode mem_mode;
2339   unsigned HOST_WIDE_INT cstepi;
2340
2341   /* If we insert the increment in any position other than the standard
2342      ones, we must ensure that it is incremented once per iteration.
2343      It must not be in an inner nested loop, or one side of an if
2344      statement.  */
2345   if (use_bb->loop_father != data->current_loop
2346       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2347       || stmt_could_throw_p (use->stmt)
2348       || !cst_and_fits_in_hwi (step))
2349     return;
2350
2351   cstepi = int_cst_value (step);
2352
2353   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2354   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2355         || USE_STORE_PRE_INCREMENT (mem_mode))
2356        && GET_MODE_SIZE (mem_mode) == cstepi)
2357       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2358            || USE_STORE_PRE_DECREMENT (mem_mode))
2359           && GET_MODE_SIZE (mem_mode) == -cstepi))
2360     {
2361       enum tree_code code = MINUS_EXPR;
2362       tree new_base;
2363       tree new_step = step;
2364
2365       if (POINTER_TYPE_P (TREE_TYPE (base)))
2366         {
2367           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2368           code = POINTER_PLUS_EXPR;
2369         }
2370       else
2371         new_step = fold_convert (TREE_TYPE (base), new_step);
2372       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2373       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2374                        use->stmt);
2375     }
2376   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2377         || USE_STORE_POST_INCREMENT (mem_mode))
2378        && GET_MODE_SIZE (mem_mode) == cstepi)
2379       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2380            || USE_STORE_POST_DECREMENT (mem_mode))
2381           && GET_MODE_SIZE (mem_mode) == -cstepi))
2382     {
2383       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2384                        use->stmt);
2385     }
2386 }
2387
2388 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2389    position to POS.  If USE is not NULL, the candidate is set as related to
2390    it.  The candidate computation is scheduled on all available positions.  */
2391
2392 static void
2393 add_candidate (struct ivopts_data *data,
2394                tree base, tree step, bool important, struct iv_use *use)
2395 {
2396   if (ip_normal_pos (data->current_loop))
2397     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2398   if (ip_end_pos (data->current_loop)
2399       && allow_ip_end_pos_p (data->current_loop))
2400     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2401
2402   if (use != NULL && use->type == USE_ADDRESS)
2403     add_autoinc_candidates (data, base, step, important, use);
2404 }
2405
2406 /* Adds standard iv candidates.  */
2407
2408 static void
2409 add_standard_iv_candidates (struct ivopts_data *data)
2410 {
2411   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2412
2413   /* The same for a double-integer type if it is still fast enough.  */
2414   if (TYPE_PRECISION
2415         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2416       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2417     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2418                    build_int_cst (long_integer_type_node, 1), true, NULL);
2419
2420   /* The same for a double-integer type if it is still fast enough.  */
2421   if (TYPE_PRECISION
2422         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2423       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2424     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2425                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2426 }
2427
2428
2429 /* Adds candidates bases on the old induction variable IV.  */
2430
2431 static void
2432 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2433 {
2434   gimple phi;
2435   tree def;
2436   struct iv_cand *cand;
2437
2438   add_candidate (data, iv->base, iv->step, true, NULL);
2439
2440   /* The same, but with initial value zero.  */
2441   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2442     add_candidate (data, size_int (0), iv->step, true, NULL);
2443   else
2444     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2445                    iv->step, true, NULL);
2446
2447   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2448   if (gimple_code (phi) == GIMPLE_PHI)
2449     {
2450       /* Additionally record the possibility of leaving the original iv
2451          untouched.  */
2452       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2453       cand = add_candidate_1 (data,
2454                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2455                               SSA_NAME_DEF_STMT (def));
2456       cand->var_before = iv->ssa_name;
2457       cand->var_after = def;
2458     }
2459 }
2460
2461 /* Adds candidates based on the old induction variables.  */
2462
2463 static void
2464 add_old_ivs_candidates (struct ivopts_data *data)
2465 {
2466   unsigned i;
2467   struct iv *iv;
2468   bitmap_iterator bi;
2469
2470   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2471     {
2472       iv = ver_info (data, i)->iv;
2473       if (iv && iv->biv_p && !integer_zerop (iv->step))
2474         add_old_iv_candidates (data, iv);
2475     }
2476 }
2477
2478 /* Adds candidates based on the value of the induction variable IV and USE.  */
2479
2480 static void
2481 add_iv_value_candidates (struct ivopts_data *data,
2482                          struct iv *iv, struct iv_use *use)
2483 {
2484   unsigned HOST_WIDE_INT offset;
2485   tree base;
2486   tree basetype;
2487
2488   add_candidate (data, iv->base, iv->step, false, use);
2489
2490   /* The same, but with initial value zero.  Make such variable important,
2491      since it is generic enough so that possibly many uses may be based
2492      on it.  */
2493   basetype = TREE_TYPE (iv->base);
2494   if (POINTER_TYPE_P (basetype))
2495     basetype = sizetype;
2496   add_candidate (data, build_int_cst (basetype, 0),
2497                  iv->step, true, use);
2498
2499   /* Third, try removing the constant offset.  Make sure to even
2500      add a candidate for &a[0] vs. (T *)&a.  */
2501   base = strip_offset (iv->base, &offset);
2502   if (offset
2503       || base != iv->base)
2504     add_candidate (data, base, iv->step, false, use);
2505 }
2506
2507 /* Adds candidates based on the uses.  */
2508
2509 static void
2510 add_derived_ivs_candidates (struct ivopts_data *data)
2511 {
2512   unsigned i;
2513
2514   for (i = 0; i < n_iv_uses (data); i++)
2515     {
2516       struct iv_use *use = iv_use (data, i);
2517
2518       if (!use)
2519         continue;
2520
2521       switch (use->type)
2522         {
2523         case USE_NONLINEAR_EXPR:
2524         case USE_COMPARE:
2525         case USE_ADDRESS:
2526           /* Just add the ivs based on the value of the iv used here.  */
2527           add_iv_value_candidates (data, use->iv, use);
2528           break;
2529
2530         default:
2531           gcc_unreachable ();
2532         }
2533     }
2534 }
2535
2536 /* Record important candidates and add them to related_cands bitmaps
2537    if needed.  */
2538
2539 static void
2540 record_important_candidates (struct ivopts_data *data)
2541 {
2542   unsigned i;
2543   struct iv_use *use;
2544
2545   for (i = 0; i < n_iv_cands (data); i++)
2546     {
2547       struct iv_cand *cand = iv_cand (data, i);
2548
2549       if (cand->important)
2550         bitmap_set_bit (data->important_candidates, i);
2551     }
2552
2553   data->consider_all_candidates = (n_iv_cands (data)
2554                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2555
2556   if (data->consider_all_candidates)
2557     {
2558       /* We will not need "related_cands" bitmaps in this case,
2559          so release them to decrease peak memory consumption.  */
2560       for (i = 0; i < n_iv_uses (data); i++)
2561         {
2562           use = iv_use (data, i);
2563           BITMAP_FREE (use->related_cands);
2564         }
2565     }
2566   else
2567     {
2568       /* Add important candidates to the related_cands bitmaps.  */
2569       for (i = 0; i < n_iv_uses (data); i++)
2570         bitmap_ior_into (iv_use (data, i)->related_cands,
2571                          data->important_candidates);
2572     }
2573 }
2574
2575 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2576    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2577    we allocate a simple list to every use.  */
2578
2579 static void
2580 alloc_use_cost_map (struct ivopts_data *data)
2581 {
2582   unsigned i, size, s, j;
2583
2584   for (i = 0; i < n_iv_uses (data); i++)
2585     {
2586       struct iv_use *use = iv_use (data, i);
2587       bitmap_iterator bi;
2588
2589       if (data->consider_all_candidates)
2590         size = n_iv_cands (data);
2591       else
2592         {
2593           s = 0;
2594           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2595             {
2596               s++;
2597             }
2598
2599           /* Round up to the power of two, so that moduling by it is fast.  */
2600           for (size = 1; size < s; size <<= 1)
2601             continue;
2602         }
2603
2604       use->n_map_members = size;
2605       use->cost_map = XCNEWVEC (struct cost_pair, size);
2606     }
2607 }
2608
2609 /* Returns description of computation cost of expression whose runtime
2610    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2611
2612 static comp_cost
2613 new_cost (unsigned runtime, unsigned complexity)
2614 {
2615   comp_cost cost;
2616
2617   cost.cost = runtime;
2618   cost.complexity = complexity;
2619
2620   return cost;
2621 }
2622
2623 /* Adds costs COST1 and COST2.  */
2624
2625 static comp_cost
2626 add_costs (comp_cost cost1, comp_cost cost2)
2627 {
2628   cost1.cost += cost2.cost;
2629   cost1.complexity += cost2.complexity;
2630
2631   return cost1;
2632 }
2633 /* Subtracts costs COST1 and COST2.  */
2634
2635 static comp_cost
2636 sub_costs (comp_cost cost1, comp_cost cost2)
2637 {
2638   cost1.cost -= cost2.cost;
2639   cost1.complexity -= cost2.complexity;
2640
2641   return cost1;
2642 }
2643
2644 /* Returns a negative number if COST1 < COST2, a positive number if
2645    COST1 > COST2, and 0 if COST1 = COST2.  */
2646
2647 static int
2648 compare_costs (comp_cost cost1, comp_cost cost2)
2649 {
2650   if (cost1.cost == cost2.cost)
2651     return cost1.complexity - cost2.complexity;
2652
2653   return cost1.cost - cost2.cost;
2654 }
2655
2656 /* Returns true if COST is infinite.  */
2657
2658 static bool
2659 infinite_cost_p (comp_cost cost)
2660 {
2661   return cost.cost == INFTY;
2662 }
2663
2664 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2665    on invariants DEPENDS_ON and that the value used in expressing it
2666    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2667
2668 static void
2669 set_use_iv_cost (struct ivopts_data *data,
2670                  struct iv_use *use, struct iv_cand *cand,
2671                  comp_cost cost, bitmap depends_on, tree value,
2672                  enum tree_code comp, int inv_expr_id)
2673 {
2674   unsigned i, s;
2675
2676   if (infinite_cost_p (cost))
2677     {
2678       BITMAP_FREE (depends_on);
2679       return;
2680     }
2681
2682   if (data->consider_all_candidates)
2683     {
2684       use->cost_map[cand->id].cand = cand;
2685       use->cost_map[cand->id].cost = cost;
2686       use->cost_map[cand->id].depends_on = depends_on;
2687       use->cost_map[cand->id].value = value;
2688       use->cost_map[cand->id].comp = comp;
2689       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2690       return;
2691     }
2692
2693   /* n_map_members is a power of two, so this computes modulo.  */
2694   s = cand->id & (use->n_map_members - 1);
2695   for (i = s; i < use->n_map_members; i++)
2696     if (!use->cost_map[i].cand)
2697       goto found;
2698   for (i = 0; i < s; i++)
2699     if (!use->cost_map[i].cand)
2700       goto found;
2701
2702   gcc_unreachable ();
2703
2704 found:
2705   use->cost_map[i].cand = cand;
2706   use->cost_map[i].cost = cost;
2707   use->cost_map[i].depends_on = depends_on;
2708   use->cost_map[i].value = value;
2709   use->cost_map[i].comp = comp;
2710   use->cost_map[i].inv_expr_id = inv_expr_id;
2711 }
2712
2713 /* Gets cost of (USE, CANDIDATE) pair.  */
2714
2715 static struct cost_pair *
2716 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2717                  struct iv_cand *cand)
2718 {
2719   unsigned i, s;
2720   struct cost_pair *ret;
2721
2722   if (!cand)
2723     return NULL;
2724
2725   if (data->consider_all_candidates)
2726     {
2727       ret = use->cost_map + cand->id;
2728       if (!ret->cand)
2729         return NULL;
2730
2731       return ret;
2732     }
2733
2734   /* n_map_members is a power of two, so this computes modulo.  */
2735   s = cand->id & (use->n_map_members - 1);
2736   for (i = s; i < use->n_map_members; i++)
2737     if (use->cost_map[i].cand == cand)
2738       return use->cost_map + i;
2739
2740   for (i = 0; i < s; i++)
2741     if (use->cost_map[i].cand == cand)
2742       return use->cost_map + i;
2743
2744   return NULL;
2745 }
2746
2747 /* Returns estimate on cost of computing SEQ.  */
2748
2749 static unsigned
2750 seq_cost (rtx seq, bool speed)
2751 {
2752   unsigned cost = 0;
2753   rtx set;
2754
2755   for (; seq; seq = NEXT_INSN (seq))
2756     {
2757       set = single_set (seq);
2758       if (set)
2759         cost += set_src_cost (SET_SRC (set), speed);
2760       else
2761         cost++;
2762     }
2763
2764   return cost;
2765 }
2766
2767 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2768 static rtx
2769 produce_memory_decl_rtl (tree obj, int *regno)
2770 {
2771   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2772   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2773   rtx x;
2774
2775   gcc_assert (obj);
2776   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2777     {
2778       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2779       x = gen_rtx_SYMBOL_REF (address_mode, name);
2780       SET_SYMBOL_REF_DECL (x, obj);
2781       x = gen_rtx_MEM (DECL_MODE (obj), x);
2782       set_mem_addr_space (x, as);
2783       targetm.encode_section_info (obj, x, true);
2784     }
2785   else
2786     {
2787       x = gen_raw_REG (address_mode, (*regno)++);
2788       x = gen_rtx_MEM (DECL_MODE (obj), x);
2789       set_mem_addr_space (x, as);
2790     }
2791
2792   return x;
2793 }
2794
2795 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2796    walk_tree.  DATA contains the actual fake register number.  */
2797
2798 static tree
2799 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2800 {
2801   tree obj = NULL_TREE;
2802   rtx x = NULL_RTX;
2803   int *regno = (int *) data;
2804
2805   switch (TREE_CODE (*expr_p))
2806     {
2807     case ADDR_EXPR:
2808       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2809            handled_component_p (*expr_p);
2810            expr_p = &TREE_OPERAND (*expr_p, 0))
2811         continue;
2812       obj = *expr_p;
2813       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2814         x = produce_memory_decl_rtl (obj, regno);
2815       break;
2816
2817     case SSA_NAME:
2818       *ws = 0;
2819       obj = SSA_NAME_VAR (*expr_p);
2820       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
2821       if (!obj)
2822         return NULL_TREE;
2823       if (!DECL_RTL_SET_P (obj))
2824         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2825       break;
2826
2827     case VAR_DECL:
2828     case PARM_DECL:
2829     case RESULT_DECL:
2830       *ws = 0;
2831       obj = *expr_p;
2832
2833       if (DECL_RTL_SET_P (obj))
2834         break;
2835
2836       if (DECL_MODE (obj) == BLKmode)
2837         x = produce_memory_decl_rtl (obj, regno);
2838       else
2839         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2840
2841       break;
2842
2843     default:
2844       break;
2845     }
2846
2847   if (x)
2848     {
2849       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2850       SET_DECL_RTL (obj, x);
2851     }
2852
2853   return NULL_TREE;
2854 }
2855
2856 /* Determines cost of the computation of EXPR.  */
2857
2858 static unsigned
2859 computation_cost (tree expr, bool speed)
2860 {
2861   rtx seq, rslt;
2862   tree type = TREE_TYPE (expr);
2863   unsigned cost;
2864   /* Avoid using hard regs in ways which may be unsupported.  */
2865   int regno = LAST_VIRTUAL_REGISTER + 1;
2866   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2867   enum node_frequency real_frequency = node->frequency;
2868
2869   node->frequency = NODE_FREQUENCY_NORMAL;
2870   crtl->maybe_hot_insn_p = speed;
2871   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2872   start_sequence ();
2873   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2874   seq = get_insns ();
2875   end_sequence ();
2876   default_rtl_profile ();
2877   node->frequency = real_frequency;
2878
2879   cost = seq_cost (seq, speed);
2880   if (MEM_P (rslt))
2881     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2882                           TYPE_ADDR_SPACE (type), speed);
2883   else if (!REG_P (rslt))
2884     cost += set_src_cost (rslt, speed);
2885
2886   return cost;
2887 }
2888
2889 /* Returns variable containing the value of candidate CAND at statement AT.  */
2890
2891 static tree
2892 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2893 {
2894   if (stmt_after_increment (loop, cand, stmt))
2895     return cand->var_after;
2896   else
2897     return cand->var_before;
2898 }
2899
2900 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2901    same precision that is at least as wide as the precision of TYPE, stores
2902    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2903    type of A and B.  */
2904
2905 static tree
2906 determine_common_wider_type (tree *a, tree *b)
2907 {
2908   tree wider_type = NULL;
2909   tree suba, subb;
2910   tree atype = TREE_TYPE (*a);
2911
2912   if (CONVERT_EXPR_P (*a))
2913     {
2914       suba = TREE_OPERAND (*a, 0);
2915       wider_type = TREE_TYPE (suba);
2916       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2917         return atype;
2918     }
2919   else
2920     return atype;
2921
2922   if (CONVERT_EXPR_P (*b))
2923     {
2924       subb = TREE_OPERAND (*b, 0);
2925       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2926         return atype;
2927     }
2928   else
2929     return atype;
2930
2931   *a = suba;
2932   *b = subb;
2933   return wider_type;
2934 }
2935
2936 /* Determines the expression by that USE is expressed from induction variable
2937    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2938    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2939
2940 static bool
2941 get_computation_aff (struct loop *loop,
2942                      struct iv_use *use, struct iv_cand *cand, gimple at,
2943                      struct affine_tree_combination *aff)
2944 {
2945   tree ubase = use->iv->base;
2946   tree ustep = use->iv->step;
2947   tree cbase = cand->iv->base;
2948   tree cstep = cand->iv->step, cstep_common;
2949   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2950   tree common_type, var;
2951   tree uutype;
2952   aff_tree cbase_aff, var_aff;
2953   double_int rat;
2954
2955   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2956     {
2957       /* We do not have a precision to express the values of use.  */
2958       return false;
2959     }
2960
2961   var = var_at_stmt (loop, cand, at);
2962   uutype = unsigned_type_for (utype);
2963
2964   /* If the conversion is not noop, perform it.  */
2965   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2966     {
2967       cstep = fold_convert (uutype, cstep);
2968       cbase = fold_convert (uutype, cbase);
2969       var = fold_convert (uutype, var);
2970     }
2971
2972   if (!constant_multiple_of (ustep, cstep, &rat))
2973     return false;
2974
2975   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2976      type, we achieve better folding by computing their difference in this
2977      wider type, and cast the result to UUTYPE.  We do not need to worry about
2978      overflows, as all the arithmetics will in the end be performed in UUTYPE
2979      anyway.  */
2980   common_type = determine_common_wider_type (&ubase, &cbase);
2981
2982   /* use = ubase - ratio * cbase + ratio * var.  */
2983   tree_to_aff_combination (ubase, common_type, aff);
2984   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2985   tree_to_aff_combination (var, uutype, &var_aff);
2986
2987   /* We need to shift the value if we are after the increment.  */
2988   if (stmt_after_increment (loop, cand, at))
2989     {
2990       aff_tree cstep_aff;
2991
2992       if (common_type != uutype)
2993         cstep_common = fold_convert (common_type, cstep);
2994       else
2995         cstep_common = cstep;
2996
2997       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2998       aff_combination_add (&cbase_aff, &cstep_aff);
2999     }
3000
3001   aff_combination_scale (&cbase_aff, -rat);
3002   aff_combination_add (aff, &cbase_aff);
3003   if (common_type != uutype)
3004     aff_combination_convert (aff, uutype);
3005
3006   aff_combination_scale (&var_aff, rat);
3007   aff_combination_add (aff, &var_aff);
3008
3009   return true;
3010 }
3011
3012 /* Determines the expression by that USE is expressed from induction variable
3013    CAND at statement AT in LOOP.  The computation is unshared.  */
3014
3015 static tree
3016 get_computation_at (struct loop *loop,
3017                     struct iv_use *use, struct iv_cand *cand, gimple at)
3018 {
3019   aff_tree aff;
3020   tree type = TREE_TYPE (use->iv->base);
3021
3022   if (!get_computation_aff (loop, use, cand, at, &aff))
3023     return NULL_TREE;
3024   unshare_aff_combination (&aff);
3025   return fold_convert (type, aff_combination_to_tree (&aff));
3026 }
3027
3028 /* Determines the expression by that USE is expressed from induction variable
3029    CAND in LOOP.  The computation is unshared.  */
3030
3031 static tree
3032 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3033 {
3034   return get_computation_at (loop, use, cand, use->stmt);
3035 }
3036
3037 /* Adjust the cost COST for being in loop setup rather than loop body.
3038    If we're optimizing for space, the loop setup overhead is constant;
3039    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3040 static unsigned
3041 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3042 {
3043   if (cost == INFTY)
3044     return cost;
3045   else if (optimize_loop_for_speed_p (data->current_loop))
3046     return cost / avg_loop_niter (data->current_loop);
3047   else
3048     return cost;
3049 }
3050
3051 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3052    validity for a memory reference accessing memory of mode MODE in
3053    address space AS.  */
3054
3055 DEF_VEC_P (sbitmap);
3056 DEF_VEC_ALLOC_P (sbitmap, heap);
3057
3058 bool
3059 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3060                                  addr_space_t as)
3061 {
3062 #define MAX_RATIO 128
3063   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3064   static VEC (sbitmap, heap) *valid_mult_list;
3065   sbitmap valid_mult;
3066
3067   if (data_index >= VEC_length (sbitmap, valid_mult_list))
3068     VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3069
3070   valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3071   if (!valid_mult)
3072     {
3073       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3074       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3075       rtx addr;
3076       HOST_WIDE_INT i;
3077
3078       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3079       bitmap_clear (valid_mult);
3080       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3081       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3082         {
3083           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3084           if (memory_address_addr_space_p (mode, addr, as))
3085             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3086         }
3087
3088       if (dump_file && (dump_flags & TDF_DETAILS))
3089         {
3090           fprintf (dump_file, "  allowed multipliers:");
3091           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3092             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3093               fprintf (dump_file, " %d", (int) i);
3094           fprintf (dump_file, "\n");
3095           fprintf (dump_file, "\n");
3096         }
3097
3098       VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3099     }
3100
3101   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3102     return false;
3103
3104   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3105 }
3106
3107 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3108    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3109    variable is omitted.  Compute the cost for a memory reference that accesses
3110    a memory location of mode MEM_MODE in address space AS.
3111
3112    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3113    size of MEM_MODE / RATIO) is available.  To make this determination, we
3114    look at the size of the increment to be made, which is given in CSTEP.
3115    CSTEP may be zero if the step is unknown.
3116    STMT_AFTER_INC is true iff the statement we're looking at is after the
3117    increment of the original biv.
3118
3119    TODO -- there must be some better way.  This all is quite crude.  */
3120
3121 typedef struct address_cost_data_s
3122 {
3123   HOST_WIDE_INT min_offset, max_offset;
3124   unsigned costs[2][2][2][2];
3125 } *address_cost_data;
3126
3127 DEF_VEC_P (address_cost_data);
3128 DEF_VEC_ALLOC_P (address_cost_data, heap);
3129
3130 static comp_cost
3131 get_address_cost (bool symbol_present, bool var_present,
3132                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3133                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3134                   addr_space_t as, bool speed,
3135                   bool stmt_after_inc, bool *may_autoinc)
3136 {
3137   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3138   static VEC(address_cost_data, heap) *address_cost_data_list;
3139   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3140   address_cost_data data;
3141   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3142   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3143   unsigned cost, acost, complexity;
3144   bool offset_p, ratio_p, autoinc;
3145   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3146   unsigned HOST_WIDE_INT mask;
3147   unsigned bits;
3148
3149   if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3150     VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3151                            data_index + 1);
3152
3153   data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3154   if (!data)
3155     {
3156       HOST_WIDE_INT i;
3157       HOST_WIDE_INT rat, off = 0;
3158       int old_cse_not_expected, width;
3159       unsigned sym_p, var_p, off_p, rat_p, add_c;
3160       rtx seq, addr, base;
3161       rtx reg0, reg1;
3162
3163       data = (address_cost_data) xcalloc (1, sizeof (*data));
3164
3165       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3166
3167       width = GET_MODE_BITSIZE (address_mode) - 1;
3168       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3169         width = HOST_BITS_PER_WIDE_INT - 1;
3170       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3171
3172       for (i = width; i >= 0; i--)
3173         {
3174           off = -((unsigned HOST_WIDE_INT) 1 << i);
3175           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3176           if (memory_address_addr_space_p (mem_mode, addr, as))
3177             break;
3178         }
3179       data->min_offset = (i == -1? 0 : off);
3180
3181       for (i = width; i >= 0; i--)
3182         {
3183           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3184           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3185           if (memory_address_addr_space_p (mem_mode, addr, as))
3186             break;
3187         }
3188       if (i == -1)
3189         off = 0;
3190       data->max_offset = off;
3191
3192       if (dump_file && (dump_flags & TDF_DETAILS))
3193         {
3194           fprintf (dump_file, "get_address_cost:\n");
3195           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3196                    GET_MODE_NAME (mem_mode),
3197                    data->min_offset);
3198           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3199                    GET_MODE_NAME (mem_mode),
3200                    data->max_offset);
3201         }
3202
3203       rat = 1;
3204       for (i = 2; i <= MAX_RATIO; i++)
3205         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3206           {
3207             rat = i;
3208             break;
3209           }
3210
3211       /* Compute the cost of various addressing modes.  */
3212       acost = 0;
3213       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3214       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3215
3216       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3217           || USE_STORE_PRE_DECREMENT (mem_mode))
3218         {
3219           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3220           has_predec[mem_mode]
3221             = memory_address_addr_space_p (mem_mode, addr, as);
3222         }
3223       if (USE_LOAD_POST_DECREMENT (mem_mode)
3224           || USE_STORE_POST_DECREMENT (mem_mode))
3225         {
3226           addr = gen_rtx_POST_DEC (address_mode, reg0);
3227           has_postdec[mem_mode]
3228             = memory_address_addr_space_p (mem_mode, addr, as);
3229         }
3230       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3231           || USE_STORE_PRE_DECREMENT (mem_mode))
3232         {
3233           addr = gen_rtx_PRE_INC (address_mode, reg0);
3234           has_preinc[mem_mode]
3235             = memory_address_addr_space_p (mem_mode, addr, as);
3236         }
3237       if (USE_LOAD_POST_INCREMENT (mem_mode)
3238           || USE_STORE_POST_INCREMENT (mem_mode))
3239         {
3240           addr = gen_rtx_POST_INC (address_mode, reg0);
3241           has_postinc[mem_mode]
3242             = memory_address_addr_space_p (mem_mode, addr, as);
3243         }
3244       for (i = 0; i < 16; i++)
3245         {
3246           sym_p = i & 1;
3247           var_p = (i >> 1) & 1;
3248           off_p = (i >> 2) & 1;
3249           rat_p = (i >> 3) & 1;
3250
3251           addr = reg0;
3252           if (rat_p)
3253             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3254                                    gen_int_mode (rat, address_mode));
3255
3256           if (var_p)
3257             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3258
3259           if (sym_p)
3260             {
3261               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3262               /* ??? We can run into trouble with some backends by presenting
3263                  it with symbols which haven't been properly passed through
3264                  targetm.encode_section_info.  By setting the local bit, we
3265                  enhance the probability of things working.  */
3266               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3267
3268               if (off_p)
3269                 base = gen_rtx_fmt_e (CONST, address_mode,
3270                                       gen_rtx_fmt_ee
3271                                         (PLUS, address_mode, base,
3272                                          gen_int_mode (off, address_mode)));
3273             }
3274           else if (off_p)
3275             base = gen_int_mode (off, address_mode);
3276           else
3277             base = NULL_RTX;
3278
3279           if (base)
3280             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3281
3282           start_sequence ();
3283           /* To avoid splitting addressing modes, pretend that no cse will
3284              follow.  */
3285           old_cse_not_expected = cse_not_expected;
3286           cse_not_expected = true;
3287           addr = memory_address_addr_space (mem_mode, addr, as);
3288           cse_not_expected = old_cse_not_expected;
3289           seq = get_insns ();
3290           end_sequence ();
3291
3292           acost = seq_cost (seq, speed);
3293           acost += address_cost (addr, mem_mode, as, speed);
3294
3295           if (!acost)
3296             acost = 1;
3297           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3298         }
3299
3300       /* On some targets, it is quite expensive to load symbol to a register,
3301          which makes addresses that contain symbols look much more expensive.
3302          However, the symbol will have to be loaded in any case before the
3303          loop (and quite likely we have it in register already), so it does not
3304          make much sense to penalize them too heavily.  So make some final
3305          tweaks for the SYMBOL_PRESENT modes:
3306
3307          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3308          var is cheaper, use this mode with small penalty.
3309          If VAR_PRESENT is true, try whether the mode with
3310          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3311          if this is the case, use it.  */
3312       add_c = add_cost (speed, address_mode);
3313       for (i = 0; i < 8; i++)
3314         {
3315           var_p = i & 1;
3316           off_p = (i >> 1) & 1;
3317           rat_p = (i >> 2) & 1;
3318
3319           acost = data->costs[0][1][off_p][rat_p] + 1;
3320           if (var_p)
3321             acost += add_c;
3322
3323           if (acost < data->costs[1][var_p][off_p][rat_p])
3324             data->costs[1][var_p][off_p][rat_p] = acost;
3325         }
3326
3327       if (dump_file && (dump_flags & TDF_DETAILS))
3328         {
3329           fprintf (dump_file, "Address costs:\n");
3330
3331           for (i = 0; i < 16; i++)
3332             {
3333               sym_p = i & 1;
3334               var_p = (i >> 1) & 1;
3335               off_p = (i >> 2) & 1;
3336               rat_p = (i >> 3) & 1;
3337
3338               fprintf (dump_file, "  ");
3339               if (sym_p)
3340                 fprintf (dump_file, "sym + ");
3341               if (var_p)
3342                 fprintf (dump_file, "var + ");
3343               if (off_p)
3344                 fprintf (dump_file, "cst + ");
3345               if (rat_p)
3346                 fprintf (dump_file, "rat * ");
3347
3348               acost = data->costs[sym_p][var_p][off_p][rat_p];
3349               fprintf (dump_file, "index costs %d\n", acost);
3350             }
3351           if (has_predec[mem_mode] || has_postdec[mem_mode]
3352               || has_preinc[mem_mode] || has_postinc[mem_mode])
3353             fprintf (dump_file, "  May include autoinc/dec\n");
3354           fprintf (dump_file, "\n");
3355         }
3356
3357       VEC_replace (address_cost_data, address_cost_data_list,
3358                    data_index, data);
3359     }
3360
3361   bits = GET_MODE_BITSIZE (address_mode);
3362   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3363   offset &= mask;
3364   if ((offset >> (bits - 1) & 1))
3365     offset |= ~mask;
3366   s_offset = offset;
3367
3368   autoinc = false;
3369   msize = GET_MODE_SIZE (mem_mode);
3370   autoinc_offset = offset;
3371   if (stmt_after_inc)
3372     autoinc_offset += ratio * cstep;
3373   if (symbol_present || var_present || ratio != 1)
3374     autoinc = false;
3375   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3376                && msize == cstep)
3377            || (has_postdec[mem_mode] && autoinc_offset == 0
3378                && msize == -cstep)
3379            || (has_preinc[mem_mode] && autoinc_offset == msize
3380                && msize == cstep)
3381            || (has_predec[mem_mode] && autoinc_offset == -msize
3382                && msize == -cstep))
3383     autoinc = true;
3384
3385   cost = 0;
3386   offset_p = (s_offset != 0
3387               && data->min_offset <= s_offset
3388               && s_offset <= data->max_offset);
3389   ratio_p = (ratio != 1
3390              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3391
3392   if (ratio != 1 && !ratio_p)
3393     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3394
3395   if (s_offset && !offset_p && !symbol_present)
3396     cost += add_cost (speed, address_mode);
3397
3398   if (may_autoinc)
3399     *may_autoinc = autoinc;
3400   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3401   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3402   return new_cost (cost + acost, complexity);
3403 }
3404
3405  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3406     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3407     calculating the operands of EXPR.  Returns true if successful, and returns
3408     the cost in COST.  */
3409
3410 static bool
3411 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3412                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3413 {
3414   comp_cost res;
3415   tree op1 = TREE_OPERAND (expr, 1);
3416   tree cst = TREE_OPERAND (mult, 1);
3417   tree multop = TREE_OPERAND (mult, 0);
3418   int m = exact_log2 (int_cst_value (cst));
3419   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3420   int sa_cost;
3421
3422   if (!(m >= 0 && m < maxm))
3423     return false;
3424
3425   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3426              ? shiftadd_cost (speed, mode, m)
3427              : (mult == op1
3428                 ? shiftsub1_cost (speed, mode, m)
3429                 : shiftsub0_cost (speed, mode, m)));
3430   res = new_cost (sa_cost, 0);
3431   res = add_costs (res, mult == op1 ? cost0 : cost1);
3432
3433   STRIP_NOPS (multop);
3434   if (!is_gimple_val (multop))
3435     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3436
3437   *cost = res;
3438   return true;
3439 }
3440
3441 /* Estimates cost of forcing expression EXPR into a variable.  */
3442
3443 static comp_cost
3444 force_expr_to_var_cost (tree expr, bool speed)
3445 {
3446   static bool costs_initialized = false;
3447   static unsigned integer_cost [2];
3448   static unsigned symbol_cost [2];
3449   static unsigned address_cost [2];
3450   tree op0, op1;
3451   comp_cost cost0, cost1, cost;
3452   enum machine_mode mode;
3453
3454   if (!costs_initialized)
3455     {
3456       tree type = build_pointer_type (integer_type_node);
3457       tree var, addr;
3458       rtx x;
3459       int i;
3460
3461       var = create_tmp_var_raw (integer_type_node, "test_var");
3462       TREE_STATIC (var) = 1;
3463       x = produce_memory_decl_rtl (var, NULL);
3464       SET_DECL_RTL (var, x);
3465
3466       addr = build1 (ADDR_EXPR, type, var);
3467
3468
3469       for (i = 0; i < 2; i++)
3470         {
3471           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3472                                                              2000), i);
3473
3474           symbol_cost[i] = computation_cost (addr, i) + 1;
3475
3476           address_cost[i]
3477             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3478           if (dump_file && (dump_flags & TDF_DETAILS))
3479             {
3480               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3481               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3482               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3483               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3484               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3485               fprintf (dump_file, "\n");
3486             }
3487         }
3488
3489       costs_initialized = true;
3490     }
3491
3492   STRIP_NOPS (expr);
3493
3494   if (SSA_VAR_P (expr))
3495     return no_cost;
3496
3497   if (is_gimple_min_invariant (expr))
3498     {
3499       if (TREE_CODE (expr) == INTEGER_CST)
3500         return new_cost (integer_cost [speed], 0);
3501
3502       if (TREE_CODE (expr) == ADDR_EXPR)
3503         {
3504           tree obj = TREE_OPERAND (expr, 0);
3505
3506           if (TREE_CODE (obj) == VAR_DECL
3507               || TREE_CODE (obj) == PARM_DECL
3508               || TREE_CODE (obj) == RESULT_DECL)
3509             return new_cost (symbol_cost [speed], 0);
3510         }
3511
3512       return new_cost (address_cost [speed], 0);
3513     }
3514
3515   switch (TREE_CODE (expr))
3516     {
3517     case POINTER_PLUS_EXPR:
3518     case PLUS_EXPR:
3519     case MINUS_EXPR:
3520     case MULT_EXPR:
3521       op0 = TREE_OPERAND (expr, 0);
3522       op1 = TREE_OPERAND (expr, 1);
3523       STRIP_NOPS (op0);
3524       STRIP_NOPS (op1);
3525
3526       if (is_gimple_val (op0))
3527         cost0 = no_cost;
3528       else
3529         cost0 = force_expr_to_var_cost (op0, speed);
3530
3531       if (is_gimple_val (op1))
3532         cost1 = no_cost;
3533       else
3534         cost1 = force_expr_to_var_cost (op1, speed);
3535
3536       break;
3537
3538     case NEGATE_EXPR:
3539       op0 = TREE_OPERAND (expr, 0);
3540       STRIP_NOPS (op0);
3541       op1 = NULL_TREE;
3542
3543       if (is_gimple_val (op0))
3544         cost0 = no_cost;
3545       else
3546         cost0 = force_expr_to_var_cost (op0, speed);
3547
3548       cost1 = no_cost;
3549       break;
3550
3551     default:
3552       /* Just an arbitrary value, FIXME.  */
3553       return new_cost (target_spill_cost[speed], 0);
3554     }
3555
3556   mode = TYPE_MODE (TREE_TYPE (expr));
3557   switch (TREE_CODE (expr))
3558     {
3559     case POINTER_PLUS_EXPR:
3560     case PLUS_EXPR:
3561     case MINUS_EXPR:
3562     case NEGATE_EXPR:
3563       cost = new_cost (add_cost (speed, mode), 0);
3564       if (TREE_CODE (expr) != NEGATE_EXPR)
3565         {
3566           tree mult = NULL_TREE;
3567           comp_cost sa_cost;
3568           if (TREE_CODE (op1) == MULT_EXPR)
3569             mult = op1;
3570           else if (TREE_CODE (op0) == MULT_EXPR)
3571             mult = op0;
3572
3573           if (mult != NULL_TREE
3574               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3575               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3576                                     speed, &sa_cost))
3577             return sa_cost;
3578         }
3579       break;
3580
3581     case MULT_EXPR:
3582       if (cst_and_fits_in_hwi (op0))
3583         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3584                                              mode, speed), 0);
3585       else if (cst_and_fits_in_hwi (op1))
3586         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3587                                              mode, speed), 0);
3588       else
3589         return new_cost (target_spill_cost [speed], 0);
3590       break;
3591
3592     default:
3593       gcc_unreachable ();
3594     }
3595
3596   cost = add_costs (cost, cost0);
3597   cost = add_costs (cost, cost1);
3598
3599   /* Bound the cost by target_spill_cost.  The parts of complicated
3600      computations often are either loop invariant or at least can
3601      be shared between several iv uses, so letting this grow without
3602      limits would not give reasonable results.  */
3603   if (cost.cost > (int) target_spill_cost [speed])
3604     cost.cost = target_spill_cost [speed];
3605
3606   return cost;
3607 }
3608
3609 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3610    invariants the computation depends on.  */
3611
3612 static comp_cost
3613 force_var_cost (struct ivopts_data *data,
3614                 tree expr, bitmap *depends_on)
3615 {
3616   if (depends_on)
3617     {
3618       fd_ivopts_data = data;
3619       walk_tree (&expr, find_depends, depends_on, NULL);
3620     }
3621
3622   return force_expr_to_var_cost (expr, data->speed);
3623 }
3624
3625 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3626    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3627    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3628    invariants the computation depends on.  */
3629
3630 static comp_cost
3631 split_address_cost (struct ivopts_data *data,
3632                     tree addr, bool *symbol_present, bool *var_present,
3633                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3634 {
3635   tree core;
3636   HOST_WIDE_INT bitsize;
3637   HOST_WIDE_INT bitpos;
3638   tree toffset;
3639   enum machine_mode mode;
3640   int unsignedp, volatilep;
3641
3642   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3643                               &unsignedp, &volatilep, false);
3644
3645   if (toffset != 0
3646       || bitpos % BITS_PER_UNIT != 0
3647       || TREE_CODE (core) != VAR_DECL)
3648     {
3649       *symbol_present = false;
3650       *var_present = true;
3651       fd_ivopts_data = data;
3652       walk_tree (&addr, find_depends, depends_on, NULL);
3653       return new_cost (target_spill_cost[data->speed], 0);
3654     }
3655
3656   *offset += bitpos / BITS_PER_UNIT;
3657   if (TREE_STATIC (core)
3658       || DECL_EXTERNAL (core))
3659     {
3660       *symbol_present = true;
3661       *var_present = false;
3662       return no_cost;
3663     }
3664
3665   *symbol_present = false;
3666   *var_present = true;
3667   return no_cost;
3668 }
3669
3670 /* Estimates cost of expressing difference of addresses E1 - E2 as
3671    var + symbol + offset.  The value of offset is added to OFFSET,
3672    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3673    part is missing.  DEPENDS_ON is a set of the invariants the computation
3674    depends on.  */
3675
3676 static comp_cost
3677 ptr_difference_cost (struct ivopts_data *data,
3678                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3679                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3680 {
3681   HOST_WIDE_INT diff = 0;
3682   aff_tree aff_e1, aff_e2;
3683   tree type;
3684
3685   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3686
3687   if (ptr_difference_const (e1, e2, &diff))
3688     {
3689       *offset += diff;
3690       *symbol_present = false;
3691       *var_present = false;
3692       return no_cost;
3693     }
3694
3695   if (integer_zerop (e2))
3696     return split_address_cost (data, TREE_OPERAND (e1, 0),
3697                                symbol_present, var_present, offset, depends_on);
3698
3699   *symbol_present = false;
3700   *var_present = true;
3701
3702   type = signed_type_for (TREE_TYPE (e1));
3703   tree_to_aff_combination (e1, type, &aff_e1);
3704   tree_to_aff_combination (e2, type, &aff_e2);
3705   aff_combination_scale (&aff_e2, double_int_minus_one);
3706   aff_combination_add (&aff_e1, &aff_e2);
3707
3708   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3709 }
3710
3711 /* Estimates cost of expressing difference E1 - E2 as
3712    var + symbol + offset.  The value of offset is added to OFFSET,
3713    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3714    part is missing.  DEPENDS_ON is a set of the invariants the computation
3715    depends on.  */
3716
3717 static comp_cost
3718 difference_cost (struct ivopts_data *data,
3719                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3720                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3721 {
3722   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3723   unsigned HOST_WIDE_INT off1, off2;
3724   aff_tree aff_e1, aff_e2;
3725   tree type;
3726
3727   e1 = strip_offset (e1, &off1);
3728   e2 = strip_offset (e2, &off2);
3729   *offset += off1 - off2;
3730
3731   STRIP_NOPS (e1);
3732   STRIP_NOPS (e2);
3733
3734   if (TREE_CODE (e1) == ADDR_EXPR)
3735     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3736                                 offset, depends_on);
3737   *symbol_present = false;
3738
3739   if (operand_equal_p (e1, e2, 0))
3740     {
3741       *var_present = false;
3742       return no_cost;
3743     }
3744
3745   *var_present = true;
3746
3747   if (integer_zerop (e2))
3748     return force_var_cost (data, e1, depends_on);
3749
3750   if (integer_zerop (e1))
3751     {
3752       comp_cost cost = force_var_cost (data, e2, depends_on);
3753       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3754       return cost;
3755     }
3756
3757   type = signed_type_for (TREE_TYPE (e1));
3758   tree_to_aff_combination (e1, type, &aff_e1);
3759   tree_to_aff_combination (e2, type, &aff_e2);
3760   aff_combination_scale (&aff_e2, double_int_minus_one);
3761   aff_combination_add (&aff_e1, &aff_e2);
3762
3763   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3764 }
3765
3766 /* Returns true if AFF1 and AFF2 are identical.  */
3767
3768 static bool
3769 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3770 {
3771   unsigned i;
3772
3773   if (aff1->n != aff2->n)
3774     return false;
3775
3776   for (i = 0; i < aff1->n; i++)
3777     {
3778       if (aff1->elts[i].coef != aff2->elts[i].coef)
3779         return false;
3780
3781       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3782         return false;
3783     }
3784   return true;
3785 }
3786
3787 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3788
3789 static int
3790 get_expr_id (struct ivopts_data *data, tree expr)
3791 {
3792   struct iv_inv_expr_ent ent;
3793   struct iv_inv_expr_ent **slot;
3794
3795   ent.expr = expr;
3796   ent.hash = iterative_hash_expr (expr, 0);
3797   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3798                                                      &ent, INSERT);
3799   if (*slot)
3800     return (*slot)->id;
3801
3802   *slot = XNEW (struct iv_inv_expr_ent);
3803   (*slot)->expr = expr;
3804   (*slot)->hash = ent.hash;
3805   (*slot)->id = data->inv_expr_id++;
3806   return (*slot)->id;
3807 }
3808
3809 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3810    requires a new compiler generated temporary.  Returns -1 otherwise.
3811    ADDRESS_P is a flag indicating if the expression is for address
3812    computation.  */
3813
3814 static int
3815 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3816                             tree cbase, HOST_WIDE_INT ratio,
3817                             bool address_p)
3818 {
3819   aff_tree ubase_aff, cbase_aff;
3820   tree expr, ub, cb;
3821
3822   STRIP_NOPS (ubase);
3823   STRIP_NOPS (cbase);
3824   ub = ubase;
3825   cb = cbase;
3826
3827   if ((TREE_CODE (ubase) == INTEGER_CST)
3828       && (TREE_CODE (cbase) == INTEGER_CST))
3829     return -1;
3830
3831   /* Strips the constant part. */
3832   if (TREE_CODE (ubase) == PLUS_EXPR
3833       || TREE_CODE (ubase) == MINUS_EXPR
3834       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3835     {
3836       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3837         ubase = TREE_OPERAND (ubase, 0);
3838     }
3839
3840   /* Strips the constant part. */
3841   if (TREE_CODE (cbase) == PLUS_EXPR
3842       || TREE_CODE (cbase) == MINUS_EXPR
3843       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3844     {
3845       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3846         cbase = TREE_OPERAND (cbase, 0);
3847     }
3848
3849   if (address_p)
3850     {
3851       if (((TREE_CODE (ubase) == SSA_NAME)
3852            || (TREE_CODE (ubase) == ADDR_EXPR
3853                && is_gimple_min_invariant (ubase)))
3854           && (TREE_CODE (cbase) == INTEGER_CST))
3855         return -1;
3856
3857       if (((TREE_CODE (cbase) == SSA_NAME)
3858            || (TREE_CODE (cbase) == ADDR_EXPR
3859                && is_gimple_min_invariant (cbase)))
3860           && (TREE_CODE (ubase) == INTEGER_CST))
3861         return -1;
3862     }
3863
3864   if (ratio == 1)
3865     {
3866       if(operand_equal_p (ubase, cbase, 0))
3867         return -1;
3868
3869       if (TREE_CODE (ubase) == ADDR_EXPR
3870           && TREE_CODE (cbase) == ADDR_EXPR)
3871         {
3872           tree usym, csym;
3873
3874           usym = TREE_OPERAND (ubase, 0);
3875           csym = TREE_OPERAND (cbase, 0);
3876           if (TREE_CODE (usym) == ARRAY_REF)
3877             {
3878               tree ind = TREE_OPERAND (usym, 1);
3879               if (TREE_CODE (ind) == INTEGER_CST
3880                   && host_integerp (ind, 0)
3881                   && TREE_INT_CST_LOW (ind) == 0)
3882                 usym = TREE_OPERAND (usym, 0);
3883             }
3884           if (TREE_CODE (csym) == ARRAY_REF)
3885             {
3886               tree ind = TREE_OPERAND (csym, 1);
3887               if (TREE_CODE (ind) == INTEGER_CST
3888                   && host_integerp (ind, 0)
3889                   && TREE_INT_CST_LOW (ind) == 0)
3890                 csym = TREE_OPERAND (csym, 0);
3891             }
3892           if (operand_equal_p (usym, csym, 0))
3893             return -1;
3894         }
3895       /* Now do more complex comparison  */
3896       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3897       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3898       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3899         return -1;
3900     }
3901
3902   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3903   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3904
3905   aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3906   aff_combination_add (&ubase_aff, &cbase_aff);
3907   expr = aff_combination_to_tree (&ubase_aff);
3908   return get_expr_id (data, expr);
3909 }
3910
3911
3912
3913 /* Determines the cost of the computation by that USE is expressed
3914    from induction variable CAND.  If ADDRESS_P is true, we just need
3915    to create an address from it, otherwise we want to get it into
3916    register.  A set of invariants we depend on is stored in
3917    DEPENDS_ON.  AT is the statement at that the value is computed.
3918    If CAN_AUTOINC is nonnull, use it to record whether autoinc
3919    addressing is likely.  */
3920
3921 static comp_cost
3922 get_computation_cost_at (struct ivopts_data *data,
3923                          struct iv_use *use, struct iv_cand *cand,
3924                          bool address_p, bitmap *depends_on, gimple at,
3925                          bool *can_autoinc,
3926                          int *inv_expr_id)
3927 {
3928   tree ubase = use->iv->base, ustep = use->iv->step;
3929   tree cbase, cstep;
3930   tree utype = TREE_TYPE (ubase), ctype;
3931   unsigned HOST_WIDE_INT cstepi, offset = 0;
3932   HOST_WIDE_INT ratio, aratio;
3933   bool var_present, symbol_present, stmt_is_after_inc;
3934   comp_cost cost;
3935   double_int rat;
3936   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3937
3938   *depends_on = NULL;
3939
3940   /* Only consider real candidates.  */
3941   if (!cand->iv)
3942     return infinite_cost;
3943
3944   cbase = cand->iv->base;
3945   cstep = cand->iv->step;
3946   ctype = TREE_TYPE (cbase);
3947
3948   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3949     {
3950       /* We do not have a precision to express the values of use.  */
3951       return infinite_cost;
3952     }
3953
3954   if (address_p
3955       || (use->iv->base_object
3956           && cand->iv->base_object
3957           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3958           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3959     {
3960       /* Do not try to express address of an object with computation based
3961          on address of a different object.  This may cause problems in rtl
3962          level alias analysis (that does not expect this to be happening,
3963          as this is illegal in C), and would be unlikely to be useful
3964          anyway.  */
3965       if (use->iv->base_object
3966           && cand->iv->base_object
3967           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3968         return infinite_cost;
3969     }
3970
3971   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3972     {
3973       /* TODO -- add direct handling of this case.  */
3974       goto fallback;
3975     }
3976
3977   /* CSTEPI is removed from the offset in case statement is after the
3978      increment.  If the step is not constant, we use zero instead.
3979      This is a bit imprecise (there is the extra addition), but
3980      redundancy elimination is likely to transform the code so that
3981      it uses value of the variable before increment anyway,
3982      so it is not that much unrealistic.  */
3983   if (cst_and_fits_in_hwi (cstep))
3984     cstepi = int_cst_value (cstep);
3985   else
3986     cstepi = 0;
3987
3988   if (!constant_multiple_of (ustep, cstep, &rat))
3989     return infinite_cost;
3990
3991   if (rat.fits_shwi ())
3992     ratio = rat.to_shwi ();
3993   else
3994     return infinite_cost;
3995
3996   STRIP_NOPS (cbase);
3997   ctype = TREE_TYPE (cbase);
3998
3999   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4000
4001   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4002      or ratio == 1, it is better to handle this like
4003
4004      ubase - ratio * cbase + ratio * var
4005
4006      (also holds in the case ratio == -1, TODO.  */
4007
4008   if (cst_and_fits_in_hwi (cbase))
4009     {
4010       offset = - ratio * int_cst_value (cbase);
4011       cost = difference_cost (data,
4012                               ubase, build_int_cst (utype, 0),
4013                               &symbol_present, &var_present, &offset,
4014                               depends_on);
4015       cost.cost /= avg_loop_niter (data->current_loop);
4016     }
4017   else if (ratio == 1)
4018     {
4019       tree real_cbase = cbase;
4020
4021       /* Check to see if any adjustment is needed.  */
4022       if (cstepi == 0 && stmt_is_after_inc)
4023         {
4024           aff_tree real_cbase_aff;
4025           aff_tree cstep_aff;
4026
4027           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4028                                    &real_cbase_aff);
4029           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4030
4031           aff_combination_add (&real_cbase_aff, &cstep_aff);
4032           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4033         }
4034
4035       cost = difference_cost (data,
4036                               ubase, real_cbase,
4037                               &symbol_present, &var_present, &offset,
4038                               depends_on);
4039       cost.cost /= avg_loop_niter (data->current_loop);
4040     }
4041   else if (address_p
4042            && !POINTER_TYPE_P (ctype)
4043            && multiplier_allowed_in_address_p
4044                 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4045                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4046     {
4047       cbase
4048         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4049       cost = difference_cost (data,
4050                               ubase, cbase,
4051                               &symbol_present, &var_present, &offset,
4052                               depends_on);
4053       cost.cost /= avg_loop_niter (data->current_loop);
4054     }
4055   else
4056     {
4057       cost = force_var_cost (data, cbase, depends_on);
4058       cost = add_costs (cost,
4059                         difference_cost (data,
4060                                          ubase, build_int_cst (utype, 0),
4061                                          &symbol_present, &var_present,
4062                                          &offset, depends_on));
4063       cost.cost /= avg_loop_niter (data->current_loop);
4064       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4065     }
4066
4067   if (inv_expr_id)
4068     {
4069       *inv_expr_id =
4070           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4071       /* Clear depends on.  */
4072       if (*inv_expr_id != -1 && depends_on && *depends_on)
4073         bitmap_clear (*depends_on);
4074     }
4075
4076   /* If we are after the increment, the value of the candidate is higher by
4077      one iteration.  */
4078   if (stmt_is_after_inc)
4079     offset -= ratio * cstepi;
4080
4081   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4082      (symbol/var1/const parts may be omitted).  If we are looking for an
4083      address, find the cost of addressing this.  */
4084   if (address_p)
4085     return add_costs (cost,
4086                       get_address_cost (symbol_present, var_present,
4087                                         offset, ratio, cstepi,
4088                                         TYPE_MODE (TREE_TYPE (utype)),
4089                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4090                                         speed, stmt_is_after_inc,
4091                                         can_autoinc));
4092
4093   /* Otherwise estimate the costs for computing the expression.  */
4094   if (!symbol_present && !var_present && !offset)
4095     {
4096       if (ratio != 1)
4097         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4098       return cost;
4099     }
4100
4101   /* Symbol + offset should be compile-time computable so consider that they
4102       are added once to the variable, if present.  */
4103   if (var_present && (symbol_present || offset))
4104     cost.cost += adjust_setup_cost (data,
4105                                     add_cost (speed, TYPE_MODE (ctype)));
4106
4107   /* Having offset does not affect runtime cost in case it is added to
4108      symbol, but it increases complexity.  */
4109   if (offset)
4110     cost.complexity++;
4111
4112   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4113
4114   aratio = ratio > 0 ? ratio : -ratio;
4115   if (aratio != 1)
4116     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4117   return cost;
4118
4119 fallback:
4120   if (can_autoinc)
4121     *can_autoinc = false;
4122
4123   {
4124     /* Just get the expression, expand it and measure the cost.  */
4125     tree comp = get_computation_at (data->current_loop, use, cand, at);
4126
4127     if (!comp)
4128       return infinite_cost;
4129
4130     if (address_p)
4131       comp = build_simple_mem_ref (comp);
4132
4133     return new_cost (computation_cost (comp, speed), 0);
4134   }
4135 }
4136
4137 /* Determines the cost of the computation by that USE is expressed
4138    from induction variable CAND.  If ADDRESS_P is true, we just need
4139    to create an address from it, otherwise we want to get it into
4140    register.  A set of invariants we depend on is stored in
4141    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4142    autoinc addressing is likely.  */
4143
4144 static comp_cost
4145 get_computation_cost (struct ivopts_data *data,
4146                       struct iv_use *use, struct iv_cand *cand,
4147                       bool address_p, bitmap *depends_on,
4148                       bool *can_autoinc, int *inv_expr_id)
4149 {
4150   return get_computation_cost_at (data,
4151                                   use, cand, address_p, depends_on, use->stmt,
4152                                   can_autoinc, inv_expr_id);
4153 }
4154
4155 /* Determines cost of basing replacement of USE on CAND in a generic
4156    expression.  */
4157
4158 static bool
4159 determine_use_iv_cost_generic (struct ivopts_data *data,
4160                                struct iv_use *use, struct iv_cand *cand)
4161 {
4162   bitmap depends_on;
4163   comp_cost cost;
4164   int inv_expr_id = -1;
4165
4166   /* The simple case first -- if we need to express value of the preserved
4167      original biv, the cost is 0.  This also prevents us from counting the
4168      cost of increment twice -- once at this use and once in the cost of
4169      the candidate.  */
4170   if (cand->pos == IP_ORIGINAL
4171       && cand->incremented_at == use->stmt)
4172     {
4173       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4174                        ERROR_MARK, -1);
4175       return true;
4176     }
4177
4178   cost = get_computation_cost (data, use, cand, false, &depends_on,
4179                                NULL, &inv_expr_id);
4180
4181   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4182                    inv_expr_id);
4183
4184   return !infinite_cost_p (cost);
4185 }
4186
4187 /* Determines cost of basing replacement of USE on CAND in an address.  */
4188
4189 static bool
4190 determine_use_iv_cost_address (struct ivopts_data *data,
4191                                struct iv_use *use, struct iv_cand *cand)
4192 {
4193   bitmap depends_on;
4194   bool can_autoinc;
4195   int inv_expr_id = -1;
4196   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4197                                          &can_autoinc, &inv_expr_id);
4198
4199   if (cand->ainc_use == use)
4200     {
4201       if (can_autoinc)
4202         cost.cost -= cand->cost_step;
4203       /* If we generated the candidate solely for exploiting autoincrement
4204          opportunities, and it turns out it can't be used, set the cost to
4205          infinity to make sure we ignore it.  */
4206       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4207         cost = infinite_cost;
4208     }
4209   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4210                    inv_expr_id);
4211
4212   return !infinite_cost_p (cost);
4213 }
4214
4215 /* Computes value of candidate CAND at position AT in iteration NITER, and
4216    stores it to VAL.  */
4217
4218 static void
4219 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4220                aff_tree *val)
4221 {
4222   aff_tree step, delta, nit;
4223   struct iv *iv = cand->iv;
4224   tree type = TREE_TYPE (iv->base);
4225   tree steptype = type;
4226   if (POINTER_TYPE_P (type))
4227     steptype = sizetype;
4228
4229   tree_to_aff_combination (iv->step, steptype, &step);
4230   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4231   aff_combination_convert (&nit, steptype);
4232   aff_combination_mult (&nit, &step, &delta);
4233   if (stmt_after_increment (loop, cand, at))
4234     aff_combination_add (&delta, &step);
4235
4236   tree_to_aff_combination (iv->base, type, val);
4237   aff_combination_add (val, &delta);
4238 }
4239
4240 /* Returns period of induction variable iv.  */
4241
4242 static tree
4243 iv_period (struct iv *iv)
4244 {
4245   tree step = iv->step, period, type;
4246   tree pow2div;
4247
4248   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4249
4250   type = unsigned_type_for (TREE_TYPE (step));
4251   /* Period of the iv is lcm (step, type_range)/step -1,
4252      i.e., N*type_range/step - 1. Since type range is power
4253      of two, N == (step >> num_of_ending_zeros_binary (step),
4254      so the final result is
4255
4256        (type_range >> num_of_ending_zeros_binary (step)) - 1
4257
4258   */
4259   pow2div = num_ending_zeros (step);
4260
4261   period = build_low_bits_mask (type,
4262                                 (TYPE_PRECISION (type)
4263                                  - tree_low_cst (pow2div, 1)));
4264
4265   return period;
4266 }
4267
4268 /* Returns the comparison operator used when eliminating the iv USE.  */
4269
4270 static enum tree_code
4271 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4272 {
4273   struct loop *loop = data->current_loop;
4274   basic_block ex_bb;
4275   edge exit;
4276
4277   ex_bb = gimple_bb (use->stmt);
4278   exit = EDGE_SUCC (ex_bb, 0);
4279   if (flow_bb_inside_loop_p (loop, exit->dest))
4280     exit = EDGE_SUCC (ex_bb, 1);
4281
4282   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4283 }
4284
4285 static tree
4286 strip_wrap_conserving_type_conversions (tree exp)
4287 {
4288   while (tree_ssa_useless_type_conversion (exp)
4289          && (nowrap_type_p (TREE_TYPE (exp))
4290              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4291     exp = TREE_OPERAND (exp, 0);
4292   return exp;
4293 }
4294
4295 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4296    check for an exact match.  */
4297
4298 static bool
4299 expr_equal_p (tree e, tree what)
4300 {
4301   gimple stmt;
4302   enum tree_code code;
4303
4304   e = strip_wrap_conserving_type_conversions (e);
4305   what = strip_wrap_conserving_type_conversions (what);
4306
4307   code = TREE_CODE (what);
4308   if (TREE_TYPE (e) != TREE_TYPE (what))
4309     return false;
4310
4311   if (operand_equal_p (e, what, 0))
4312     return true;
4313
4314   if (TREE_CODE (e) != SSA_NAME)
4315     return false;
4316
4317   stmt = SSA_NAME_DEF_STMT (e);
4318   if (gimple_code (stmt) != GIMPLE_ASSIGN
4319       || gimple_assign_rhs_code (stmt) != code)
4320     return false;
4321
4322   switch (get_gimple_rhs_class (code))
4323     {
4324     case GIMPLE_BINARY_RHS:
4325       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4326         return false;
4327       /* Fallthru.  */
4328
4329     case GIMPLE_UNARY_RHS:
4330     case GIMPLE_SINGLE_RHS:
4331       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4332     default:
4333       return false;
4334     }
4335 }
4336
4337 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4338    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4339    calculation is performed in non-wrapping type.
4340
4341    TODO: More generally, we could test for the situation that
4342          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4343          This would require knowing the sign of OFFSET.
4344
4345          Also, we only look for the first addition in the computation of BASE.
4346          More complex analysis would be better, but introducing it just for
4347          this optimization seems like an overkill.  */
4348
4349 static bool
4350 difference_cannot_overflow_p (tree base, tree offset)
4351 {
4352   enum tree_code code;
4353   tree e1, e2;
4354
4355   if (!nowrap_type_p (TREE_TYPE (base)))
4356     return false;
4357
4358   base = expand_simple_operations (base);
4359
4360   if (TREE_CODE (base) == SSA_NAME)
4361     {
4362       gimple stmt = SSA_NAME_DEF_STMT (base);
4363
4364       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4365         return false;
4366
4367       code = gimple_assign_rhs_code (stmt);
4368       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4369         return false;
4370
4371       e1 = gimple_assign_rhs1 (stmt);
4372       e2 = gimple_assign_rhs2 (stmt);
4373     }
4374   else
4375     {
4376       code = TREE_CODE (base);
4377       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4378         return false;
4379       e1 = TREE_OPERAND (base, 0);
4380       e2 = TREE_OPERAND (base, 1);
4381     }
4382
4383   /* TODO: deeper inspection may be necessary to prove the equality.  */
4384   switch (code)
4385     {
4386     case PLUS_EXPR:
4387       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4388     case POINTER_PLUS_EXPR:
4389       return expr_equal_p (e2, offset);
4390
4391     default:
4392       return false;
4393     }
4394 }
4395
4396 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4397    comparison with CAND.  NITER describes the number of iterations of
4398    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4399
4400    We aim to handle the following situation:
4401
4402    sometype *base, *p;
4403    int a, b, i;
4404
4405    i = a;
4406    p = p_0 = base + a;
4407
4408    do
4409      {
4410        bla (*p);
4411        p++;
4412        i++;
4413      }
4414    while (i < b);
4415
4416    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4417    We aim to optimize this to
4418
4419    p = p_0 = base + a;
4420    do
4421      {
4422        bla (*p);
4423        p++;
4424      }
4425    while (p < p_0 - a + b);
4426
4427    This preserves the correctness, since the pointer arithmetics does not
4428    overflow.  More precisely:
4429
4430    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4431       overflow in computing it or the values of p.
4432    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4433       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4434
4435 static bool
4436 iv_elimination_compare_lt (struct ivopts_data *data,
4437                            struct iv_cand *cand, enum tree_code *comp_p,
4438                            struct tree_niter_desc *niter)
4439 {
4440   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4441   struct affine_tree_combination nit, tmpa, tmpb;
4442   enum tree_code comp;
4443   HOST_WIDE_INT step;
4444
4445   /* We need to know that the candidate induction variable does not overflow.
4446      While more complex analysis may be used to prove this, for now just
4447      check that the variable appears in the original program and that it
4448      is computed in a type that guarantees no overflows.  */
4449   cand_type = TREE_TYPE (cand->iv->base);
4450   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4451     return false;
4452
4453   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4454      the calculation of the BOUND could overflow, making the comparison
4455      invalid.  */
4456   if (!data->loop_single_exit_p)
4457     return false;
4458
4459   /* We need to be able to decide whether candidate is increasing or decreasing
4460      in order to choose the right comparison operator.  */
4461   if (!cst_and_fits_in_hwi (cand->iv->step))
4462     return false;
4463   step = int_cst_value (cand->iv->step);
4464
4465   /* Check that the number of iterations matches the expected pattern:
4466      a + 1 > b ? 0 : b - a - 1.  */
4467   mbz = niter->may_be_zero;
4468   if (TREE_CODE (mbz) == GT_EXPR)
4469     {
4470       /* Handle a + 1 > b.  */
4471       tree op0 = TREE_OPERAND (mbz, 0);
4472       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4473         {
4474           a = TREE_OPERAND (op0, 0);
4475           b = TREE_OPERAND (mbz, 1);
4476         }
4477       else
4478         return false;
4479     }
4480   else if (TREE_CODE (mbz) == LT_EXPR)
4481     {
4482       tree op1 = TREE_OPERAND (mbz, 1);
4483
4484       /* Handle b < a + 1.  */
4485       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4486         {
4487           a = TREE_OPERAND (op1, 0);
4488           b = TREE_OPERAND (mbz, 0);
4489         }
4490       else
4491         return false;
4492     }
4493   else
4494     return false;
4495
4496   /* Expected number of iterations is B - A - 1.  Check that it matches
4497      the actual number, i.e., that B - A - NITER = 1.  */
4498   tree_to_aff_combination (niter->niter, nit_type, &nit);
4499   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4500   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4501   aff_combination_scale (&nit, double_int_minus_one);
4502   aff_combination_scale (&tmpa, double_int_minus_one);
4503   aff_combination_add (&tmpb, &tmpa);
4504   aff_combination_add (&tmpb, &nit);
4505   if (tmpb.n != 0 || tmpb.offset != double_int_one)
4506     return false;
4507
4508   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4509      overflow.  */
4510   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4511                         cand->iv->step,
4512                         fold_convert (TREE_TYPE (cand->iv->step), a));
4513   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4514     return false;
4515
4516   /* Determine the new comparison operator.  */
4517   comp = step < 0 ? GT_EXPR : LT_EXPR;
4518   if (*comp_p == NE_EXPR)
4519     *comp_p = comp;
4520   else if (*comp_p == EQ_EXPR)
4521     *comp_p = invert_tree_comparison (comp, false);
4522   else
4523     gcc_unreachable ();
4524
4525   return true;
4526 }
4527
4528 /* Check whether it is possible to express the condition in USE by comparison
4529    of candidate CAND.  If so, store the value compared with to BOUND, and the
4530    comparison operator to COMP.  */
4531
4532 static bool
4533 may_eliminate_iv (struct ivopts_data *data,
4534                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4535                   enum tree_code *comp)
4536 {
4537   basic_block ex_bb;
4538   edge exit;
4539   tree period;
4540   struct loop *loop = data->current_loop;
4541   aff_tree bnd;
4542   struct tree_niter_desc *desc = NULL;
4543
4544   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4545     return false;
4546
4547   /* For now works only for exits that dominate the loop latch.
4548      TODO: extend to other conditions inside loop body.  */
4549   ex_bb = gimple_bb (use->stmt);
4550   if (use->stmt != last_stmt (ex_bb)
4551       || gimple_code (use->stmt) != GIMPLE_COND
4552       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4553     return false;
4554
4555   exit = EDGE_SUCC (ex_bb, 0);
4556   if (flow_bb_inside_loop_p (loop, exit->dest))
4557     exit = EDGE_SUCC (ex_bb, 1);
4558   if (flow_bb_inside_loop_p (loop, exit->dest))
4559     return false;
4560
4561   desc = niter_for_exit (data, exit);
4562   if (!desc)
4563     return false;
4564
4565   /* Determine whether we can use the variable to test the exit condition.
4566      This is the case iff the period of the induction variable is greater
4567      than the number of iterations for which the exit condition is true.  */
4568   period = iv_period (cand->iv);
4569
4570   /* If the number of iterations is constant, compare against it directly.  */
4571   if (TREE_CODE (desc->niter) == INTEGER_CST)
4572     {
4573       /* See cand_value_at.  */
4574       if (stmt_after_increment (loop, cand, use->stmt))
4575         {
4576           if (!tree_int_cst_lt (desc->niter, period))
4577             return false;
4578         }
4579       else
4580         {
4581           if (tree_int_cst_lt (period, desc->niter))
4582             return false;
4583         }
4584     }
4585
4586   /* If not, and if this is the only possible exit of the loop, see whether
4587      we can get a conservative estimate on the number of iterations of the
4588      entire loop and compare against that instead.  */
4589   else
4590     {
4591       double_int period_value, max_niter;
4592
4593       max_niter = desc->max;
4594       if (stmt_after_increment (loop, cand, use->stmt))
4595         max_niter += double_int_one;
4596       period_value = tree_to_double_int (period);
4597       if (max_niter.ugt (period_value))
4598         {
4599           /* See if we can take advantage of inferred loop bound information.  */
4600           if (data->loop_single_exit_p)
4601             {
4602               if (!max_loop_iterations (loop, &max_niter))
4603                 return false;
4604               /* The loop bound is already adjusted by adding 1.  */
4605               if (max_niter.ugt (period_value))
4606                 return false;
4607             }
4608           else
4609             return false;
4610         }
4611     }
4612
4613   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4614
4615   *bound = aff_combination_to_tree (&bnd);
4616   *comp = iv_elimination_compare (data, use);
4617
4618   /* It is unlikely that computing the number of iterations using division
4619      would be more profitable than keeping the original induction variable.  */
4620   if (expression_expensive_p (*bound))
4621     return false;
4622
4623   /* Sometimes, it is possible to handle the situation that the number of
4624      iterations may be zero unless additional assumtions by using <
4625      instead of != in the exit condition.
4626
4627      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4628            base the exit condition on it.  However, that is often too
4629            expensive.  */
4630   if (!integer_zerop (desc->may_be_zero))
4631     return iv_elimination_compare_lt (data, cand, comp, desc);
4632
4633   return true;
4634 }
4635
4636  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4637     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4638
4639 static int
4640 parm_decl_cost (struct ivopts_data *data, tree bound)
4641 {
4642   tree sbound = bound;
4643   STRIP_NOPS (sbound);
4644
4645   if (TREE_CODE (sbound) == SSA_NAME
4646       && SSA_NAME_IS_DEFAULT_DEF (sbound)
4647       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4648       && data->body_includes_call)
4649     return COSTS_N_INSNS (1);
4650
4651   return 0;
4652 }
4653
4654 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4655
4656 static bool
4657 determine_use_iv_cost_condition (struct ivopts_data *data,
4658                                  struct iv_use *use, struct iv_cand *cand)
4659 {
4660   tree bound = NULL_TREE;
4661   struct iv *cmp_iv;
4662   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4663   comp_cost elim_cost, express_cost, cost, bound_cost;
4664   bool ok;
4665   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4666   tree *control_var, *bound_cst;
4667   enum tree_code comp = ERROR_MARK;
4668
4669   /* Only consider real candidates.  */
4670   if (!cand->iv)
4671     {
4672       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4673                        ERROR_MARK, -1);
4674       return false;
4675     }
4676
4677   /* Try iv elimination.  */
4678   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4679     {
4680       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4681       if (elim_cost.cost == 0)
4682         elim_cost.cost = parm_decl_cost (data, bound);
4683       else if (TREE_CODE (bound) == INTEGER_CST)
4684         elim_cost.cost = 0;
4685       /* If we replace a loop condition 'i < n' with 'p < base + n',
4686          depends_on_elim will have 'base' and 'n' set, which implies
4687          that both 'base' and 'n' will be live during the loop.  More likely,
4688          'base + n' will be loop invariant, resulting in only one live value
4689          during the loop.  So in that case we clear depends_on_elim and set
4690         elim_inv_expr_id instead.  */
4691       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4692         {
4693           elim_inv_expr_id = get_expr_id (data, bound);
4694           bitmap_clear (depends_on_elim);
4695         }
4696       /* The bound is a loop invariant, so it will be only computed
4697          once.  */
4698       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4699     }
4700   else
4701     elim_cost = infinite_cost;
4702
4703   /* Try expressing the original giv.  If it is compared with an invariant,
4704      note that we cannot get rid of it.  */
4705   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4706                               NULL, &cmp_iv);
4707   gcc_assert (ok);
4708
4709   /* When the condition is a comparison of the candidate IV against
4710      zero, prefer this IV.
4711
4712      TODO: The constant that we're subtracting from the cost should
4713      be target-dependent.  This information should be added to the
4714      target costs for each backend.  */
4715   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4716       && integer_zerop (*bound_cst)
4717       && (operand_equal_p (*control_var, cand->var_after, 0)
4718           || operand_equal_p (*control_var, cand->var_before, 0)))
4719     elim_cost.cost -= 1;
4720
4721   express_cost = get_computation_cost (data, use, cand, false,
4722                                        &depends_on_express, NULL,
4723                                        &express_inv_expr_id);
4724   fd_ivopts_data = data;
4725   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4726
4727   /* Count the cost of the original bound as well.  */
4728   bound_cost = force_var_cost (data, *bound_cst, NULL);
4729   if (bound_cost.cost == 0)
4730     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4731   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4732     bound_cost.cost = 0;
4733   express_cost.cost += bound_cost.cost;
4734
4735   /* Choose the better approach, preferring the eliminated IV. */
4736   if (compare_costs (elim_cost, express_cost) <= 0)
4737     {
4738       cost = elim_cost;
4739       depends_on = depends_on_elim;
4740       depends_on_elim = NULL;
4741       inv_expr_id = elim_inv_expr_id;
4742     }
4743   else
4744     {
4745       cost = express_cost;
4746       depends_on = depends_on_express;
4747       depends_on_express = NULL;
4748       bound = NULL_TREE;
4749       comp = ERROR_MARK;
4750       inv_expr_id = express_inv_expr_id;
4751     }
4752
4753   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4754
4755   if (depends_on_elim)
4756     BITMAP_FREE (depends_on_elim);
4757   if (depends_on_express)
4758     BITMAP_FREE (depends_on_express);
4759
4760   return !infinite_cost_p (cost);
4761 }
4762
4763 /* Determines cost of basing replacement of USE on CAND.  Returns false
4764    if USE cannot be based on CAND.  */
4765
4766 static bool
4767 determine_use_iv_cost (struct ivopts_data *data,
4768                        struct iv_use *use, struct iv_cand *cand)
4769 {
4770   switch (use->type)
4771     {
4772     case USE_NONLINEAR_EXPR:
4773       return determine_use_iv_cost_generic (data, use, cand);
4774
4775     case USE_ADDRESS:
4776       return determine_use_iv_cost_address (data, use, cand);
4777
4778     case USE_COMPARE:
4779       return determine_use_iv_cost_condition (data, use, cand);
4780
4781     default:
4782       gcc_unreachable ();
4783     }
4784 }
4785
4786 /* Return true if get_computation_cost indicates that autoincrement is
4787    a possibility for the pair of USE and CAND, false otherwise.  */
4788
4789 static bool
4790 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4791                            struct iv_cand *cand)
4792 {
4793   bitmap depends_on;
4794   bool can_autoinc;
4795   comp_cost cost;
4796
4797   if (use->type != USE_ADDRESS)
4798     return false;
4799
4800   cost = get_computation_cost (data, use, cand, true, &depends_on,
4801                                &can_autoinc, NULL);
4802
4803   BITMAP_FREE (depends_on);
4804
4805   return !infinite_cost_p (cost) && can_autoinc;
4806 }
4807
4808 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4809    use that allows autoincrement, and set their AINC_USE if possible.  */
4810
4811 static void
4812 set_autoinc_for_original_candidates (struct ivopts_data *data)
4813 {
4814   unsigned i, j;
4815
4816   for (i = 0; i < n_iv_cands (data); i++)
4817     {
4818       struct iv_cand *cand = iv_cand (data, i);
4819       struct iv_use *closest = NULL;
4820       if (cand->pos != IP_ORIGINAL)
4821         continue;
4822       for (j = 0; j < n_iv_uses (data); j++)
4823         {
4824           struct iv_use *use = iv_use (data, j);
4825           unsigned uid = gimple_uid (use->stmt);
4826           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4827               || uid > gimple_uid (cand->incremented_at))
4828             continue;
4829           if (closest == NULL || uid > gimple_uid (closest->stmt))
4830             closest = use;
4831         }
4832       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4833         continue;
4834       cand->ainc_use = closest;
4835     }
4836 }
4837
4838 /* Finds the candidates for the induction variables.  */
4839
4840 static void
4841 find_iv_candidates (struct ivopts_data *data)
4842 {
4843   /* Add commonly used ivs.  */
4844   add_standard_iv_candidates (data);
4845
4846   /* Add old induction variables.  */
4847   add_old_ivs_candidates (data);
4848
4849   /* Add induction variables derived from uses.  */
4850   add_derived_ivs_candidates (data);
4851
4852   set_autoinc_for_original_candidates (data);
4853
4854   /* Record the important candidates.  */
4855   record_important_candidates (data);
4856 }
4857
4858 /* Determines costs of basing the use of the iv on an iv candidate.  */
4859
4860 static void
4861 determine_use_iv_costs (struct ivopts_data *data)
4862 {
4863   unsigned i, j;
4864   struct iv_use *use;
4865   struct iv_cand *cand;
4866   bitmap to_clear = BITMAP_ALLOC (NULL);
4867
4868   alloc_use_cost_map (data);
4869
4870   for (i = 0; i < n_iv_uses (data); i++)
4871     {
4872       use = iv_use (data, i);
4873
4874       if (data->consider_all_candidates)
4875         {
4876           for (j = 0; j < n_iv_cands (data); j++)
4877             {
4878               cand = iv_cand (data, j);
4879               determine_use_iv_cost (data, use, cand);
4880             }
4881         }
4882       else
4883         {
4884           bitmap_iterator bi;
4885
4886           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4887             {
4888               cand = iv_cand (data, j);
4889               if (!determine_use_iv_cost (data, use, cand))
4890                 bitmap_set_bit (to_clear, j);
4891             }
4892
4893           /* Remove the candidates for that the cost is infinite from
4894              the list of related candidates.  */
4895           bitmap_and_compl_into (use->related_cands, to_clear);
4896           bitmap_clear (to_clear);
4897         }
4898     }
4899
4900   BITMAP_FREE (to_clear);
4901
4902   if (dump_file && (dump_flags & TDF_DETAILS))
4903     {
4904       fprintf (dump_file, "Use-candidate costs:\n");
4905
4906       for (i = 0; i < n_iv_uses (data); i++)
4907         {
4908           use = iv_use (data, i);
4909
4910           fprintf (dump_file, "Use %d:\n", i);
4911           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
4912           for (j = 0; j < use->n_map_members; j++)
4913             {
4914               if (!use->cost_map[j].cand
4915                   || infinite_cost_p (use->cost_map[j].cost))
4916                 continue;
4917
4918               fprintf (dump_file, "  %d\t%d\t%d\t",
4919                        use->cost_map[j].cand->id,
4920                        use->cost_map[j].cost.cost,
4921                        use->cost_map[j].cost.complexity);
4922               if (use->cost_map[j].depends_on)
4923                 bitmap_print (dump_file,
4924                               use->cost_map[j].depends_on, "","");
4925               if (use->cost_map[j].inv_expr_id != -1)
4926                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4927               fprintf (dump_file, "\n");
4928             }
4929
4930           fprintf (dump_file, "\n");
4931         }
4932       fprintf (dump_file, "\n");
4933     }
4934 }
4935
4936 /* Determines cost of the candidate CAND.  */
4937
4938 static void
4939 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4940 {
4941   comp_cost cost_base;
4942   unsigned cost, cost_step;
4943   tree base;
4944
4945   if (!cand->iv)
4946     {
4947       cand->cost = 0;
4948       return;
4949     }
4950
4951   /* There are two costs associated with the candidate -- its increment
4952      and its initialization.  The second is almost negligible for any loop
4953      that rolls enough, so we take it just very little into account.  */
4954
4955   base = cand->iv->base;
4956   cost_base = force_var_cost (data, base, NULL);
4957   /* It will be exceptional that the iv register happens to be initialized with
4958      the proper value at no cost.  In general, there will at least be a regcopy
4959      or a const set.  */
4960   if (cost_base.cost == 0)
4961     cost_base.cost = COSTS_N_INSNS (1);
4962   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4963
4964   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4965
4966   /* Prefer the original ivs unless we may gain something by replacing it.
4967      The reason is to make debugging simpler; so this is not relevant for
4968      artificial ivs created by other optimization passes.  */
4969   if (cand->pos != IP_ORIGINAL
4970       || !SSA_NAME_VAR (cand->var_before)
4971       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4972     cost++;
4973
4974   /* Prefer not to insert statements into latch unless there are some
4975      already (so that we do not create unnecessary jumps).  */
4976   if (cand->pos == IP_END
4977       && empty_block_p (ip_end_pos (data->current_loop)))
4978     cost++;
4979
4980   cand->cost = cost;
4981   cand->cost_step = cost_step;
4982 }
4983
4984 /* Determines costs of computation of the candidates.  */
4985
4986 static void
4987 determine_iv_costs (struct ivopts_data *data)
4988 {
4989   unsigned i;
4990
4991   if (dump_file && (dump_flags & TDF_DETAILS))
4992     {
4993       fprintf (dump_file, "Candidate costs:\n");
4994       fprintf (dump_file, "  cand\tcost\n");
4995     }
4996
4997   for (i = 0; i < n_iv_cands (data); i++)
4998     {
4999       struct iv_cand *cand = iv_cand (data, i);
5000
5001       determine_iv_cost (data, cand);
5002
5003       if (dump_file && (dump_flags & TDF_DETAILS))
5004         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5005     }
5006
5007   if (dump_file && (dump_flags & TDF_DETAILS))
5008     fprintf (dump_file, "\n");
5009 }
5010
5011 /* Calculates cost for having SIZE induction variables.  */
5012
5013 static unsigned
5014 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5015 {
5016   /* We add size to the cost, so that we prefer eliminating ivs
5017      if possible.  */
5018   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5019                                             data->body_includes_call);
5020 }
5021
5022 /* For each size of the induction variable set determine the penalty.  */
5023
5024 static void
5025 determine_set_costs (struct ivopts_data *data)
5026 {
5027   unsigned j, n;
5028   gimple phi;
5029   gimple_stmt_iterator psi;
5030   tree op;
5031   struct loop *loop = data->current_loop;
5032   bitmap_iterator bi;
5033
5034   if (dump_file && (dump_flags & TDF_DETAILS))
5035     {
5036       fprintf (dump_file, "Global costs:\n");
5037       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5038       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5039       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5040       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5041     }
5042
5043   n = 0;
5044   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5045     {
5046       phi = gsi_stmt (psi);
5047       op = PHI_RESULT (phi);
5048
5049       if (virtual_operand_p (op))
5050         continue;
5051
5052       if (get_iv (data, op))
5053         continue;
5054
5055       n++;
5056     }
5057
5058   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5059     {
5060       struct version_info *info = ver_info (data, j);
5061
5062       if (info->inv_id && info->has_nonlin_use)
5063         n++;
5064     }
5065
5066   data->regs_used = n;
5067   if (dump_file && (dump_flags & TDF_DETAILS))
5068     fprintf (dump_file, "  regs_used %d\n", n);
5069
5070   if (dump_file && (dump_flags & TDF_DETAILS))
5071     {
5072       fprintf (dump_file, "  cost for size:\n");
5073       fprintf (dump_file, "  ivs\tcost\n");
5074       for (j = 0; j <= 2 * target_avail_regs; j++)
5075         fprintf (dump_file, "  %d\t%d\n", j,
5076                  ivopts_global_cost_for_size (data, j));
5077       fprintf (dump_file, "\n");
5078     }
5079 }
5080
5081 /* Returns true if A is a cheaper cost pair than B.  */
5082
5083 static bool
5084 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5085 {
5086   int cmp;
5087
5088   if (!a)
5089     return false;
5090
5091   if (!b)
5092     return true;
5093
5094   cmp = compare_costs (a->cost, b->cost);
5095   if (cmp < 0)
5096     return true;
5097
5098   if (cmp > 0)
5099     return false;
5100
5101   /* In case the costs are the same, prefer the cheaper candidate.  */
5102   if (a->cand->cost < b->cand->cost)
5103     return true;
5104
5105   return false;
5106 }
5107
5108
5109 /* Returns candidate by that USE is expressed in IVS.  */
5110
5111 static struct cost_pair *
5112 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5113 {
5114   return ivs->cand_for_use[use->id];
5115 }
5116
5117 /* Computes the cost field of IVS structure.  */
5118
5119 static void
5120 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5121 {
5122   comp_cost cost = ivs->cand_use_cost;
5123
5124   cost.cost += ivs->cand_cost;
5125
5126   cost.cost += ivopts_global_cost_for_size (data,
5127                                             ivs->n_regs + ivs->num_used_inv_expr);
5128
5129   ivs->cost = cost;
5130 }
5131
5132 /* Remove invariants in set INVS to set IVS.  */
5133
5134 static void
5135 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5136 {
5137   bitmap_iterator bi;
5138   unsigned iid;
5139
5140   if (!invs)
5141     return;
5142
5143   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5144     {
5145       ivs->n_invariant_uses[iid]--;
5146       if (ivs->n_invariant_uses[iid] == 0)
5147         ivs->n_regs--;
5148     }
5149 }
5150
5151 /* Set USE not to be expressed by any candidate in IVS.  */
5152
5153 static void
5154 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5155                  struct iv_use *use)
5156 {
5157   unsigned uid = use->id, cid;
5158   struct cost_pair *cp;
5159
5160   cp = ivs->cand_for_use[uid];
5161   if (!cp)
5162     return;
5163   cid = cp->cand->id;
5164
5165   ivs->bad_uses++;
5166   ivs->cand_for_use[uid] = NULL;
5167   ivs->n_cand_uses[cid]--;
5168
5169   if (ivs->n_cand_uses[cid] == 0)
5170     {
5171       bitmap_clear_bit (ivs->cands, cid);
5172       /* Do not count the pseudocandidates.  */
5173       if (cp->cand->iv)
5174         ivs->n_regs--;
5175       ivs->n_cands--;
5176       ivs->cand_cost -= cp->cand->cost;
5177
5178       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5179     }
5180
5181   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5182
5183   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5184
5185   if (cp->inv_expr_id != -1)
5186     {
5187       ivs->used_inv_expr[cp->inv_expr_id]--;
5188       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5189         ivs->num_used_inv_expr--;
5190     }
5191   iv_ca_recount_cost (data, ivs);
5192 }
5193
5194 /* Add invariants in set INVS to set IVS.  */
5195
5196 static void
5197 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5198 {
5199   bitmap_iterator bi;
5200   unsigned iid;
5201
5202   if (!invs)
5203     return;
5204
5205   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5206     {
5207       ivs->n_invariant_uses[iid]++;
5208       if (ivs->n_invariant_uses[iid] == 1)
5209         ivs->n_regs++;
5210     }
5211 }
5212
5213 /* Set cost pair for USE in set IVS to CP.  */
5214
5215 static void
5216 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5217               struct iv_use *use, struct cost_pair *cp)
5218 {
5219   unsigned uid = use->id, cid;
5220
5221   if (ivs->cand_for_use[uid] == cp)
5222     return;
5223
5224   if (ivs->cand_for_use[uid])
5225     iv_ca_set_no_cp (data, ivs, use);
5226
5227   if (cp)
5228     {
5229       cid = cp->cand->id;
5230
5231       ivs->bad_uses--;
5232       ivs->cand_for_use[uid] = cp;
5233       ivs->n_cand_uses[cid]++;
5234       if (ivs->n_cand_uses[cid] == 1)
5235         {
5236           bitmap_set_bit (ivs->cands, cid);
5237           /* Do not count the pseudocandidates.  */
5238           if (cp->cand->iv)
5239             ivs->n_regs++;
5240           ivs->n_cands++;
5241           ivs->cand_cost += cp->cand->cost;
5242
5243           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5244         }
5245
5246       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5247       iv_ca_set_add_invariants (ivs, cp->depends_on);
5248
5249       if (cp->inv_expr_id != -1)
5250         {
5251           ivs->used_inv_expr[cp->inv_expr_id]++;
5252           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5253             ivs->num_used_inv_expr++;
5254         }
5255       iv_ca_recount_cost (data, ivs);
5256     }
5257 }
5258
5259 /* Extend set IVS by expressing USE by some of the candidates in it
5260    if possible. All important candidates will be considered
5261    if IMPORTANT_CANDIDATES is true.  */
5262
5263 static void
5264 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5265                struct iv_use *use, bool important_candidates)
5266 {
5267   struct cost_pair *best_cp = NULL, *cp;
5268   bitmap_iterator bi;
5269   bitmap cands;
5270   unsigned i;
5271
5272   gcc_assert (ivs->upto >= use->id);
5273
5274   if (ivs->upto == use->id)
5275     {
5276       ivs->upto++;
5277       ivs->bad_uses++;
5278     }
5279
5280   cands = (important_candidates ? data->important_candidates : ivs->cands);
5281   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5282     {
5283       struct iv_cand *cand = iv_cand (data, i);
5284
5285       cp = get_use_iv_cost (data, use, cand);
5286
5287       if (cheaper_cost_pair (cp, best_cp))
5288         best_cp = cp;
5289     }
5290
5291   iv_ca_set_cp (data, ivs, use, best_cp);
5292 }
5293
5294 /* Get cost for assignment IVS.  */
5295
5296 static comp_cost
5297 iv_ca_cost (struct iv_ca *ivs)
5298 {
5299   /* This was a conditional expression but it triggered a bug in
5300      Sun C 5.5.  */
5301   if (ivs->bad_uses)
5302     return infinite_cost;
5303   else
5304     return ivs->cost;
5305 }
5306
5307 /* Returns true if all dependences of CP are among invariants in IVS.  */
5308
5309 static bool
5310 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5311 {
5312   unsigned i;
5313   bitmap_iterator bi;
5314
5315   if (!cp->depends_on)
5316     return true;
5317
5318   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5319     {
5320       if (ivs->n_invariant_uses[i] == 0)
5321         return false;
5322     }
5323
5324   return true;
5325 }
5326
5327 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5328    it before NEXT_CHANGE.  */
5329
5330 static struct iv_ca_delta *
5331 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5332                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5333 {
5334   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5335
5336   change->use = use;
5337   change->old_cp = old_cp;
5338   change->new_cp = new_cp;
5339   change->next_change = next_change;
5340
5341   return change;
5342 }
5343
5344 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5345    are rewritten.  */
5346
5347 static struct iv_ca_delta *
5348 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5349 {
5350   struct iv_ca_delta *last;
5351
5352   if (!l2)
5353     return l1;
5354
5355   if (!l1)
5356     return l2;
5357
5358   for (last = l1; last->next_change; last = last->next_change)
5359     continue;
5360   last->next_change = l2;
5361
5362   return l1;
5363 }
5364
5365 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5366
5367 static struct iv_ca_delta *
5368 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5369 {
5370   struct iv_ca_delta *act, *next, *prev = NULL;
5371   struct cost_pair *tmp;
5372
5373   for (act = delta; act; act = next)
5374     {
5375       next = act->next_change;
5376       act->next_change = prev;
5377       prev = act;
5378
5379       tmp = act->old_cp;
5380       act->old_cp = act->new_cp;
5381       act->new_cp = tmp;
5382     }
5383
5384   return prev;
5385 }
5386
5387 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5388    reverted instead.  */
5389
5390 static void
5391 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5392                     struct iv_ca_delta *delta, bool forward)
5393 {
5394   struct cost_pair *from, *to;
5395   struct iv_ca_delta *act;
5396
5397   if (!forward)
5398     delta = iv_ca_delta_reverse (delta);
5399
5400   for (act = delta; act; act = act->next_change)
5401     {
5402       from = act->old_cp;
5403       to = act->new_cp;
5404       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5405       iv_ca_set_cp (data, ivs, act->use, to);
5406     }
5407
5408   if (!forward)
5409     iv_ca_delta_reverse (delta);
5410 }
5411
5412 /* Returns true if CAND is used in IVS.  */
5413
5414 static bool
5415 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5416 {
5417   return ivs->n_cand_uses[cand->id] > 0;
5418 }
5419
5420 /* Returns number of induction variable candidates in the set IVS.  */
5421
5422 static unsigned
5423 iv_ca_n_cands (struct iv_ca *ivs)
5424 {
5425   return ivs->n_cands;
5426 }
5427
5428 /* Free the list of changes DELTA.  */
5429
5430 static void
5431 iv_ca_delta_free (struct iv_ca_delta **delta)
5432 {
5433   struct iv_ca_delta *act, *next;
5434
5435   for (act = *delta; act; act = next)
5436     {
5437       next = act->next_change;
5438       free (act);
5439     }
5440
5441   *delta = NULL;
5442 }
5443
5444 /* Allocates new iv candidates assignment.  */
5445
5446 static struct iv_ca *
5447 iv_ca_new (struct ivopts_data *data)
5448 {
5449   struct iv_ca *nw = XNEW (struct iv_ca);
5450
5451   nw->upto = 0;
5452   nw->bad_uses = 0;
5453   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5454   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5455   nw->cands = BITMAP_ALLOC (NULL);
5456   nw->n_cands = 0;
5457   nw->n_regs = 0;
5458   nw->cand_use_cost = no_cost;
5459   nw->cand_cost = 0;
5460   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5461   nw->cost = no_cost;
5462   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5463   nw->num_used_inv_expr = 0;
5464
5465   return nw;
5466 }
5467
5468 /* Free memory occupied by the set IVS.  */
5469
5470 static void
5471 iv_ca_free (struct iv_ca **ivs)
5472 {
5473   free ((*ivs)->cand_for_use);
5474   free ((*ivs)->n_cand_uses);
5475   BITMAP_FREE ((*ivs)->cands);
5476   free ((*ivs)->n_invariant_uses);
5477   free ((*ivs)->used_inv_expr);
5478   free (*ivs);
5479   *ivs = NULL;
5480 }
5481
5482 /* Dumps IVS to FILE.  */
5483
5484 static void
5485 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5486 {
5487   const char *pref = "  invariants ";
5488   unsigned i;
5489   comp_cost cost = iv_ca_cost (ivs);
5490
5491   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5492   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5493            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5494   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5495
5496    for (i = 0; i < ivs->upto; i++)
5497     {
5498       struct iv_use *use = iv_use (data, i);
5499       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5500       if (cp)
5501         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5502                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5503       else
5504         fprintf (file, "   use:%d --> ??\n", use->id);
5505     }
5506
5507   for (i = 1; i <= data->max_inv_id; i++)
5508     if (ivs->n_invariant_uses[i])
5509       {
5510         fprintf (file, "%s%d", pref, i);
5511         pref = ", ";
5512       }
5513   fprintf (file, "\n\n");
5514 }
5515
5516 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5517    new set, and store differences in DELTA.  Number of induction variables
5518    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5519    the function will try to find a solution with mimimal iv candidates.  */
5520
5521 static comp_cost
5522 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5523               struct iv_cand *cand, struct iv_ca_delta **delta,
5524               unsigned *n_ivs, bool min_ncand)
5525 {
5526   unsigned i;
5527   comp_cost cost;
5528   struct iv_use *use;
5529   struct cost_pair *old_cp, *new_cp;
5530
5531   *delta = NULL;
5532   for (i = 0; i < ivs->upto; i++)
5533     {
5534       use = iv_use (data, i);
5535       old_cp = iv_ca_cand_for_use (ivs, use);
5536
5537       if (old_cp
5538           && old_cp->cand == cand)
5539         continue;
5540
5541       new_cp = get_use_iv_cost (data, use, cand);
5542       if (!new_cp)
5543         continue;
5544
5545       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5546         continue;
5547
5548       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5549         continue;
5550
5551       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5552     }
5553
5554   iv_ca_delta_commit (data, ivs, *delta, true);
5555   cost = iv_ca_cost (ivs);
5556   if (n_ivs)
5557     *n_ivs = iv_ca_n_cands (ivs);
5558   iv_ca_delta_commit (data, ivs, *delta, false);
5559
5560   return cost;
5561 }
5562
5563 /* Try narrowing set IVS by removing CAND.  Return the cost of
5564    the new set and store the differences in DELTA.  */
5565
5566 static comp_cost
5567 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5568               struct iv_cand *cand, struct iv_ca_delta **delta)
5569 {
5570   unsigned i, ci;
5571   struct iv_use *use;
5572   struct cost_pair *old_cp, *new_cp, *cp;
5573   bitmap_iterator bi;
5574   struct iv_cand *cnd;
5575   comp_cost cost;
5576
5577   *delta = NULL;
5578   for (i = 0; i < n_iv_uses (data); i++)
5579     {
5580       use = iv_use (data, i);
5581
5582       old_cp = iv_ca_cand_for_use (ivs, use);
5583       if (old_cp->cand != cand)
5584         continue;
5585
5586       new_cp = NULL;
5587
5588       if (data->consider_all_candidates)
5589         {
5590           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5591             {
5592               if (ci == cand->id)
5593                 continue;
5594
5595               cnd = iv_cand (data, ci);
5596
5597               cp = get_use_iv_cost (data, use, cnd);
5598               if (!cp)
5599                 continue;
5600
5601               if (!iv_ca_has_deps (ivs, cp))
5602                 continue;
5603
5604               if (!cheaper_cost_pair (cp, new_cp))
5605                 continue;
5606
5607               new_cp = cp;
5608             }
5609         }
5610       else
5611         {
5612           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5613             {
5614               if (ci == cand->id)
5615                 continue;
5616
5617               cnd = iv_cand (data, ci);
5618
5619               cp = get_use_iv_cost (data, use, cnd);
5620               if (!cp)
5621                 continue;
5622               if (!iv_ca_has_deps (ivs, cp))
5623                 continue;
5624
5625               if (!cheaper_cost_pair (cp, new_cp))
5626                 continue;
5627
5628               new_cp = cp;
5629             }
5630         }
5631
5632       if (!new_cp)
5633         {
5634           iv_ca_delta_free (delta);
5635           return infinite_cost;
5636         }
5637
5638       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5639     }
5640
5641   iv_ca_delta_commit (data, ivs, *delta, true);
5642   cost = iv_ca_cost (ivs);
5643   iv_ca_delta_commit (data, ivs, *delta, false);
5644
5645   return cost;
5646 }
5647
5648 /* Try optimizing the set of candidates IVS by removing candidates different
5649    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5650    differences in DELTA.  */
5651
5652 static comp_cost
5653 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5654              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5655 {
5656   bitmap_iterator bi;
5657   struct iv_ca_delta *act_delta, *best_delta;
5658   unsigned i;
5659   comp_cost best_cost, acost;
5660   struct iv_cand *cand;
5661
5662   best_delta = NULL;
5663   best_cost = iv_ca_cost (ivs);
5664
5665   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5666     {
5667       cand = iv_cand (data, i);
5668
5669       if (cand == except_cand)
5670         continue;
5671
5672       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5673
5674       if (compare_costs (acost, best_cost) < 0)
5675         {
5676           best_cost = acost;
5677           iv_ca_delta_free (&best_delta);
5678           best_delta = act_delta;
5679         }
5680       else
5681         iv_ca_delta_free (&act_delta);
5682     }
5683
5684   if (!best_delta)
5685     {
5686       *delta = NULL;
5687       return best_cost;
5688     }
5689
5690   /* Recurse to possibly remove other unnecessary ivs.  */
5691   iv_ca_delta_commit (data, ivs, best_delta, true);
5692   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5693   iv_ca_delta_commit (data, ivs, best_delta, false);
5694   *delta = iv_ca_delta_join (best_delta, *delta);
5695   return best_cost;
5696 }
5697
5698 /* Tries to extend the sets IVS in the best possible way in order
5699    to express the USE.  If ORIGINALP is true, prefer candidates from
5700    the original set of IVs, otherwise favor important candidates not
5701    based on any memory object.  */
5702
5703 static bool
5704 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5705                   struct iv_use *use, bool originalp)
5706 {
5707   comp_cost best_cost, act_cost;
5708   unsigned i;
5709   bitmap_iterator bi;
5710   struct iv_cand *cand;
5711   struct iv_ca_delta *best_delta = NULL, *act_delta;
5712   struct cost_pair *cp;
5713
5714   iv_ca_add_use (data, ivs, use, false);
5715   best_cost = iv_ca_cost (ivs);
5716
5717   cp = iv_ca_cand_for_use (ivs, use);
5718   if (!cp)
5719     {
5720       ivs->upto--;
5721       ivs->bad_uses--;
5722       iv_ca_add_use (data, ivs, use, true);
5723       best_cost = iv_ca_cost (ivs);
5724       cp = iv_ca_cand_for_use (ivs, use);
5725     }
5726   if (cp)
5727     {
5728       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5729       iv_ca_set_no_cp (data, ivs, use);
5730     }
5731
5732   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5733      first try important candidates not based on any memory object.  Only if
5734      this fails, try the specific ones.  Rationale -- in loops with many
5735      variables the best choice often is to use just one generic biv.  If we
5736      added here many ivs specific to the uses, the optimization algorithm later
5737      would be likely to get stuck in a local minimum, thus causing us to create
5738      too many ivs.  The approach from few ivs to more seems more likely to be
5739      successful -- starting from few ivs, replacing an expensive use by a
5740      specific iv should always be a win.  */
5741   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5742     {
5743       cand = iv_cand (data, i);
5744
5745       if (originalp && cand->pos !=IP_ORIGINAL)
5746         continue;
5747
5748       if (!originalp && cand->iv->base_object != NULL_TREE)
5749         continue;
5750
5751       if (iv_ca_cand_used_p (ivs, cand))
5752         continue;
5753
5754       cp = get_use_iv_cost (data, use, cand);
5755       if (!cp)
5756         continue;
5757
5758       iv_ca_set_cp (data, ivs, use, cp);
5759       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5760                                true);
5761       iv_ca_set_no_cp (data, ivs, use);
5762       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5763
5764       if (compare_costs (act_cost, best_cost) < 0)
5765         {
5766           best_cost = act_cost;
5767
5768           iv_ca_delta_free (&best_delta);
5769           best_delta = act_delta;
5770         }
5771       else
5772         iv_ca_delta_free (&act_delta);
5773     }
5774
5775   if (infinite_cost_p (best_cost))
5776     {
5777       for (i = 0; i < use->n_map_members; i++)
5778         {
5779           cp = use->cost_map + i;
5780           cand = cp->cand;
5781           if (!cand)
5782             continue;
5783
5784           /* Already tried this.  */
5785           if (cand->important)
5786             {
5787               if (originalp && cand->pos == IP_ORIGINAL)
5788                 continue;
5789               if (!originalp && cand->iv->base_object == NULL_TREE)
5790                 continue;
5791             }
5792
5793           if (iv_ca_cand_used_p (ivs, cand))
5794             continue;
5795
5796           act_delta = NULL;
5797           iv_ca_set_cp (data, ivs, use, cp);
5798           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5799           iv_ca_set_no_cp (data, ivs, use);
5800           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5801                                        cp, act_delta);
5802
5803           if (compare_costs (act_cost, best_cost) < 0)
5804             {
5805               best_cost = act_cost;
5806
5807               if (best_delta)
5808                 iv_ca_delta_free (&best_delta);
5809               best_delta = act_delta;
5810             }
5811           else
5812             iv_ca_delta_free (&act_delta);
5813         }
5814     }
5815
5816   iv_ca_delta_commit (data, ivs, best_delta, true);
5817   iv_ca_delta_free (&best_delta);
5818
5819   return !infinite_cost_p (best_cost);
5820 }
5821
5822 /* Finds an initial assignment of candidates to uses.  */
5823
5824 static struct iv_ca *
5825 get_initial_solution (struct ivopts_data *data, bool originalp)
5826 {
5827   struct iv_ca *ivs = iv_ca_new (data);
5828   unsigned i;
5829
5830   for (i = 0; i < n_iv_uses (data); i++)
5831     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5832       {
5833         iv_ca_free (&ivs);
5834         return NULL;
5835       }
5836
5837   return ivs;
5838 }
5839
5840 /* Tries to improve set of induction variables IVS.  */
5841
5842 static bool
5843 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5844 {
5845   unsigned i, n_ivs;
5846   comp_cost acost, best_cost = iv_ca_cost (ivs);
5847   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5848   struct iv_cand *cand;
5849
5850   /* Try extending the set of induction variables by one.  */
5851   for (i = 0; i < n_iv_cands (data); i++)
5852     {
5853       cand = iv_cand (data, i);
5854
5855       if (iv_ca_cand_used_p (ivs, cand))
5856         continue;
5857
5858       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5859       if (!act_delta)
5860         continue;
5861
5862       /* If we successfully added the candidate and the set is small enough,
5863          try optimizing it by removing other candidates.  */
5864       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5865         {
5866           iv_ca_delta_commit (data, ivs, act_delta, true);
5867           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5868           iv_ca_delta_commit (data, ivs, act_delta, false);
5869           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5870         }
5871
5872       if (compare_costs (acost, best_cost) < 0)
5873         {
5874           best_cost = acost;
5875           iv_ca_delta_free (&best_delta);
5876           best_delta = act_delta;
5877         }
5878       else
5879         iv_ca_delta_free (&act_delta);
5880     }
5881
5882   if (!best_delta)
5883     {
5884       /* Try removing the candidates from the set instead.  */
5885       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5886
5887       /* Nothing more we can do.  */
5888       if (!best_delta)
5889         return false;
5890     }
5891
5892   iv_ca_delta_commit (data, ivs, best_delta, true);
5893   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5894   iv_ca_delta_free (&best_delta);
5895   return true;
5896 }
5897
5898 /* Attempts to find the optimal set of induction variables.  We do simple
5899    greedy heuristic -- we try to replace at most one candidate in the selected
5900    solution and remove the unused ivs while this improves the cost.  */
5901
5902 static struct iv_ca *
5903 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5904 {
5905   struct iv_ca *set;
5906
5907   /* Get the initial solution.  */
5908   set = get_initial_solution (data, originalp);
5909   if (!set)
5910     {
5911       if (dump_file && (dump_flags & TDF_DETAILS))
5912         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5913       return NULL;
5914     }
5915
5916   if (dump_file && (dump_flags & TDF_DETAILS))
5917     {
5918       fprintf (dump_file, "Initial set of candidates:\n");
5919       iv_ca_dump (data, dump_file, set);
5920     }
5921
5922   while (try_improve_iv_set (data, set))
5923     {
5924       if (dump_file && (dump_flags & TDF_DETAILS))
5925         {
5926           fprintf (dump_file, "Improved to:\n");
5927           iv_ca_dump (data, dump_file, set);
5928         }
5929     }
5930
5931   return set;
5932 }
5933
5934 static struct iv_ca *
5935 find_optimal_iv_set (struct ivopts_data *data)
5936 {
5937   unsigned i;
5938   struct iv_ca *set, *origset;
5939   struct iv_use *use;
5940   comp_cost cost, origcost;
5941
5942   /* Determine the cost based on a strategy that starts with original IVs,
5943      and try again using a strategy that prefers candidates not based
5944      on any IVs.  */
5945   origset = find_optimal_iv_set_1 (data, true);
5946   set = find_optimal_iv_set_1 (data, false);
5947
5948   if (!origset && !set)
5949     return NULL;
5950
5951   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5952   cost = set ? iv_ca_cost (set) : infinite_cost;
5953
5954   if (dump_file && (dump_flags & TDF_DETAILS))
5955     {
5956       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5957                origcost.cost, origcost.complexity);
5958       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5959                cost.cost, cost.complexity);
5960     }
5961
5962   /* Choose the one with the best cost.  */
5963   if (compare_costs (origcost, cost) <= 0)
5964     {
5965       if (set)
5966         iv_ca_free (&set);
5967       set = origset;
5968     }
5969   else if (origset)
5970     iv_ca_free (&origset);
5971
5972   for (i = 0; i < n_iv_uses (data); i++)
5973     {
5974       use = iv_use (data, i);
5975       use->selected = iv_ca_cand_for_use (set, use)->cand;
5976     }
5977
5978   return set;
5979 }
5980
5981 /* Creates a new induction variable corresponding to CAND.  */
5982
5983 static void
5984 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5985 {
5986   gimple_stmt_iterator incr_pos;
5987   tree base;
5988   bool after = false;
5989
5990   if (!cand->iv)
5991     return;
5992
5993   switch (cand->pos)
5994     {
5995     case IP_NORMAL:
5996       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5997       break;
5998
5999     case IP_END:
6000       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6001       after = true;
6002       break;
6003
6004     case IP_AFTER_USE:
6005       after = true;
6006       /* fall through */
6007     case IP_BEFORE_USE:
6008       incr_pos = gsi_for_stmt (cand->incremented_at);
6009       break;
6010
6011     case IP_ORIGINAL:
6012       /* Mark that the iv is preserved.  */
6013       name_info (data, cand->var_before)->preserve_biv = true;
6014       name_info (data, cand->var_after)->preserve_biv = true;
6015
6016       /* Rewrite the increment so that it uses var_before directly.  */
6017       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6018       return;
6019     }
6020
6021   gimple_add_tmp_var (cand->var_before);
6022
6023   base = unshare_expr (cand->iv->base);
6024
6025   create_iv (base, unshare_expr (cand->iv->step),
6026              cand->var_before, data->current_loop,
6027              &incr_pos, after, &cand->var_before, &cand->var_after);
6028 }
6029
6030 /* Creates new induction variables described in SET.  */
6031
6032 static void
6033 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6034 {
6035   unsigned i;
6036   struct iv_cand *cand;
6037   bitmap_iterator bi;
6038
6039   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6040     {
6041       cand = iv_cand (data, i);
6042       create_new_iv (data, cand);
6043     }
6044
6045   if (dump_file && (dump_flags & TDF_DETAILS))
6046     {
6047       fprintf (dump_file, "\nSelected IV set: \n");
6048       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6049         {
6050           cand = iv_cand (data, i);
6051           dump_cand (dump_file, cand);
6052         }
6053       fprintf (dump_file, "\n");
6054     }
6055 }
6056
6057 /* Rewrites USE (definition of iv used in a nonlinear expression)
6058    using candidate CAND.  */
6059
6060 static void
6061 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6062                             struct iv_use *use, struct iv_cand *cand)
6063 {
6064   tree comp;
6065   tree op, tgt;
6066   gimple ass;
6067   gimple_stmt_iterator bsi;
6068
6069   /* An important special case -- if we are asked to express value of
6070      the original iv by itself, just exit; there is no need to
6071      introduce a new computation (that might also need casting the
6072      variable to unsigned and back).  */
6073   if (cand->pos == IP_ORIGINAL
6074       && cand->incremented_at == use->stmt)
6075     {
6076       tree step, ctype, utype;
6077       enum tree_code incr_code = PLUS_EXPR, old_code;
6078
6079       gcc_assert (is_gimple_assign (use->stmt));
6080       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6081
6082       step = cand->iv->step;
6083       ctype = TREE_TYPE (step);
6084       utype = TREE_TYPE (cand->var_after);
6085       if (TREE_CODE (step) == NEGATE_EXPR)
6086         {
6087           incr_code = MINUS_EXPR;
6088           step = TREE_OPERAND (step, 0);
6089         }
6090
6091       /* Check whether we may leave the computation unchanged.
6092          This is the case only if it does not rely on other
6093          computations in the loop -- otherwise, the computation
6094          we rely upon may be removed in remove_unused_ivs,
6095          thus leading to ICE.  */
6096       old_code = gimple_assign_rhs_code (use->stmt);
6097       if (old_code == PLUS_EXPR
6098           || old_code == MINUS_EXPR
6099           || old_code == POINTER_PLUS_EXPR)
6100         {
6101           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6102             op = gimple_assign_rhs2 (use->stmt);
6103           else if (old_code != MINUS_EXPR
6104                    && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6105             op = gimple_assign_rhs1 (use->stmt);
6106           else
6107             op = NULL_TREE;
6108         }
6109       else
6110         op = NULL_TREE;
6111
6112       if (op
6113           && (TREE_CODE (op) == INTEGER_CST
6114               || operand_equal_p (op, step, 0)))
6115         return;
6116
6117       /* Otherwise, add the necessary computations to express
6118          the iv.  */
6119       op = fold_convert (ctype, cand->var_before);
6120       comp = fold_convert (utype,
6121                            build2 (incr_code, ctype, op,
6122                                    unshare_expr (step)));
6123     }
6124   else
6125     {
6126       comp = get_computation (data->current_loop, use, cand);
6127       gcc_assert (comp != NULL_TREE);
6128     }
6129
6130   switch (gimple_code (use->stmt))
6131     {
6132     case GIMPLE_PHI:
6133       tgt = PHI_RESULT (use->stmt);
6134
6135       /* If we should keep the biv, do not replace it.  */
6136       if (name_info (data, tgt)->preserve_biv)
6137         return;
6138
6139       bsi = gsi_after_labels (gimple_bb (use->stmt));
6140       break;
6141
6142     case GIMPLE_ASSIGN:
6143       tgt = gimple_assign_lhs (use->stmt);
6144       bsi = gsi_for_stmt (use->stmt);
6145       break;
6146
6147     default:
6148       gcc_unreachable ();
6149     }
6150
6151   if (!valid_gimple_rhs_p (comp)
6152       || (gimple_code (use->stmt) != GIMPLE_PHI
6153           /* We can't allow re-allocating the stmt as it might be pointed
6154              to still.  */
6155           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6156               >= gimple_num_ops (gsi_stmt (bsi)))))
6157     {
6158       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6159                                        true, GSI_SAME_STMT);
6160       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6161         {
6162           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6163           /* As this isn't a plain copy we have to reset alignment
6164              information.  */
6165           if (SSA_NAME_PTR_INFO (comp))
6166             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6167         }
6168     }
6169
6170   if (gimple_code (use->stmt) == GIMPLE_PHI)
6171     {
6172       ass = gimple_build_assign (tgt, comp);
6173       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6174
6175       bsi = gsi_for_stmt (use->stmt);
6176       remove_phi_node (&bsi, false);
6177     }
6178   else
6179     {
6180       gimple_assign_set_rhs_from_tree (&bsi, comp);
6181       use->stmt = gsi_stmt (bsi);
6182     }
6183 }
6184
6185 /* Performs a peephole optimization to reorder the iv update statement with
6186    a mem ref to enable instruction combining in later phases. The mem ref uses
6187    the iv value before the update, so the reordering transformation requires
6188    adjustment of the offset. CAND is the selected IV_CAND.
6189
6190    Example:
6191
6192    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6193    iv2 = iv1 + 1;
6194
6195    if (t < val)      (1)
6196      goto L;
6197    goto Head;
6198
6199
6200    directly propagating t over to (1) will introduce overlapping live range
6201    thus increase register pressure. This peephole transform it into:
6202
6203
6204    iv2 = iv1 + 1;
6205    t = MEM_REF (base, iv2, 8, 8);
6206    if (t < val)
6207      goto L;
6208    goto Head;
6209 */
6210
6211 static void
6212 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6213 {
6214   tree var_after;
6215   gimple iv_update, stmt;
6216   basic_block bb;
6217   gimple_stmt_iterator gsi, gsi_iv;
6218
6219   if (cand->pos != IP_NORMAL)
6220     return;
6221
6222   var_after = cand->var_after;
6223   iv_update = SSA_NAME_DEF_STMT (var_after);
6224
6225   bb = gimple_bb (iv_update);
6226   gsi = gsi_last_nondebug_bb (bb);
6227   stmt = gsi_stmt (gsi);
6228
6229   /* Only handle conditional statement for now.  */
6230   if (gimple_code (stmt) != GIMPLE_COND)
6231     return;
6232
6233   gsi_prev_nondebug (&gsi);
6234   stmt = gsi_stmt (gsi);
6235   if (stmt != iv_update)
6236     return;
6237
6238   gsi_prev_nondebug (&gsi);
6239   if (gsi_end_p (gsi))
6240     return;
6241
6242   stmt = gsi_stmt (gsi);
6243   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6244     return;
6245
6246   if (stmt != use->stmt)
6247     return;
6248
6249   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6250     return;
6251
6252   if (dump_file && (dump_flags & TDF_DETAILS))
6253     {
6254       fprintf (dump_file, "Reordering \n");
6255       print_gimple_stmt (dump_file, iv_update, 0, 0);
6256       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6257       fprintf (dump_file, "\n");
6258     }
6259
6260   gsi = gsi_for_stmt (use->stmt);
6261   gsi_iv = gsi_for_stmt (iv_update);
6262   gsi_move_before (&gsi_iv, &gsi);
6263
6264   cand->pos = IP_BEFORE_USE;
6265   cand->incremented_at = use->stmt;
6266 }
6267
6268 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6269
6270 static void
6271 rewrite_use_address (struct ivopts_data *data,
6272                      struct iv_use *use, struct iv_cand *cand)
6273 {
6274   aff_tree aff;
6275   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6276   tree base_hint = NULL_TREE;
6277   tree ref, iv;
6278   bool ok;
6279
6280   adjust_iv_update_pos (cand, use);
6281   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6282   gcc_assert (ok);
6283   unshare_aff_combination (&aff);
6284
6285   /* To avoid undefined overflow problems, all IV candidates use unsigned
6286      integer types.  The drawback is that this makes it impossible for
6287      create_mem_ref to distinguish an IV that is based on a memory object
6288      from one that represents simply an offset.
6289
6290      To work around this problem, we pass a hint to create_mem_ref that
6291      indicates which variable (if any) in aff is an IV based on a memory
6292      object.  Note that we only consider the candidate.  If this is not
6293      based on an object, the base of the reference is in some subexpression
6294      of the use -- but these will use pointer types, so they are recognized
6295      by the create_mem_ref heuristics anyway.  */
6296   if (cand->iv->base_object)
6297     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6298
6299   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6300   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6301                         reference_alias_ptr_type (*use->op_p),
6302                         iv, base_hint, data->speed);
6303   copy_ref_info (ref, *use->op_p);
6304   *use->op_p = ref;
6305 }
6306
6307 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6308    candidate CAND.  */
6309
6310 static void
6311 rewrite_use_compare (struct ivopts_data *data,
6312                      struct iv_use *use, struct iv_cand *cand)
6313 {
6314   tree comp, *var_p, op, bound;
6315   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6316   enum tree_code compare;
6317   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6318   bool ok;
6319
6320   bound = cp->value;
6321   if (bound)
6322     {
6323       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6324       tree var_type = TREE_TYPE (var);
6325       gimple_seq stmts;
6326
6327       if (dump_file && (dump_flags & TDF_DETAILS))
6328         {
6329           fprintf (dump_file, "Replacing exit test: ");
6330           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6331         }
6332       compare = cp->comp;
6333       bound = unshare_expr (fold_convert (var_type, bound));
6334       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6335       if (stmts)
6336         gsi_insert_seq_on_edge_immediate (
6337                 loop_preheader_edge (data->current_loop),
6338                 stmts);
6339
6340       gimple_cond_set_lhs (use->stmt, var);
6341       gimple_cond_set_code (use->stmt, compare);
6342       gimple_cond_set_rhs (use->stmt, op);
6343       return;
6344     }
6345
6346   /* The induction variable elimination failed; just express the original
6347      giv.  */
6348   comp = get_computation (data->current_loop, use, cand);
6349   gcc_assert (comp != NULL_TREE);
6350
6351   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6352   gcc_assert (ok);
6353
6354   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6355                                      true, GSI_SAME_STMT);
6356 }
6357
6358 /* Rewrites USE using candidate CAND.  */
6359
6360 static void
6361 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6362 {
6363   switch (use->type)
6364     {
6365       case USE_NONLINEAR_EXPR:
6366         rewrite_use_nonlinear_expr (data, use, cand);
6367         break;
6368
6369       case USE_ADDRESS:
6370         rewrite_use_address (data, use, cand);
6371         break;
6372
6373       case USE_COMPARE:
6374         rewrite_use_compare (data, use, cand);
6375         break;
6376
6377       default:
6378         gcc_unreachable ();
6379     }
6380
6381   update_stmt (use->stmt);
6382 }
6383
6384 /* Rewrite the uses using the selected induction variables.  */
6385
6386 static void
6387 rewrite_uses (struct ivopts_data *data)
6388 {
6389   unsigned i;
6390   struct iv_cand *cand;
6391   struct iv_use *use;
6392
6393   for (i = 0; i < n_iv_uses (data); i++)
6394     {
6395       use = iv_use (data, i);
6396       cand = use->selected;
6397       gcc_assert (cand);
6398
6399       rewrite_use (data, use, cand);
6400     }
6401 }
6402
6403 /* Removes the ivs that are not used after rewriting.  */
6404
6405 static void
6406 remove_unused_ivs (struct ivopts_data *data)
6407 {
6408   unsigned j;
6409   bitmap_iterator bi;
6410   bitmap toremove = BITMAP_ALLOC (NULL);
6411
6412   /* Figure out an order in which to release SSA DEFs so that we don't
6413      release something that we'd have to propagate into a debug stmt
6414      afterwards.  */
6415   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6416     {
6417       struct version_info *info;
6418
6419       info = ver_info (data, j);
6420       if (info->iv
6421           && !integer_zerop (info->iv->step)
6422           && !info->inv_id
6423           && !info->iv->have_use_for
6424           && !info->preserve_biv)
6425         bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6426     }
6427
6428   release_defs_bitset (toremove);
6429
6430   BITMAP_FREE (toremove);
6431 }
6432
6433 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6434    for pointer_map_traverse.  */
6435
6436 static bool
6437 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6438                       void *data ATTRIBUTE_UNUSED)
6439 {
6440   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6441
6442   free (niter);
6443   return true;
6444 }
6445
6446 /* Frees data allocated by the optimization of a single loop.  */
6447
6448 static void
6449 free_loop_data (struct ivopts_data *data)
6450 {
6451   unsigned i, j;
6452   bitmap_iterator bi;
6453   tree obj;
6454
6455   if (data->niters)
6456     {
6457       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6458       pointer_map_destroy (data->niters);
6459       data->niters = NULL;
6460     }
6461
6462   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6463     {
6464       struct version_info *info;
6465
6466       info = ver_info (data, i);
6467       free (info->iv);
6468       info->iv = NULL;
6469       info->has_nonlin_use = false;
6470       info->preserve_biv = false;
6471       info->inv_id = 0;
6472     }
6473   bitmap_clear (data->relevant);
6474   bitmap_clear (data->important_candidates);
6475
6476   for (i = 0; i < n_iv_uses (data); i++)
6477     {
6478       struct iv_use *use = iv_use (data, i);
6479
6480       free (use->iv);
6481       BITMAP_FREE (use->related_cands);
6482       for (j = 0; j < use->n_map_members; j++)
6483         if (use->cost_map[j].depends_on)
6484           BITMAP_FREE (use->cost_map[j].depends_on);
6485       free (use->cost_map);
6486       free (use);
6487     }
6488   VEC_truncate (iv_use_p, data->iv_uses, 0);
6489
6490   for (i = 0; i < n_iv_cands (data); i++)
6491     {
6492       struct iv_cand *cand = iv_cand (data, i);
6493
6494       free (cand->iv);
6495       if (cand->depends_on)
6496         BITMAP_FREE (cand->depends_on);
6497       free (cand);
6498     }
6499   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6500
6501   if (data->version_info_size < num_ssa_names)
6502     {
6503       data->version_info_size = 2 * num_ssa_names;
6504       free (data->version_info);
6505       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6506     }
6507
6508   data->max_inv_id = 0;
6509
6510   FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6511     SET_DECL_RTL (obj, NULL_RTX);
6512
6513   VEC_truncate (tree, decl_rtl_to_reset, 0);
6514
6515   htab_empty (data->inv_expr_tab);
6516   data->inv_expr_id = 0;
6517 }
6518
6519 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6520    loop tree.  */
6521
6522 static void
6523 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6524 {
6525   free_loop_data (data);
6526   free (data->version_info);
6527   BITMAP_FREE (data->relevant);
6528   BITMAP_FREE (data->important_candidates);
6529
6530   VEC_free (tree, heap, decl_rtl_to_reset);
6531   VEC_free (iv_use_p, heap, data->iv_uses);
6532   VEC_free (iv_cand_p, heap, data->iv_candidates);
6533   htab_delete (data->inv_expr_tab);
6534 }
6535
6536 /* Returns true if the loop body BODY includes any function calls.  */
6537
6538 static bool
6539 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6540 {
6541   gimple_stmt_iterator gsi;
6542   unsigned i;
6543
6544   for (i = 0; i < num_nodes; i++)
6545     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6546       {
6547         gimple stmt = gsi_stmt (gsi);
6548         if (is_gimple_call (stmt)
6549             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6550           return true;
6551       }
6552   return false;
6553 }
6554
6555 /* Optimizes the LOOP.  Returns true if anything changed.  */
6556
6557 static bool
6558 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6559 {
6560   bool changed = false;
6561   struct iv_ca *iv_ca;
6562   edge exit = single_dom_exit (loop);
6563   basic_block *body;
6564
6565   gcc_assert (!data->niters);
6566   data->current_loop = loop;
6567   data->speed = optimize_loop_for_speed_p (loop);
6568
6569   if (dump_file && (dump_flags & TDF_DETAILS))
6570     {
6571       fprintf (dump_file, "Processing loop %d\n", loop->num);
6572
6573       if (exit)
6574         {
6575           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6576                    exit->src->index, exit->dest->index);
6577           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6578           fprintf (dump_file, "\n");
6579         }
6580
6581       fprintf (dump_file, "\n");
6582     }
6583
6584   body = get_loop_body (loop);
6585   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6586   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6587   free (body);
6588
6589   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6590
6591   /* For each ssa name determines whether it behaves as an induction variable
6592      in some loop.  */
6593   if (!find_induction_variables (data))
6594     goto finish;
6595
6596   /* Finds interesting uses (item 1).  */
6597   find_interesting_uses (data);
6598   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6599     goto finish;
6600
6601   /* Finds candidates for the induction variables (item 2).  */
6602   find_iv_candidates (data);
6603
6604   /* Calculates the costs (item 3, part 1).  */
6605   determine_iv_costs (data);
6606   determine_use_iv_costs (data);
6607   determine_set_costs (data);
6608
6609   /* Find the optimal set of induction variables (item 3, part 2).  */
6610   iv_ca = find_optimal_iv_set (data);
6611   if (!iv_ca)
6612     goto finish;
6613   changed = true;
6614
6615   /* Create the new induction variables (item 4, part 1).  */
6616   create_new_ivs (data, iv_ca);
6617   iv_ca_free (&iv_ca);
6618
6619   /* Rewrite the uses (item 4, part 2).  */
6620   rewrite_uses (data);
6621
6622   /* Remove the ivs that are unused after rewriting.  */
6623   remove_unused_ivs (data);
6624
6625   /* We have changed the structure of induction variables; it might happen
6626      that definitions in the scev database refer to some of them that were
6627      eliminated.  */
6628   scev_reset ();
6629
6630 finish:
6631   free_loop_data (data);
6632
6633   return changed;
6634 }
6635
6636 /* Main entry point.  Optimizes induction variables in loops.  */
6637
6638 void
6639 tree_ssa_iv_optimize (void)
6640 {
6641   struct loop *loop;
6642   struct ivopts_data data;
6643   loop_iterator li;
6644
6645   tree_ssa_iv_optimize_init (&data);
6646
6647   /* Optimize the loops starting with the innermost ones.  */
6648   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6649     {
6650       if (dump_file && (dump_flags & TDF_DETAILS))
6651         flow_loop_dump (loop, dump_file, NULL, 1);
6652
6653       tree_ssa_iv_optimize_loop (&data, loop);
6654     }
6655
6656   tree_ssa_iv_optimize_finalize (&data);
6657 }