gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "output.h"
  73 #include "tree-pretty-print.h"
  74 #include "gimple-pretty-print.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "tree-pass.h"
  80 #include "ggc.h"
  81 #include "insn-config.h"
  82 #include "recog.h"
  83 #include "pointer-set.h"
  84 #include "hashtab.h"
  85 #include "tree-chrec.h"
  86 #include "tree-scalar-evolution.h"
  87 #include "cfgloop.h"
  88 #include "params.h"
  89 #include "langhooks.h"
  90 #include "tree-affine.h"
  91 #include "target.h"
  92 #include "tree-inline.h"
  93 #include "tree-ssa-propagate.h"
  94
  95 /* FIXME: add_cost and zero_cost defined in exprmed.h conflict with local uses.
  96  */
  97 #include "expmed.h"
  98 #undef add_cost
  99 #undef zero_cost
 100
 101 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 102    cost of different addressing modes.  This should be moved to a TBD
 103    interface between the GIMPLE and RTL worlds.  */
 104 #include "expr.h"
 105
 106 /* The infinite cost.  */
 107 #define INFTY 10000000
 108
 109 #define AVG_LOOP_NITER(LOOP) 5
 110
 111 /* Returns the expected number of loop iterations for LOOP.
 112    The average trip count is computed from profile data if it
 113    exists. */
 114
 115 static inline HOST_WIDE_INT
 116 avg_loop_niter (struct loop *loop)
 117 {
 118   HOST_WIDE_INT niter = max_stmt_executions_int (loop, false);
 119   if (niter == -1)
 120     return AVG_LOOP_NITER (loop);
 121
 122   return niter;
 123 }
 124
 125 /* Representation of the induction variable.  */
 126 struct iv
 127 {
 128   tree base;            /* Initial value of the iv.  */
 129   tree base_object;     /* A memory object to that the induction variable points.  */
 130   tree step;            /* Step of the iv (constant only).  */
 131   tree ssa_name;        /* The ssa name with the value.  */
 132   bool biv_p;           /* Is it a biv?  */
 133   bool have_use_for;    /* Do we already have a use for it?  */
 134   unsigned use_id;      /* The identifier in the use if it is the case.  */
 135 };
 136
 137 /* Per-ssa version information (induction variable descriptions, etc.).  */
 138 struct version_info
 139 {
 140   tree name;            /* The ssa name.  */
 141   struct iv *iv;        /* Induction variable description.  */
 142   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 143                            an expression that is not an induction variable.  */
 144   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 145   unsigned inv_id;      /* Id of an invariant.  */
 146 };
 147
 148 /* Types of uses.  */
 149 enum use_type
 150 {
 151   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 152   USE_ADDRESS,          /* Use in an address.  */
 153   USE_COMPARE           /* Use is a compare.  */
 154 };
 155
 156 /* Cost of a computation.  */
 157 typedef struct
 158 {
 159   int cost;             /* The runtime cost.  */
 160   unsigned complexity;  /* The estimate of the complexity of the code for
 161                            the computation (in no concrete units --
 162                            complexity field should be larger for more
 163                            complex expressions and addressing modes).  */
 164 } comp_cost;
 165
 166 static const comp_cost zero_cost = {0, 0};
 167 static const comp_cost infinite_cost = {INFTY, INFTY};
 168
 169 /* The candidate - cost pair.  */
 170 struct cost_pair
 171 {
 172   struct iv_cand *cand; /* The candidate.  */
 173   comp_cost cost;       /* The cost.  */
 174   bitmap depends_on;    /* The list of invariants that have to be
 175                            preserved.  */
 176   tree value;           /* For final value elimination, the expression for
 177                            the final value of the iv.  For iv elimination,
 178                            the new bound to compare with.  */
 179   enum tree_code comp;  /* For iv elimination, the comparison.  */
 180   int inv_expr_id;      /* Loop invariant expression id.  */
 181 };
 182
 183 /* Use.  */
 184 struct iv_use
 185 {
 186   unsigned id;          /* The id of the use.  */
 187   enum use_type type;   /* Type of the use.  */
 188   struct iv *iv;        /* The induction variable it is based on.  */
 189   gimple stmt;          /* Statement in that it occurs.  */
 190   tree *op_p;           /* The place where it occurs.  */
 191   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 192                            important ones.  */
 193
 194   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 195   struct cost_pair *cost_map;
 196                         /* The costs wrto the iv candidates.  */
 197
 198   struct iv_cand *selected;
 199                         /* The selected candidate.  */
 200 };
 201
 202 /* The position where the iv is computed.  */
 203 enum iv_position
 204 {
 205   IP_NORMAL,            /* At the end, just before the exit condition.  */
 206   IP_END,               /* At the end of the latch block.  */
 207   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 208   IP_AFTER_USE,         /* Immediately after a specific use.  */
 209   IP_ORIGINAL           /* The original biv.  */
 210 };
 211
 212 /* The induction variable candidate.  */
 213 struct iv_cand
 214 {
 215   unsigned id;          /* The number of the candidate.  */
 216   bool important;       /* Whether this is an "important" candidate, i.e. such
 217                            that it should be considered by all uses.  */
 218   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 219   gimple incremented_at;/* For original biv, the statement where it is
 220                            incremented.  */
 221   tree var_before;      /* The variable used for it before increment.  */
 222   tree var_after;       /* The variable used for it after increment.  */
 223   struct iv *iv;        /* The value of the candidate.  NULL for
 224                            "pseudocandidate" used to indicate the possibility
 225                            to replace the final value of an iv by direct
 226                            computation of the value.  */
 227   unsigned cost;        /* Cost of the candidate.  */
 228   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 229   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 230                               where it is incremented.  */
 231   bitmap depends_on;    /* The list of invariants that are used in step of the
 232                            biv.  */
 233 };
 234
 235 /* Loop invariant expression hashtable entry.  */
 236 struct iv_inv_expr_ent
 237 {
 238   tree expr;
 239   int id;
 240   hashval_t hash;
 241 };
 242
 243 /* The data used by the induction variable optimizations.  */
 244
 245 typedef struct iv_use *iv_use_p;
 246 DEF_VEC_P(iv_use_p);
 247 DEF_VEC_ALLOC_P(iv_use_p,heap);
 248
 249 typedef struct iv_cand *iv_cand_p;
 250 DEF_VEC_P(iv_cand_p);
 251 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 252
 253 struct ivopts_data
 254 {
 255   /* The currently optimized loop.  */
 256   struct loop *current_loop;
 257
 258   /* Numbers of iterations for all exits of the current loop.  */
 259   struct pointer_map_t *niters;
 260
 261   /* Number of registers used in it.  */
 262   unsigned regs_used;
 263
 264   /* The size of version_info array allocated.  */
 265   unsigned version_info_size;
 266
 267   /* The array of information for the ssa names.  */
 268   struct version_info *version_info;
 269
 270   /* The hashtable of loop invariant expressions created
 271      by ivopt.  */
 272   htab_t inv_expr_tab;
 273
 274   /* Loop invariant expression id.  */
 275   int inv_expr_id;
 276
 277   /* The bitmap of indices in version_info whose value was changed.  */
 278   bitmap relevant;
 279
 280   /* The uses of induction variables.  */
 281   VEC(iv_use_p,heap) *iv_uses;
 282
 283   /* The candidates.  */
 284   VEC(iv_cand_p,heap) *iv_candidates;
 285
 286   /* A bitmap of important candidates.  */
 287   bitmap important_candidates;
 288
 289   /* The maximum invariant id.  */
 290   unsigned max_inv_id;
 291
 292   /* Whether to consider just related and important candidates when replacing a
 293      use.  */
 294   bool consider_all_candidates;
 295
 296   /* Are we optimizing for speed?  */
 297   bool speed;
 298
 299   /* Whether the loop body includes any function calls.  */
 300   bool body_includes_call;
 301
 302   /* Whether the loop body can only be exited via single exit.  */
 303   bool loop_single_exit_p;
 304 };
 305
 306 /* An assignment of iv candidates to uses.  */
 307
 308 struct iv_ca
 309 {
 310   /* The number of uses covered by the assignment.  */
 311   unsigned upto;
 312
 313   /* Number of uses that cannot be expressed by the candidates in the set.  */
 314   unsigned bad_uses;
 315
 316   /* Candidate assigned to a use, together with the related costs.  */
 317   struct cost_pair **cand_for_use;
 318
 319   /* Number of times each candidate is used.  */
 320   unsigned *n_cand_uses;
 321
 322   /* The candidates used.  */
 323   bitmap cands;
 324
 325   /* The number of candidates in the set.  */
 326   unsigned n_cands;
 327
 328   /* Total number of registers needed.  */
 329   unsigned n_regs;
 330
 331   /* Total cost of expressing uses.  */
 332   comp_cost cand_use_cost;
 333
 334   /* Total cost of candidates.  */
 335   unsigned cand_cost;
 336
 337   /* Number of times each invariant is used.  */
 338   unsigned *n_invariant_uses;
 339
 340   /* The array holding the number of uses of each loop
 341      invariant expressions created by ivopt.  */
 342   unsigned *used_inv_expr;
 343
 344   /* The number of created loop invariants.  */
 345   unsigned num_used_inv_expr;
 346
 347   /* Total cost of the assignment.  */
 348   comp_cost cost;
 349 };
 350
 351 /* Difference of two iv candidate assignments.  */
 352
 353 struct iv_ca_delta
 354 {
 355   /* Changed use.  */
 356   struct iv_use *use;
 357
 358   /* An old assignment (for rollback purposes).  */
 359   struct cost_pair *old_cp;
 360
 361   /* A new assignment.  */
 362   struct cost_pair *new_cp;
 363
 364   /* Next change in the list.  */
 365   struct iv_ca_delta *next_change;
 366 };
 367
 368 /* Bound on number of candidates below that all candidates are considered.  */
 369
 370 #define CONSIDER_ALL_CANDIDATES_BOUND \
 371   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 372
 373 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 374    optimizing such a loop would help, and it would take ages).  */
 375
 376 #define MAX_CONSIDERED_USES \
 377   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 378
 379 /* If there are at most this number of ivs in the set, try removing unnecessary
 380    ivs from the set always.  */
 381
 382 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 383   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 384
 385 /* The list of trees for that the decl_rtl field must be reset is stored
 386    here.  */
 387
 388 static VEC(tree,heap) *decl_rtl_to_reset;
 389
 390 static comp_cost force_expr_to_var_cost (tree, bool);
 391
 392 /* Number of uses recorded in DATA.  */
 393
 394 static inline unsigned
 395 n_iv_uses (struct ivopts_data *data)
 396 {
 397   return VEC_length (iv_use_p, data->iv_uses);
 398 }
 399
 400 /* Ith use recorded in DATA.  */
 401
 402 static inline struct iv_use *
 403 iv_use (struct ivopts_data *data, unsigned i)
 404 {
 405   return VEC_index (iv_use_p, data->iv_uses, i);
 406 }
 407
 408 /* Number of candidates recorded in DATA.  */
 409
 410 static inline unsigned
 411 n_iv_cands (struct ivopts_data *data)
 412 {
 413   return VEC_length (iv_cand_p, data->iv_candidates);
 414 }
 415
 416 /* Ith candidate recorded in DATA.  */
 417
 418 static inline struct iv_cand *
 419 iv_cand (struct ivopts_data *data, unsigned i)
 420 {
 421   return VEC_index (iv_cand_p, data->iv_candidates, i);
 422 }
 423
 424 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 425
 426 edge
 427 single_dom_exit (struct loop *loop)
 428 {
 429   edge exit = single_exit (loop);
 430
 431   if (!exit)
 432     return NULL;
 433
 434   if (!just_once_each_iteration_p (loop, exit->src))
 435     return NULL;
 436
 437   return exit;
 438 }
 439
 440 /* Dumps information about the induction variable IV to FILE.  */
 441
 442 extern void dump_iv (FILE *, struct iv *);
 443 void
 444 dump_iv (FILE *file, struct iv *iv)
 445 {
 446   if (iv->ssa_name)
 447     {
 448       fprintf (file, "ssa name ");
 449       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 450       fprintf (file, "\n");
 451     }
 452
 453   fprintf (file, "  type ");
 454   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 455   fprintf (file, "\n");
 456
 457   if (iv->step)
 458     {
 459       fprintf (file, "  base ");
 460       print_generic_expr (file, iv->base, TDF_SLIM);
 461       fprintf (file, "\n");
 462
 463       fprintf (file, "  step ");
 464       print_generic_expr (file, iv->step, TDF_SLIM);
 465       fprintf (file, "\n");
 466     }
 467   else
 468     {
 469       fprintf (file, "  invariant ");
 470       print_generic_expr (file, iv->base, TDF_SLIM);
 471       fprintf (file, "\n");
 472     }
 473
 474   if (iv->base_object)
 475     {
 476       fprintf (file, "  base object ");
 477       print_generic_expr (file, iv->base_object, TDF_SLIM);
 478       fprintf (file, "\n");
 479     }
 480
 481   if (iv->biv_p)
 482     fprintf (file, "  is a biv\n");
 483 }
 484
 485 /* Dumps information about the USE to FILE.  */
 486
 487 extern void dump_use (FILE *, struct iv_use *);
 488 void
 489 dump_use (FILE *file, struct iv_use *use)
 490 {
 491   fprintf (file, "use %d\n", use->id);
 492
 493   switch (use->type)
 494     {
 495     case USE_NONLINEAR_EXPR:
 496       fprintf (file, "  generic\n");
 497       break;
 498
 499     case USE_ADDRESS:
 500       fprintf (file, "  address\n");
 501       break;
 502
 503     case USE_COMPARE:
 504       fprintf (file, "  compare\n");
 505       break;
 506
 507     default:
 508       gcc_unreachable ();
 509     }
 510
 511   fprintf (file, "  in statement ");
 512   print_gimple_stmt (file, use->stmt, 0, 0);
 513   fprintf (file, "\n");
 514
 515   fprintf (file, "  at position ");
 516   if (use->op_p)
 517     print_generic_expr (file, *use->op_p, TDF_SLIM);
 518   fprintf (file, "\n");
 519
 520   dump_iv (file, use->iv);
 521
 522   if (use->related_cands)
 523     {
 524       fprintf (file, "  related candidates ");
 525       dump_bitmap (file, use->related_cands);
 526     }
 527 }
 528
 529 /* Dumps information about the uses to FILE.  */
 530
 531 extern void dump_uses (FILE *, struct ivopts_data *);
 532 void
 533 dump_uses (FILE *file, struct ivopts_data *data)
 534 {
 535   unsigned i;
 536   struct iv_use *use;
 537
 538   for (i = 0; i < n_iv_uses (data); i++)
 539     {
 540       use = iv_use (data, i);
 541
 542       dump_use (file, use);
 543       fprintf (file, "\n");
 544     }
 545 }
 546
 547 /* Dumps information about induction variable candidate CAND to FILE.  */
 548
 549 extern void dump_cand (FILE *, struct iv_cand *);
 550 void
 551 dump_cand (FILE *file, struct iv_cand *cand)
 552 {
 553   struct iv *iv = cand->iv;
 554
 555   fprintf (file, "candidate %d%s\n",
 556            cand->id, cand->important ? " (important)" : "");
 557
 558   if (cand->depends_on)
 559     {
 560       fprintf (file, "  depends on ");
 561       dump_bitmap (file, cand->depends_on);
 562     }
 563
 564   if (!iv)
 565     {
 566       fprintf (file, "  final value replacement\n");
 567       return;
 568     }
 569
 570   if (cand->var_before)
 571     {
 572       fprintf (file, "  var_before ");
 573       print_generic_expr (file, cand->var_before, TDF_SLIM);
 574       fprintf (file, "\n");
 575     }
 576   if (cand->var_after)
 577     {
 578       fprintf (file, "  var_after ");
 579       print_generic_expr (file, cand->var_after, TDF_SLIM);
 580       fprintf (file, "\n");
 581     }
 582
 583   switch (cand->pos)
 584     {
 585     case IP_NORMAL:
 586       fprintf (file, "  incremented before exit test\n");
 587       break;
 588
 589     case IP_BEFORE_USE:
 590       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 591       break;
 592
 593     case IP_AFTER_USE:
 594       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 595       break;
 596
 597     case IP_END:
 598       fprintf (file, "  incremented at end\n");
 599       break;
 600
 601     case IP_ORIGINAL:
 602       fprintf (file, "  original biv\n");
 603       break;
 604     }
 605
 606   dump_iv (file, iv);
 607 }
 608
 609 /* Returns the info for ssa version VER.  */
 610
 611 static inline struct version_info *
 612 ver_info (struct ivopts_data *data, unsigned ver)
 613 {
 614   return data->version_info + ver;
 615 }
 616
 617 /* Returns the info for ssa name NAME.  */
 618
 619 static inline struct version_info *
 620 name_info (struct ivopts_data *data, tree name)
 621 {
 622   return ver_info (data, SSA_NAME_VERSION (name));
 623 }
 624
 625 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 626    emitted in LOOP.  */
 627
 628 static bool
 629 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 630 {
 631   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 632
 633   gcc_assert (bb);
 634
 635   if (sbb == loop->latch)
 636     return true;
 637
 638   if (sbb != bb)
 639     return false;
 640
 641   return stmt == last_stmt (bb);
 642 }
 643
 644 /* Returns true if STMT if after the place where the original induction
 645    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 646    if the positions are identical.  */
 647
 648 static bool
 649 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 650 {
 651   basic_block cand_bb = gimple_bb (cand->incremented_at);
 652   basic_block stmt_bb = gimple_bb (stmt);
 653
 654   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 655     return false;
 656
 657   if (stmt_bb != cand_bb)
 658     return true;
 659
 660   if (true_if_equal
 661       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 662     return true;
 663   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 664 }
 665
 666 /* Returns true if STMT if after the place where the induction variable
 667    CAND is incremented in LOOP.  */
 668
 669 static bool
 670 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 671 {
 672   switch (cand->pos)
 673     {
 674     case IP_END:
 675       return false;
 676
 677     case IP_NORMAL:
 678       return stmt_after_ip_normal_pos (loop, stmt);
 679
 680     case IP_ORIGINAL:
 681     case IP_AFTER_USE:
 682       return stmt_after_inc_pos (cand, stmt, false);
 683
 684     case IP_BEFORE_USE:
 685       return stmt_after_inc_pos (cand, stmt, true);
 686
 687     default:
 688       gcc_unreachable ();
 689     }
 690 }
 691
 692 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 693
 694 static bool
 695 abnormal_ssa_name_p (tree exp)
 696 {
 697   if (!exp)
 698     return false;
 699
 700   if (TREE_CODE (exp) != SSA_NAME)
 701     return false;
 702
 703   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 704 }
 705
 706 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 707    abnormal phi node.  Callback for for_each_index.  */
 708
 709 static bool
 710 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 711                                   void *data ATTRIBUTE_UNUSED)
 712 {
 713   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 714     {
 715       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 716         return false;
 717       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 718         return false;
 719     }
 720
 721   return !abnormal_ssa_name_p (*index);
 722 }
 723
 724 /* Returns true if EXPR contains a ssa name that occurs in an
 725    abnormal phi node.  */
 726
 727 bool
 728 contains_abnormal_ssa_name_p (tree expr)
 729 {
 730   enum tree_code code;
 731   enum tree_code_class codeclass;
 732
 733   if (!expr)
 734     return false;
 735
 736   code = TREE_CODE (expr);
 737   codeclass = TREE_CODE_CLASS (code);
 738
 739   if (code == SSA_NAME)
 740     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 741
 742   if (code == INTEGER_CST
 743       || is_gimple_min_invariant (expr))
 744     return false;
 745
 746   if (code == ADDR_EXPR)
 747     return !for_each_index (&TREE_OPERAND (expr, 0),
 748                             idx_contains_abnormal_ssa_name_p,
 749                             NULL);
 750
 751   if (code == COND_EXPR)
 752     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 753       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 754       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 755
 756   switch (codeclass)
 757     {
 758     case tcc_binary:
 759     case tcc_comparison:
 760       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 761         return true;
 762
 763       /* Fallthru.  */
 764     case tcc_unary:
 765       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 766         return true;
 767
 768       break;
 769
 770     default:
 771       gcc_unreachable ();
 772     }
 773
 774   return false;
 775 }
 776
 777 /*  Returns the structure describing number of iterations determined from
 778     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 779
 780 static struct tree_niter_desc *
 781 niter_for_exit (struct ivopts_data *data, edge exit)
 782 {
 783   struct tree_niter_desc *desc;
 784   void **slot;
 785
 786   if (!data->niters)
 787     {
 788       data->niters = pointer_map_create ();
 789       slot = NULL;
 790     }
 791   else
 792     slot = pointer_map_contains (data->niters, exit);
 793
 794   if (!slot)
 795     {
 796       /* Try to determine number of iterations.  We cannot safely work with ssa
 797          names that appear in phi nodes on abnormal edges, so that we do not
 798          create overlapping life ranges for them (PR 27283).  */
 799       desc = XNEW (struct tree_niter_desc);
 800       if (!number_of_iterations_exit (data->current_loop,
 801                                       exit, desc, true)
 802           || contains_abnormal_ssa_name_p (desc->niter))
 803         {
 804           XDELETE (desc);
 805           desc = NULL;
 806         }
 807       slot = pointer_map_insert (data->niters, exit);
 808       *slot = desc;
 809     }
 810   else
 811     desc = (struct tree_niter_desc *) *slot;
 812
 813   return desc;
 814 }
 815
 816 /* Returns the structure describing number of iterations determined from
 817    single dominating exit of DATA->current_loop, or NULL if something
 818    goes wrong.  */
 819
 820 static struct tree_niter_desc *
 821 niter_for_single_dom_exit (struct ivopts_data *data)
 822 {
 823   edge exit = single_dom_exit (data->current_loop);
 824
 825   if (!exit)
 826     return NULL;
 827
 828   return niter_for_exit (data, exit);
 829 }
 830
 831 /* Hash table equality function for expressions.  */
 832
 833 static int
 834 htab_inv_expr_eq (const void *ent1, const void *ent2)
 835 {
 836   const struct iv_inv_expr_ent *expr1 =
 837       (const struct iv_inv_expr_ent *)ent1;
 838   const struct iv_inv_expr_ent *expr2 =
 839       (const struct iv_inv_expr_ent *)ent2;
 840
 841   return expr1->hash == expr2->hash
 842          && operand_equal_p (expr1->expr, expr2->expr, 0);
 843 }
 844
 845 /* Hash function for loop invariant expressions.  */
 846
 847 static hashval_t
 848 htab_inv_expr_hash (const void *ent)
 849 {
 850   const struct iv_inv_expr_ent *expr =
 851       (const struct iv_inv_expr_ent *)ent;
 852   return expr->hash;
 853 }
 854
 855 /* Initializes data structures used by the iv optimization pass, stored
 856    in DATA.  */
 857
 858 static void
 859 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 860 {
 861   data->version_info_size = 2 * num_ssa_names;
 862   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 863   data->relevant = BITMAP_ALLOC (NULL);
 864   data->important_candidates = BITMAP_ALLOC (NULL);
 865   data->max_inv_id = 0;
 866   data->niters = NULL;
 867   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 868   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 869   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
 870                                     htab_inv_expr_eq, free);
 871   data->inv_expr_id = 0;
 872   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 873 }
 874
 875 /* Returns a memory object to that EXPR points.  In case we are able to
 876    determine that it does not point to any such object, NULL is returned.  */
 877
 878 static tree
 879 determine_base_object (tree expr)
 880 {
 881   enum tree_code code = TREE_CODE (expr);
 882   tree base, obj;
 883
 884   /* If this is a pointer casted to any type, we need to determine
 885      the base object for the pointer; so handle conversions before
 886      throwing away non-pointer expressions.  */
 887   if (CONVERT_EXPR_P (expr))
 888     return determine_base_object (TREE_OPERAND (expr, 0));
 889
 890   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 891     return NULL_TREE;
 892
 893   switch (code)
 894     {
 895     case INTEGER_CST:
 896       return NULL_TREE;
 897
 898     case ADDR_EXPR:
 899       obj = TREE_OPERAND (expr, 0);
 900       base = get_base_address (obj);
 901
 902       if (!base)
 903         return expr;
 904
 905       if (TREE_CODE (base) == MEM_REF)
 906         return determine_base_object (TREE_OPERAND (base, 0));
 907
 908       return fold_convert (ptr_type_node,
 909                            build_fold_addr_expr (base));
 910
 911     case POINTER_PLUS_EXPR:
 912       return determine_base_object (TREE_OPERAND (expr, 0));
 913
 914     case PLUS_EXPR:
 915     case MINUS_EXPR:
 916       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 917       gcc_unreachable ();
 918
 919     default:
 920       return fold_convert (ptr_type_node, expr);
 921     }
 922 }
 923
 924 /* Allocates an induction variable with given initial value BASE and step STEP
 925    for loop LOOP.  */
 926
 927 static struct iv *
 928 alloc_iv (tree base, tree step)
 929 {
 930   struct iv *iv = XCNEW (struct iv);
 931   gcc_assert (step != NULL_TREE);
 932
 933   iv->base = base;
 934   iv->base_object = determine_base_object (base);
 935   iv->step = step;
 936   iv->biv_p = false;
 937   iv->have_use_for = false;
 938   iv->use_id = 0;
 939   iv->ssa_name = NULL_TREE;
 940
 941   return iv;
 942 }
 943
 944 /* Sets STEP and BASE for induction variable IV.  */
 945
 946 static void
 947 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 948 {
 949   struct version_info *info = name_info (data, iv);
 950
 951   gcc_assert (!info->iv);
 952
 953   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 954   info->iv = alloc_iv (base, step);
 955   info->iv->ssa_name = iv;
 956 }
 957
 958 /* Finds induction variable declaration for VAR.  */
 959
 960 static struct iv *
 961 get_iv (struct ivopts_data *data, tree var)
 962 {
 963   basic_block bb;
 964   tree type = TREE_TYPE (var);
 965
 966   if (!POINTER_TYPE_P (type)
 967       && !INTEGRAL_TYPE_P (type))
 968     return NULL;
 969
 970   if (!name_info (data, var)->iv)
 971     {
 972       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 973
 974       if (!bb
 975           || !flow_bb_inside_loop_p (data->current_loop, bb))
 976         set_iv (data, var, var, build_int_cst (type, 0));
 977     }
 978
 979   return name_info (data, var)->iv;
 980 }
 981
 982 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 983    not define a simple affine biv with nonzero step.  */
 984
 985 static tree
 986 determine_biv_step (gimple phi)
 987 {
 988   struct loop *loop = gimple_bb (phi)->loop_father;
 989   tree name = PHI_RESULT (phi);
 990   affine_iv iv;
 991
 992   if (!is_gimple_reg (name))
 993     return NULL_TREE;
 994
 995   if (!simple_iv (loop, loop, name, &iv, true))
 996     return NULL_TREE;
 997
 998   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 999 }
1000
1001 /* Finds basic ivs.  */
1002
1003 static bool
1004 find_bivs (struct ivopts_data *data)
1005 {
1006   gimple phi;
1007   tree step, type, base;
1008   bool found = false;
1009   struct loop *loop = data->current_loop;
1010   gimple_stmt_iterator psi;
1011
1012   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1013     {
1014       phi = gsi_stmt (psi);
1015
1016       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1017         continue;
1018
1019       step = determine_biv_step (phi);
1020       if (!step)
1021         continue;
1022
1023       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1024       base = expand_simple_operations (base);
1025       if (contains_abnormal_ssa_name_p (base)
1026           || contains_abnormal_ssa_name_p (step))
1027         continue;
1028
1029       type = TREE_TYPE (PHI_RESULT (phi));
1030       base = fold_convert (type, base);
1031       if (step)
1032         {
1033           if (POINTER_TYPE_P (type))
1034             step = convert_to_ptrofftype (step);
1035           else
1036             step = fold_convert (type, step);
1037         }
1038
1039       set_iv (data, PHI_RESULT (phi), base, step);
1040       found = true;
1041     }
1042
1043   return found;
1044 }
1045
1046 /* Marks basic ivs.  */
1047
1048 static void
1049 mark_bivs (struct ivopts_data *data)
1050 {
1051   gimple phi;
1052   tree var;
1053   struct iv *iv, *incr_iv;
1054   struct loop *loop = data->current_loop;
1055   basic_block incr_bb;
1056   gimple_stmt_iterator psi;
1057
1058   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1059     {
1060       phi = gsi_stmt (psi);
1061
1062       iv = get_iv (data, PHI_RESULT (phi));
1063       if (!iv)
1064         continue;
1065
1066       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1067       incr_iv = get_iv (data, var);
1068       if (!incr_iv)
1069         continue;
1070
1071       /* If the increment is in the subloop, ignore it.  */
1072       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1073       if (incr_bb->loop_father != data->current_loop
1074           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1075         continue;
1076
1077       iv->biv_p = true;
1078       incr_iv->biv_p = true;
1079     }
1080 }
1081
1082 /* Checks whether STMT defines a linear induction variable and stores its
1083    parameters to IV.  */
1084
1085 static bool
1086 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1087 {
1088   tree lhs;
1089   struct loop *loop = data->current_loop;
1090
1091   iv->base = NULL_TREE;
1092   iv->step = NULL_TREE;
1093
1094   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1095     return false;
1096
1097   lhs = gimple_assign_lhs (stmt);
1098   if (TREE_CODE (lhs) != SSA_NAME)
1099     return false;
1100
1101   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1102     return false;
1103   iv->base = expand_simple_operations (iv->base);
1104
1105   if (contains_abnormal_ssa_name_p (iv->base)
1106       || contains_abnormal_ssa_name_p (iv->step))
1107     return false;
1108
1109   /* If STMT could throw, then do not consider STMT as defining a GIV.
1110      While this will suppress optimizations, we can not safely delete this
1111      GIV and associated statements, even if it appears it is not used.  */
1112   if (stmt_could_throw_p (stmt))
1113     return false;
1114
1115   return true;
1116 }
1117
1118 /* Finds general ivs in statement STMT.  */
1119
1120 static void
1121 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1122 {
1123   affine_iv iv;
1124
1125   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1126     return;
1127
1128   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1129 }
1130
1131 /* Finds general ivs in basic block BB.  */
1132
1133 static void
1134 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1135 {
1136   gimple_stmt_iterator bsi;
1137
1138   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1139     find_givs_in_stmt (data, gsi_stmt (bsi));
1140 }
1141
1142 /* Finds general ivs.  */
1143
1144 static void
1145 find_givs (struct ivopts_data *data)
1146 {
1147   struct loop *loop = data->current_loop;
1148   basic_block *body = get_loop_body_in_dom_order (loop);
1149   unsigned i;
1150
1151   for (i = 0; i < loop->num_nodes; i++)
1152     find_givs_in_bb (data, body[i]);
1153   free (body);
1154 }
1155
1156 /* For each ssa name defined in LOOP determines whether it is an induction
1157    variable and if so, its initial value and step.  */
1158
1159 static bool
1160 find_induction_variables (struct ivopts_data *data)
1161 {
1162   unsigned i;
1163   bitmap_iterator bi;
1164
1165   if (!find_bivs (data))
1166     return false;
1167
1168   find_givs (data);
1169   mark_bivs (data);
1170
1171   if (dump_file && (dump_flags & TDF_DETAILS))
1172     {
1173       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1174
1175       if (niter)
1176         {
1177           fprintf (dump_file, "  number of iterations ");
1178           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1179           if (!integer_zerop (niter->may_be_zero))
1180             {
1181               fprintf (dump_file, "; zero if ");
1182               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1183             }
1184           fprintf (dump_file, "\n\n");
1185         };
1186
1187       fprintf (dump_file, "Induction variables:\n\n");
1188
1189       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1190         {
1191           if (ver_info (data, i)->iv)
1192             dump_iv (dump_file, ver_info (data, i)->iv);
1193         }
1194     }
1195
1196   return true;
1197 }
1198
1199 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1200
1201 static struct iv_use *
1202 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1203             gimple stmt, enum use_type use_type)
1204 {
1205   struct iv_use *use = XCNEW (struct iv_use);
1206
1207   use->id = n_iv_uses (data);
1208   use->type = use_type;
1209   use->iv = iv;
1210   use->stmt = stmt;
1211   use->op_p = use_p;
1212   use->related_cands = BITMAP_ALLOC (NULL);
1213
1214   /* To avoid showing ssa name in the dumps, if it was not reset by the
1215      caller.  */
1216   iv->ssa_name = NULL_TREE;
1217
1218   if (dump_file && (dump_flags & TDF_DETAILS))
1219     dump_use (dump_file, use);
1220
1221   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1222
1223   return use;
1224 }
1225
1226 /* Checks whether OP is a loop-level invariant and if so, records it.
1227    NONLINEAR_USE is true if the invariant is used in a way we do not
1228    handle specially.  */
1229
1230 static void
1231 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1232 {
1233   basic_block bb;
1234   struct version_info *info;
1235
1236   if (TREE_CODE (op) != SSA_NAME
1237       || !is_gimple_reg (op))
1238     return;
1239
1240   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1241   if (bb
1242       && flow_bb_inside_loop_p (data->current_loop, bb))
1243     return;
1244
1245   info = name_info (data, op);
1246   info->name = op;
1247   info->has_nonlin_use |= nonlinear_use;
1248   if (!info->inv_id)
1249     info->inv_id = ++data->max_inv_id;
1250   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1251 }
1252
1253 /* Checks whether the use OP is interesting and if so, records it.  */
1254
1255 static struct iv_use *
1256 find_interesting_uses_op (struct ivopts_data *data, tree op)
1257 {
1258   struct iv *iv;
1259   struct iv *civ;
1260   gimple stmt;
1261   struct iv_use *use;
1262
1263   if (TREE_CODE (op) != SSA_NAME)
1264     return NULL;
1265
1266   iv = get_iv (data, op);
1267   if (!iv)
1268     return NULL;
1269
1270   if (iv->have_use_for)
1271     {
1272       use = iv_use (data, iv->use_id);
1273
1274       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1275       return use;
1276     }
1277
1278   if (integer_zerop (iv->step))
1279     {
1280       record_invariant (data, op, true);
1281       return NULL;
1282     }
1283   iv->have_use_for = true;
1284
1285   civ = XNEW (struct iv);
1286   *civ = *iv;
1287
1288   stmt = SSA_NAME_DEF_STMT (op);
1289   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1290               || is_gimple_assign (stmt));
1291
1292   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1293   iv->use_id = use->id;
1294
1295   return use;
1296 }
1297
1298 /* Given a condition in statement STMT, checks whether it is a compare
1299    of an induction variable and an invariant.  If this is the case,
1300    CONTROL_VAR is set to location of the iv, BOUND to the location of
1301    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1302    induction variable descriptions, and true is returned.  If this is not
1303    the case, CONTROL_VAR and BOUND are set to the arguments of the
1304    condition and false is returned.  */
1305
1306 static bool
1307 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1308                        tree **control_var, tree **bound,
1309                        struct iv **iv_var, struct iv **iv_bound)
1310 {
1311   /* The objects returned when COND has constant operands.  */
1312   static struct iv const_iv;
1313   static tree zero;
1314   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1315   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1316   bool ret = false;
1317
1318   if (gimple_code (stmt) == GIMPLE_COND)
1319     {
1320       op0 = gimple_cond_lhs_ptr (stmt);
1321       op1 = gimple_cond_rhs_ptr (stmt);
1322     }
1323   else
1324     {
1325       op0 = gimple_assign_rhs1_ptr (stmt);
1326       op1 = gimple_assign_rhs2_ptr (stmt);
1327     }
1328
1329   zero = integer_zero_node;
1330   const_iv.step = integer_zero_node;
1331
1332   if (TREE_CODE (*op0) == SSA_NAME)
1333     iv0 = get_iv (data, *op0);
1334   if (TREE_CODE (*op1) == SSA_NAME)
1335     iv1 = get_iv (data, *op1);
1336
1337   /* Exactly one of the compared values must be an iv, and the other one must
1338      be an invariant.  */
1339   if (!iv0 || !iv1)
1340     goto end;
1341
1342   if (integer_zerop (iv0->step))
1343     {
1344       /* Control variable may be on the other side.  */
1345       tmp_op = op0; op0 = op1; op1 = tmp_op;
1346       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1347     }
1348   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1349
1350 end:
1351   if (control_var)
1352     *control_var = op0;;
1353   if (iv_var)
1354     *iv_var = iv0;;
1355   if (bound)
1356     *bound = op1;
1357   if (iv_bound)
1358     *iv_bound = iv1;
1359
1360   return ret;
1361 }
1362
1363 /* Checks whether the condition in STMT is interesting and if so,
1364    records it.  */
1365
1366 static void
1367 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1368 {
1369   tree *var_p, *bound_p;
1370   struct iv *var_iv, *civ;
1371
1372   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1373     {
1374       find_interesting_uses_op (data, *var_p);
1375       find_interesting_uses_op (data, *bound_p);
1376       return;
1377     }
1378
1379   civ = XNEW (struct iv);
1380   *civ = *var_iv;
1381   record_use (data, NULL, civ, stmt, USE_COMPARE);
1382 }
1383
1384 /* Returns true if expression EXPR is obviously invariant in LOOP,
1385    i.e. if all its operands are defined outside of the LOOP.  LOOP
1386    should not be the function body.  */
1387
1388 bool
1389 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1390 {
1391   basic_block def_bb;
1392   unsigned i, len;
1393
1394   gcc_assert (loop_depth (loop) > 0);
1395
1396   if (is_gimple_min_invariant (expr))
1397     return true;
1398
1399   if (TREE_CODE (expr) == SSA_NAME)
1400     {
1401       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1402       if (def_bb
1403           && flow_bb_inside_loop_p (loop, def_bb))
1404         return false;
1405
1406       return true;
1407     }
1408
1409   if (!EXPR_P (expr))
1410     return false;
1411
1412   len = TREE_OPERAND_LENGTH (expr);
1413   for (i = 0; i < len; i++)
1414     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1415       return false;
1416
1417   return true;
1418 }
1419
1420 /* Returns true if statement STMT is obviously invariant in LOOP,
1421    i.e. if all its operands on the RHS are defined outside of the LOOP.
1422    LOOP should not be the function body.  */
1423
1424 bool
1425 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1426 {
1427   unsigned i;
1428   tree lhs;
1429
1430   gcc_assert (loop_depth (loop) > 0);
1431
1432   lhs = gimple_get_lhs (stmt);
1433   for (i = 0; i < gimple_num_ops (stmt); i++)
1434     {
1435       tree op = gimple_op (stmt, i);
1436       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1437         return false;
1438     }
1439
1440   return true;
1441 }
1442
1443 /* Cumulates the steps of indices into DATA and replaces their values with the
1444    initial ones.  Returns false when the value of the index cannot be determined.
1445    Callback for for_each_index.  */
1446
1447 struct ifs_ivopts_data
1448 {
1449   struct ivopts_data *ivopts_data;
1450   gimple stmt;
1451   tree step;
1452 };
1453
1454 static bool
1455 idx_find_step (tree base, tree *idx, void *data)
1456 {
1457   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1458   struct iv *iv;
1459   tree step, iv_base, iv_step, lbound, off;
1460   struct loop *loop = dta->ivopts_data->current_loop;
1461
1462   /* If base is a component ref, require that the offset of the reference
1463      be invariant.  */
1464   if (TREE_CODE (base) == COMPONENT_REF)
1465     {
1466       off = component_ref_field_offset (base);
1467       return expr_invariant_in_loop_p (loop, off);
1468     }
1469
1470   /* If base is array, first check whether we will be able to move the
1471      reference out of the loop (in order to take its address in strength
1472      reduction).  In order for this to work we need both lower bound
1473      and step to be loop invariants.  */
1474   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1475     {
1476       /* Moreover, for a range, the size needs to be invariant as well.  */
1477       if (TREE_CODE (base) == ARRAY_RANGE_REF
1478           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1479         return false;
1480
1481       step = array_ref_element_size (base);
1482       lbound = array_ref_low_bound (base);
1483
1484       if (!expr_invariant_in_loop_p (loop, step)
1485           || !expr_invariant_in_loop_p (loop, lbound))
1486         return false;
1487     }
1488
1489   if (TREE_CODE (*idx) != SSA_NAME)
1490     return true;
1491
1492   iv = get_iv (dta->ivopts_data, *idx);
1493   if (!iv)
1494     return false;
1495
1496   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1497           *&x[0], which is not folded and does not trigger the
1498           ARRAY_REF path below.  */
1499   *idx = iv->base;
1500
1501   if (integer_zerop (iv->step))
1502     return true;
1503
1504   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1505     {
1506       step = array_ref_element_size (base);
1507
1508       /* We only handle addresses whose step is an integer constant.  */
1509       if (TREE_CODE (step) != INTEGER_CST)
1510         return false;
1511     }
1512   else
1513     /* The step for pointer arithmetics already is 1 byte.  */
1514     step = size_one_node;
1515
1516   iv_base = iv->base;
1517   iv_step = iv->step;
1518   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1519                             sizetype, &iv_base, &iv_step, dta->stmt,
1520                             false))
1521     {
1522       /* The index might wrap.  */
1523       return false;
1524     }
1525
1526   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1527   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1528
1529   return true;
1530 }
1531
1532 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1533    object is passed to it in DATA.  */
1534
1535 static bool
1536 idx_record_use (tree base, tree *idx,
1537                 void *vdata)
1538 {
1539   struct ivopts_data *data = (struct ivopts_data *) vdata;
1540   find_interesting_uses_op (data, *idx);
1541   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1542     {
1543       find_interesting_uses_op (data, array_ref_element_size (base));
1544       find_interesting_uses_op (data, array_ref_low_bound (base));
1545     }
1546   return true;
1547 }
1548
1549 /* If we can prove that TOP = cst * BOT for some constant cst,
1550    store cst to MUL and return true.  Otherwise return false.
1551    The returned value is always sign-extended, regardless of the
1552    signedness of TOP and BOT.  */
1553
1554 static bool
1555 constant_multiple_of (tree top, tree bot, double_int *mul)
1556 {
1557   tree mby;
1558   enum tree_code code;
1559   double_int res, p0, p1;
1560   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1561
1562   STRIP_NOPS (top);
1563   STRIP_NOPS (bot);
1564
1565   if (operand_equal_p (top, bot, 0))
1566     {
1567       *mul = double_int_one;
1568       return true;
1569     }
1570
1571   code = TREE_CODE (top);
1572   switch (code)
1573     {
1574     case MULT_EXPR:
1575       mby = TREE_OPERAND (top, 1);
1576       if (TREE_CODE (mby) != INTEGER_CST)
1577         return false;
1578
1579       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1580         return false;
1581
1582       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1583                               precision);
1584       return true;
1585
1586     case PLUS_EXPR:
1587     case MINUS_EXPR:
1588       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1589           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1590         return false;
1591
1592       if (code == MINUS_EXPR)
1593         p1 = double_int_neg (p1);
1594       *mul = double_int_sext (double_int_add (p0, p1), precision);
1595       return true;
1596
1597     case INTEGER_CST:
1598       if (TREE_CODE (bot) != INTEGER_CST)
1599         return false;
1600
1601       p0 = double_int_sext (tree_to_double_int (top), precision);
1602       p1 = double_int_sext (tree_to_double_int (bot), precision);
1603       if (double_int_zero_p (p1))
1604         return false;
1605       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1606                               precision);
1607       return double_int_zero_p (res);
1608
1609     default:
1610       return false;
1611     }
1612 }
1613
1614 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1615
1616 static bool
1617 may_be_unaligned_p (tree ref, tree step)
1618 {
1619   tree base;
1620   tree base_type;
1621   HOST_WIDE_INT bitsize;
1622   HOST_WIDE_INT bitpos;
1623   tree toffset;
1624   enum machine_mode mode;
1625   int unsignedp, volatilep;
1626   unsigned base_align;
1627
1628   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1629      thus they are not misaligned.  */
1630   if (TREE_CODE (ref) == TARGET_MEM_REF)
1631     return false;
1632
1633   /* The test below is basically copy of what expr.c:normal_inner_ref
1634      does to check whether the object must be loaded by parts when
1635      STRICT_ALIGNMENT is true.  */
1636   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1637                               &unsignedp, &volatilep, true);
1638   base_type = TREE_TYPE (base);
1639   base_align = get_object_alignment (base);
1640   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1641
1642   if (mode != BLKmode)
1643     {
1644       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1645
1646       if (base_align < mode_align
1647           || (bitpos % mode_align) != 0
1648           || (bitpos % BITS_PER_UNIT) != 0)
1649         return true;
1650
1651       if (toffset
1652           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1653         return true;
1654
1655       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1656         return true;
1657     }
1658
1659   return false;
1660 }
1661
1662 /* Return true if EXPR may be non-addressable.   */
1663
1664 bool
1665 may_be_nonaddressable_p (tree expr)
1666 {
1667   switch (TREE_CODE (expr))
1668     {
1669     case TARGET_MEM_REF:
1670       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1671          target, thus they are always addressable.  */
1672       return false;
1673
1674     case COMPONENT_REF:
1675       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1676              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1677
1678     case VIEW_CONVERT_EXPR:
1679       /* This kind of view-conversions may wrap non-addressable objects
1680          and make them look addressable.  After some processing the
1681          non-addressability may be uncovered again, causing ADDR_EXPRs
1682          of inappropriate objects to be built.  */
1683       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1684           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1685         return true;
1686
1687       /* ... fall through ... */
1688
1689     case ARRAY_REF:
1690     case ARRAY_RANGE_REF:
1691       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1692
1693     CASE_CONVERT:
1694       return true;
1695
1696     default:
1697       break;
1698     }
1699
1700   return false;
1701 }
1702
1703 /* Finds addresses in *OP_P inside STMT.  */
1704
1705 static void
1706 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1707 {
1708   tree base = *op_p, step = size_zero_node;
1709   struct iv *civ;
1710   struct ifs_ivopts_data ifs_ivopts_data;
1711
1712   /* Do not play with volatile memory references.  A bit too conservative,
1713      perhaps, but safe.  */
1714   if (gimple_has_volatile_ops (stmt))
1715     goto fail;
1716
1717   /* Ignore bitfields for now.  Not really something terribly complicated
1718      to handle.  TODO.  */
1719   if (TREE_CODE (base) == BIT_FIELD_REF)
1720     goto fail;
1721
1722   base = unshare_expr (base);
1723
1724   if (TREE_CODE (base) == TARGET_MEM_REF)
1725     {
1726       tree type = build_pointer_type (TREE_TYPE (base));
1727       tree astep;
1728
1729       if (TMR_BASE (base)
1730           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1731         {
1732           civ = get_iv (data, TMR_BASE (base));
1733           if (!civ)
1734             goto fail;
1735
1736           TMR_BASE (base) = civ->base;
1737           step = civ->step;
1738         }
1739       if (TMR_INDEX2 (base)
1740           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1741         {
1742           civ = get_iv (data, TMR_INDEX2 (base));
1743           if (!civ)
1744             goto fail;
1745
1746           TMR_INDEX2 (base) = civ->base;
1747           step = civ->step;
1748         }
1749       if (TMR_INDEX (base)
1750           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1751         {
1752           civ = get_iv (data, TMR_INDEX (base));
1753           if (!civ)
1754             goto fail;
1755
1756           TMR_INDEX (base) = civ->base;
1757           astep = civ->step;
1758
1759           if (astep)
1760             {
1761               if (TMR_STEP (base))
1762                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1763
1764               step = fold_build2 (PLUS_EXPR, type, step, astep);
1765             }
1766         }
1767
1768       if (integer_zerop (step))
1769         goto fail;
1770       base = tree_mem_ref_addr (type, base);
1771     }
1772   else
1773     {
1774       ifs_ivopts_data.ivopts_data = data;
1775       ifs_ivopts_data.stmt = stmt;
1776       ifs_ivopts_data.step = size_zero_node;
1777       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1778           || integer_zerop (ifs_ivopts_data.step))
1779         goto fail;
1780       step = ifs_ivopts_data.step;
1781
1782       /* Check that the base expression is addressable.  This needs
1783          to be done after substituting bases of IVs into it.  */
1784       if (may_be_nonaddressable_p (base))
1785         goto fail;
1786
1787       /* Moreover, on strict alignment platforms, check that it is
1788          sufficiently aligned.  */
1789       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1790         goto fail;
1791
1792       base = build_fold_addr_expr (base);
1793
1794       /* Substituting bases of IVs into the base expression might
1795          have caused folding opportunities.  */
1796       if (TREE_CODE (base) == ADDR_EXPR)
1797         {
1798           tree *ref = &TREE_OPERAND (base, 0);
1799           while (handled_component_p (*ref))
1800             ref = &TREE_OPERAND (*ref, 0);
1801           if (TREE_CODE (*ref) == MEM_REF)
1802             {
1803               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1804                                       TREE_OPERAND (*ref, 0),
1805                                       TREE_OPERAND (*ref, 1));
1806               if (tem)
1807                 *ref = tem;
1808             }
1809         }
1810     }
1811
1812   civ = alloc_iv (base, step);
1813   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1814   return;
1815
1816 fail:
1817   for_each_index (op_p, idx_record_use, data);
1818 }
1819
1820 /* Finds and records invariants used in STMT.  */
1821
1822 static void
1823 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1824 {
1825   ssa_op_iter iter;
1826   use_operand_p use_p;
1827   tree op;
1828
1829   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1830     {
1831       op = USE_FROM_PTR (use_p);
1832       record_invariant (data, op, false);
1833     }
1834 }
1835
1836 /* Finds interesting uses of induction variables in the statement STMT.  */
1837
1838 static void
1839 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1840 {
1841   struct iv *iv;
1842   tree op, *lhs, *rhs;
1843   ssa_op_iter iter;
1844   use_operand_p use_p;
1845   enum tree_code code;
1846
1847   find_invariants_stmt (data, stmt);
1848
1849   if (gimple_code (stmt) == GIMPLE_COND)
1850     {
1851       find_interesting_uses_cond (data, stmt);
1852       return;
1853     }
1854
1855   if (is_gimple_assign (stmt))
1856     {
1857       lhs = gimple_assign_lhs_ptr (stmt);
1858       rhs = gimple_assign_rhs1_ptr (stmt);
1859
1860       if (TREE_CODE (*lhs) == SSA_NAME)
1861         {
1862           /* If the statement defines an induction variable, the uses are not
1863              interesting by themselves.  */
1864
1865           iv = get_iv (data, *lhs);
1866
1867           if (iv && !integer_zerop (iv->step))
1868             return;
1869         }
1870
1871       code = gimple_assign_rhs_code (stmt);
1872       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1873           && (REFERENCE_CLASS_P (*rhs)
1874               || is_gimple_val (*rhs)))
1875         {
1876           if (REFERENCE_CLASS_P (*rhs))
1877             find_interesting_uses_address (data, stmt, rhs);
1878           else
1879             find_interesting_uses_op (data, *rhs);
1880
1881           if (REFERENCE_CLASS_P (*lhs))
1882             find_interesting_uses_address (data, stmt, lhs);
1883           return;
1884         }
1885       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1886         {
1887           find_interesting_uses_cond (data, stmt);
1888           return;
1889         }
1890
1891       /* TODO -- we should also handle address uses of type
1892
1893          memory = call (whatever);
1894
1895          and
1896
1897          call (memory).  */
1898     }
1899
1900   if (gimple_code (stmt) == GIMPLE_PHI
1901       && gimple_bb (stmt) == data->current_loop->header)
1902     {
1903       iv = get_iv (data, PHI_RESULT (stmt));
1904
1905       if (iv && !integer_zerop (iv->step))
1906         return;
1907     }
1908
1909   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1910     {
1911       op = USE_FROM_PTR (use_p);
1912
1913       if (TREE_CODE (op) != SSA_NAME)
1914         continue;
1915
1916       iv = get_iv (data, op);
1917       if (!iv)
1918         continue;
1919
1920       find_interesting_uses_op (data, op);
1921     }
1922 }
1923
1924 /* Finds interesting uses of induction variables outside of loops
1925    on loop exit edge EXIT.  */
1926
1927 static void
1928 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1929 {
1930   gimple phi;
1931   gimple_stmt_iterator psi;
1932   tree def;
1933
1934   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1935     {
1936       phi = gsi_stmt (psi);
1937       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1938       if (is_gimple_reg (def))
1939         find_interesting_uses_op (data, def);
1940     }
1941 }
1942
1943 /* Finds uses of the induction variables that are interesting.  */
1944
1945 static void
1946 find_interesting_uses (struct ivopts_data *data)
1947 {
1948   basic_block bb;
1949   gimple_stmt_iterator bsi;
1950   basic_block *body = get_loop_body (data->current_loop);
1951   unsigned i;
1952   struct version_info *info;
1953   edge e;
1954
1955   if (dump_file && (dump_flags & TDF_DETAILS))
1956     fprintf (dump_file, "Uses:\n\n");
1957
1958   for (i = 0; i < data->current_loop->num_nodes; i++)
1959     {
1960       edge_iterator ei;
1961       bb = body[i];
1962
1963       FOR_EACH_EDGE (e, ei, bb->succs)
1964         if (e->dest != EXIT_BLOCK_PTR
1965             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1966           find_interesting_uses_outside (data, e);
1967
1968       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1969         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1970       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1971         if (!is_gimple_debug (gsi_stmt (bsi)))
1972           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1973     }
1974
1975   if (dump_file && (dump_flags & TDF_DETAILS))
1976     {
1977       bitmap_iterator bi;
1978
1979       fprintf (dump_file, "\n");
1980
1981       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1982         {
1983           info = ver_info (data, i);
1984           if (info->inv_id)
1985             {
1986               fprintf (dump_file, "  ");
1987               print_generic_expr (dump_file, info->name, TDF_SLIM);
1988               fprintf (dump_file, " is invariant (%d)%s\n",
1989                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1990             }
1991         }
1992
1993       fprintf (dump_file, "\n");
1994     }
1995
1996   free (body);
1997 }
1998
1999 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2000    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2001    we are at the top-level of the processed address.  */
2002
2003 static tree
2004 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2005                 unsigned HOST_WIDE_INT *offset)
2006 {
2007   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2008   enum tree_code code;
2009   tree type, orig_type = TREE_TYPE (expr);
2010   unsigned HOST_WIDE_INT off0, off1, st;
2011   tree orig_expr = expr;
2012
2013   STRIP_NOPS (expr);
2014
2015   type = TREE_TYPE (expr);
2016   code = TREE_CODE (expr);
2017   *offset = 0;
2018
2019   switch (code)
2020     {
2021     case INTEGER_CST:
2022       if (!cst_and_fits_in_hwi (expr)
2023           || integer_zerop (expr))
2024         return orig_expr;
2025
2026       *offset = int_cst_value (expr);
2027       return build_int_cst (orig_type, 0);
2028
2029     case POINTER_PLUS_EXPR:
2030     case PLUS_EXPR:
2031     case MINUS_EXPR:
2032       op0 = TREE_OPERAND (expr, 0);
2033       op1 = TREE_OPERAND (expr, 1);
2034
2035       op0 = strip_offset_1 (op0, false, false, &off0);
2036       op1 = strip_offset_1 (op1, false, false, &off1);
2037
2038       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2039       if (op0 == TREE_OPERAND (expr, 0)
2040           && op1 == TREE_OPERAND (expr, 1))
2041         return orig_expr;
2042
2043       if (integer_zerop (op1))
2044         expr = op0;
2045       else if (integer_zerop (op0))
2046         {
2047           if (code == MINUS_EXPR)
2048             expr = fold_build1 (NEGATE_EXPR, type, op1);
2049           else
2050             expr = op1;
2051         }
2052       else
2053         expr = fold_build2 (code, type, op0, op1);
2054
2055       return fold_convert (orig_type, expr);
2056
2057     case MULT_EXPR:
2058       op1 = TREE_OPERAND (expr, 1);
2059       if (!cst_and_fits_in_hwi (op1))
2060         return orig_expr;
2061
2062       op0 = TREE_OPERAND (expr, 0);
2063       op0 = strip_offset_1 (op0, false, false, &off0);
2064       if (op0 == TREE_OPERAND (expr, 0))
2065         return orig_expr;
2066
2067       *offset = off0 * int_cst_value (op1);
2068       if (integer_zerop (op0))
2069         expr = op0;
2070       else
2071         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2072
2073       return fold_convert (orig_type, expr);
2074
2075     case ARRAY_REF:
2076     case ARRAY_RANGE_REF:
2077       if (!inside_addr)
2078         return orig_expr;
2079
2080       step = array_ref_element_size (expr);
2081       if (!cst_and_fits_in_hwi (step))
2082         break;
2083
2084       st = int_cst_value (step);
2085       op1 = TREE_OPERAND (expr, 1);
2086       op1 = strip_offset_1 (op1, false, false, &off1);
2087       *offset = off1 * st;
2088
2089       if (top_compref
2090           && integer_zerop (op1))
2091         {
2092           /* Strip the component reference completely.  */
2093           op0 = TREE_OPERAND (expr, 0);
2094           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2095           *offset += off0;
2096           return op0;
2097         }
2098       break;
2099
2100     case COMPONENT_REF:
2101       if (!inside_addr)
2102         return orig_expr;
2103
2104       tmp = component_ref_field_offset (expr);
2105       if (top_compref
2106           && cst_and_fits_in_hwi (tmp))
2107         {
2108           /* Strip the component reference completely.  */
2109           op0 = TREE_OPERAND (expr, 0);
2110           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2111           *offset = off0 + int_cst_value (tmp);
2112           return op0;
2113         }
2114       break;
2115
2116     case ADDR_EXPR:
2117       op0 = TREE_OPERAND (expr, 0);
2118       op0 = strip_offset_1 (op0, true, true, &off0);
2119       *offset += off0;
2120
2121       if (op0 == TREE_OPERAND (expr, 0))
2122         return orig_expr;
2123
2124       expr = build_fold_addr_expr (op0);
2125       return fold_convert (orig_type, expr);
2126
2127     case MEM_REF:
2128       /* ???  Offset operand?  */
2129       inside_addr = false;
2130       break;
2131
2132     default:
2133       return orig_expr;
2134     }
2135
2136   /* Default handling of expressions for that we want to recurse into
2137      the first operand.  */
2138   op0 = TREE_OPERAND (expr, 0);
2139   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2140   *offset += off0;
2141
2142   if (op0 == TREE_OPERAND (expr, 0)
2143       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2144     return orig_expr;
2145
2146   expr = copy_node (expr);
2147   TREE_OPERAND (expr, 0) = op0;
2148   if (op1)
2149     TREE_OPERAND (expr, 1) = op1;
2150
2151   /* Inside address, we might strip the top level component references,
2152      thus changing type of the expression.  Handling of ADDR_EXPR
2153      will fix that.  */
2154   expr = fold_convert (orig_type, expr);
2155
2156   return expr;
2157 }
2158
2159 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2160
2161 static tree
2162 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2163 {
2164   return strip_offset_1 (expr, false, false, offset);
2165 }
2166
2167 /* Returns variant of TYPE that can be used as base for different uses.
2168    We return unsigned type with the same precision, which avoids problems
2169    with overflows.  */
2170
2171 static tree
2172 generic_type_for (tree type)
2173 {
2174   if (POINTER_TYPE_P (type))
2175     return unsigned_type_for (type);
2176
2177   if (TYPE_UNSIGNED (type))
2178     return type;
2179
2180   return unsigned_type_for (type);
2181 }
2182
2183 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2184    the bitmap to that we should store it.  */
2185
2186 static struct ivopts_data *fd_ivopts_data;
2187 static tree
2188 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2189 {
2190   bitmap *depends_on = (bitmap *) data;
2191   struct version_info *info;
2192
2193   if (TREE_CODE (*expr_p) != SSA_NAME)
2194     return NULL_TREE;
2195   info = name_info (fd_ivopts_data, *expr_p);
2196
2197   if (!info->inv_id || info->has_nonlin_use)
2198     return NULL_TREE;
2199
2200   if (!*depends_on)
2201     *depends_on = BITMAP_ALLOC (NULL);
2202   bitmap_set_bit (*depends_on, info->inv_id);
2203
2204   return NULL_TREE;
2205 }
2206
2207 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2208    position to POS.  If USE is not NULL, the candidate is set as related to
2209    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2210    replacement of the final value of the iv by a direct computation.  */
2211
2212 static struct iv_cand *
2213 add_candidate_1 (struct ivopts_data *data,
2214                  tree base, tree step, bool important, enum iv_position pos,
2215                  struct iv_use *use, gimple incremented_at)
2216 {
2217   unsigned i;
2218   struct iv_cand *cand = NULL;
2219   tree type, orig_type;
2220
2221   /* For non-original variables, make sure their values are computed in a type
2222      that does not invoke undefined behavior on overflows (since in general,
2223      we cannot prove that these induction variables are non-wrapping).  */
2224   if (pos != IP_ORIGINAL)
2225     {
2226       orig_type = TREE_TYPE (base);
2227       type = generic_type_for (orig_type);
2228       if (type != orig_type)
2229         {
2230           base = fold_convert (type, base);
2231           step = fold_convert (type, step);
2232         }
2233     }
2234
2235   for (i = 0; i < n_iv_cands (data); i++)
2236     {
2237       cand = iv_cand (data, i);
2238
2239       if (cand->pos != pos)
2240         continue;
2241
2242       if (cand->incremented_at != incremented_at
2243           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2244               && cand->ainc_use != use))
2245         continue;
2246
2247       if (!cand->iv)
2248         {
2249           if (!base && !step)
2250             break;
2251
2252           continue;
2253         }
2254
2255       if (!base && !step)
2256         continue;
2257
2258       if (operand_equal_p (base, cand->iv->base, 0)
2259           && operand_equal_p (step, cand->iv->step, 0)
2260           && (TYPE_PRECISION (TREE_TYPE (base))
2261               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2262         break;
2263     }
2264
2265   if (i == n_iv_cands (data))
2266     {
2267       cand = XCNEW (struct iv_cand);
2268       cand->id = i;
2269
2270       if (!base && !step)
2271         cand->iv = NULL;
2272       else
2273         cand->iv = alloc_iv (base, step);
2274
2275       cand->pos = pos;
2276       if (pos != IP_ORIGINAL && cand->iv)
2277         {
2278           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2279           cand->var_after = cand->var_before;
2280         }
2281       cand->important = important;
2282       cand->incremented_at = incremented_at;
2283       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2284
2285       if (step
2286           && TREE_CODE (step) != INTEGER_CST)
2287         {
2288           fd_ivopts_data = data;
2289           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2290         }
2291
2292       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2293         cand->ainc_use = use;
2294       else
2295         cand->ainc_use = NULL;
2296
2297       if (dump_file && (dump_flags & TDF_DETAILS))
2298         dump_cand (dump_file, cand);
2299     }
2300
2301   if (important && !cand->important)
2302     {
2303       cand->important = true;
2304       if (dump_file && (dump_flags & TDF_DETAILS))
2305         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2306     }
2307
2308   if (use)
2309     {
2310       bitmap_set_bit (use->related_cands, i);
2311       if (dump_file && (dump_flags & TDF_DETAILS))
2312         fprintf (dump_file, "Candidate %d is related to use %d\n",
2313                  cand->id, use->id);
2314     }
2315
2316   return cand;
2317 }
2318
2319 /* Returns true if incrementing the induction variable at the end of the LOOP
2320    is allowed.
2321
2322    The purpose is to avoid splitting latch edge with a biv increment, thus
2323    creating a jump, possibly confusing other optimization passes and leaving
2324    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2325    is not available (so we do not have a better alternative), or if the latch
2326    edge is already nonempty.  */
2327
2328 static bool
2329 allow_ip_end_pos_p (struct loop *loop)
2330 {
2331   if (!ip_normal_pos (loop))
2332     return true;
2333
2334   if (!empty_block_p (ip_end_pos (loop)))
2335     return true;
2336
2337   return false;
2338 }
2339
2340 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2341    Important field is set to IMPORTANT.  */
2342
2343 static void
2344 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2345                         bool important, struct iv_use *use)
2346 {
2347   basic_block use_bb = gimple_bb (use->stmt);
2348   enum machine_mode mem_mode;
2349   unsigned HOST_WIDE_INT cstepi;
2350
2351   /* If we insert the increment in any position other than the standard
2352      ones, we must ensure that it is incremented once per iteration.
2353      It must not be in an inner nested loop, or one side of an if
2354      statement.  */
2355   if (use_bb->loop_father != data->current_loop
2356       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2357       || stmt_could_throw_p (use->stmt)
2358       || !cst_and_fits_in_hwi (step))
2359     return;
2360
2361   cstepi = int_cst_value (step);
2362
2363   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2364   if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2365       || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2366     {
2367       enum tree_code code = MINUS_EXPR;
2368       tree new_base;
2369       tree new_step = step;
2370
2371       if (POINTER_TYPE_P (TREE_TYPE (base)))
2372         {
2373           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2374           code = POINTER_PLUS_EXPR;
2375         }
2376       else
2377         new_step = fold_convert (TREE_TYPE (base), new_step);
2378       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2379       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2380                        use->stmt);
2381     }
2382   if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2383       || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2384     {
2385       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2386                        use->stmt);
2387     }
2388 }
2389
2390 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2391    position to POS.  If USE is not NULL, the candidate is set as related to
2392    it.  The candidate computation is scheduled on all available positions.  */
2393
2394 static void
2395 add_candidate (struct ivopts_data *data,
2396                tree base, tree step, bool important, struct iv_use *use)
2397 {
2398   if (ip_normal_pos (data->current_loop))
2399     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2400   if (ip_end_pos (data->current_loop)
2401       && allow_ip_end_pos_p (data->current_loop))
2402     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2403
2404   if (use != NULL && use->type == USE_ADDRESS)
2405     add_autoinc_candidates (data, base, step, important, use);
2406 }
2407
2408 /* Adds standard iv candidates.  */
2409
2410 static void
2411 add_standard_iv_candidates (struct ivopts_data *data)
2412 {
2413   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2414
2415   /* The same for a double-integer type if it is still fast enough.  */
2416   if (TYPE_PRECISION
2417         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2418       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2419     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2420                    build_int_cst (long_integer_type_node, 1), true, NULL);
2421
2422   /* The same for a double-integer type if it is still fast enough.  */
2423   if (TYPE_PRECISION
2424         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2425       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2426     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2427                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2428 }
2429
2430
2431 /* Adds candidates bases on the old induction variable IV.  */
2432
2433 static void
2434 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2435 {
2436   gimple phi;
2437   tree def;
2438   struct iv_cand *cand;
2439
2440   add_candidate (data, iv->base, iv->step, true, NULL);
2441
2442   /* The same, but with initial value zero.  */
2443   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2444     add_candidate (data, size_int (0), iv->step, true, NULL);
2445   else
2446     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2447                    iv->step, true, NULL);
2448
2449   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2450   if (gimple_code (phi) == GIMPLE_PHI)
2451     {
2452       /* Additionally record the possibility of leaving the original iv
2453          untouched.  */
2454       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2455       cand = add_candidate_1 (data,
2456                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2457                               SSA_NAME_DEF_STMT (def));
2458       cand->var_before = iv->ssa_name;
2459       cand->var_after = def;
2460     }
2461 }
2462
2463 /* Adds candidates based on the old induction variables.  */
2464
2465 static void
2466 add_old_ivs_candidates (struct ivopts_data *data)
2467 {
2468   unsigned i;
2469   struct iv *iv;
2470   bitmap_iterator bi;
2471
2472   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2473     {
2474       iv = ver_info (data, i)->iv;
2475       if (iv && iv->biv_p && !integer_zerop (iv->step))
2476         add_old_iv_candidates (data, iv);
2477     }
2478 }
2479
2480 /* Adds candidates based on the value of the induction variable IV and USE.  */
2481
2482 static void
2483 add_iv_value_candidates (struct ivopts_data *data,
2484                          struct iv *iv, struct iv_use *use)
2485 {
2486   unsigned HOST_WIDE_INT offset;
2487   tree base;
2488   tree basetype;
2489
2490   add_candidate (data, iv->base, iv->step, false, use);
2491
2492   /* The same, but with initial value zero.  Make such variable important,
2493      since it is generic enough so that possibly many uses may be based
2494      on it.  */
2495   basetype = TREE_TYPE (iv->base);
2496   if (POINTER_TYPE_P (basetype))
2497     basetype = sizetype;
2498   add_candidate (data, build_int_cst (basetype, 0),
2499                  iv->step, true, use);
2500
2501   /* Third, try removing the constant offset.  Make sure to even
2502      add a candidate for &a[0] vs. (T *)&a.  */
2503   base = strip_offset (iv->base, &offset);
2504   if (offset
2505       || base != iv->base)
2506     add_candidate (data, base, iv->step, false, use);
2507 }
2508
2509 /* Adds candidates based on the uses.  */
2510
2511 static void
2512 add_derived_ivs_candidates (struct ivopts_data *data)
2513 {
2514   unsigned i;
2515
2516   for (i = 0; i < n_iv_uses (data); i++)
2517     {
2518       struct iv_use *use = iv_use (data, i);
2519
2520       if (!use)
2521         continue;
2522
2523       switch (use->type)
2524         {
2525         case USE_NONLINEAR_EXPR:
2526         case USE_COMPARE:
2527         case USE_ADDRESS:
2528           /* Just add the ivs based on the value of the iv used here.  */
2529           add_iv_value_candidates (data, use->iv, use);
2530           break;
2531
2532         default:
2533           gcc_unreachable ();
2534         }
2535     }
2536 }
2537
2538 /* Record important candidates and add them to related_cands bitmaps
2539    if needed.  */
2540
2541 static void
2542 record_important_candidates (struct ivopts_data *data)
2543 {
2544   unsigned i;
2545   struct iv_use *use;
2546
2547   for (i = 0; i < n_iv_cands (data); i++)
2548     {
2549       struct iv_cand *cand = iv_cand (data, i);
2550
2551       if (cand->important)
2552         bitmap_set_bit (data->important_candidates, i);
2553     }
2554
2555   data->consider_all_candidates = (n_iv_cands (data)
2556                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2557
2558   if (data->consider_all_candidates)
2559     {
2560       /* We will not need "related_cands" bitmaps in this case,
2561          so release them to decrease peak memory consumption.  */
2562       for (i = 0; i < n_iv_uses (data); i++)
2563         {
2564           use = iv_use (data, i);
2565           BITMAP_FREE (use->related_cands);
2566         }
2567     }
2568   else
2569     {
2570       /* Add important candidates to the related_cands bitmaps.  */
2571       for (i = 0; i < n_iv_uses (data); i++)
2572         bitmap_ior_into (iv_use (data, i)->related_cands,
2573                          data->important_candidates);
2574     }
2575 }
2576
2577 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2578    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2579    we allocate a simple list to every use.  */
2580
2581 static void
2582 alloc_use_cost_map (struct ivopts_data *data)
2583 {
2584   unsigned i, size, s, j;
2585
2586   for (i = 0; i < n_iv_uses (data); i++)
2587     {
2588       struct iv_use *use = iv_use (data, i);
2589       bitmap_iterator bi;
2590
2591       if (data->consider_all_candidates)
2592         size = n_iv_cands (data);
2593       else
2594         {
2595           s = 0;
2596           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2597             {
2598               s++;
2599             }
2600
2601           /* Round up to the power of two, so that moduling by it is fast.  */
2602           for (size = 1; size < s; size <<= 1)
2603             continue;
2604         }
2605
2606       use->n_map_members = size;
2607       use->cost_map = XCNEWVEC (struct cost_pair, size);
2608     }
2609 }
2610
2611 /* Returns description of computation cost of expression whose runtime
2612    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2613
2614 static comp_cost
2615 new_cost (unsigned runtime, unsigned complexity)
2616 {
2617   comp_cost cost;
2618
2619   cost.cost = runtime;
2620   cost.complexity = complexity;
2621
2622   return cost;
2623 }
2624
2625 /* Adds costs COST1 and COST2.  */
2626
2627 static comp_cost
2628 add_costs (comp_cost cost1, comp_cost cost2)
2629 {
2630   cost1.cost += cost2.cost;
2631   cost1.complexity += cost2.complexity;
2632
2633   return cost1;
2634 }
2635 /* Subtracts costs COST1 and COST2.  */
2636
2637 static comp_cost
2638 sub_costs (comp_cost cost1, comp_cost cost2)
2639 {
2640   cost1.cost -= cost2.cost;
2641   cost1.complexity -= cost2.complexity;
2642
2643   return cost1;
2644 }
2645
2646 /* Returns a negative number if COST1 < COST2, a positive number if
2647    COST1 > COST2, and 0 if COST1 = COST2.  */
2648
2649 static int
2650 compare_costs (comp_cost cost1, comp_cost cost2)
2651 {
2652   if (cost1.cost == cost2.cost)
2653     return cost1.complexity - cost2.complexity;
2654
2655   return cost1.cost - cost2.cost;
2656 }
2657
2658 /* Returns true if COST is infinite.  */
2659
2660 static bool
2661 infinite_cost_p (comp_cost cost)
2662 {
2663   return cost.cost == INFTY;
2664 }
2665
2666 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2667    on invariants DEPENDS_ON and that the value used in expressing it
2668    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2669
2670 static void
2671 set_use_iv_cost (struct ivopts_data *data,
2672                  struct iv_use *use, struct iv_cand *cand,
2673                  comp_cost cost, bitmap depends_on, tree value,
2674                  enum tree_code comp, int inv_expr_id)
2675 {
2676   unsigned i, s;
2677
2678   if (infinite_cost_p (cost))
2679     {
2680       BITMAP_FREE (depends_on);
2681       return;
2682     }
2683
2684   if (data->consider_all_candidates)
2685     {
2686       use->cost_map[cand->id].cand = cand;
2687       use->cost_map[cand->id].cost = cost;
2688       use->cost_map[cand->id].depends_on = depends_on;
2689       use->cost_map[cand->id].value = value;
2690       use->cost_map[cand->id].comp = comp;
2691       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2692       return;
2693     }
2694
2695   /* n_map_members is a power of two, so this computes modulo.  */
2696   s = cand->id & (use->n_map_members - 1);
2697   for (i = s; i < use->n_map_members; i++)
2698     if (!use->cost_map[i].cand)
2699       goto found;
2700   for (i = 0; i < s; i++)
2701     if (!use->cost_map[i].cand)
2702       goto found;
2703
2704   gcc_unreachable ();
2705
2706 found:
2707   use->cost_map[i].cand = cand;
2708   use->cost_map[i].cost = cost;
2709   use->cost_map[i].depends_on = depends_on;
2710   use->cost_map[i].value = value;
2711   use->cost_map[i].comp = comp;
2712   use->cost_map[i].inv_expr_id = inv_expr_id;
2713 }
2714
2715 /* Gets cost of (USE, CANDIDATE) pair.  */
2716
2717 static struct cost_pair *
2718 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2719                  struct iv_cand *cand)
2720 {
2721   unsigned i, s;
2722   struct cost_pair *ret;
2723
2724   if (!cand)
2725     return NULL;
2726
2727   if (data->consider_all_candidates)
2728     {
2729       ret = use->cost_map + cand->id;
2730       if (!ret->cand)
2731         return NULL;
2732
2733       return ret;
2734     }
2735
2736   /* n_map_members is a power of two, so this computes modulo.  */
2737   s = cand->id & (use->n_map_members - 1);
2738   for (i = s; i < use->n_map_members; i++)
2739     if (use->cost_map[i].cand == cand)
2740       return use->cost_map + i;
2741
2742   for (i = 0; i < s; i++)
2743     if (use->cost_map[i].cand == cand)
2744       return use->cost_map + i;
2745
2746   return NULL;
2747 }
2748
2749 /* Returns estimate on cost of computing SEQ.  */
2750
2751 static unsigned
2752 seq_cost (rtx seq, bool speed)
2753 {
2754   unsigned cost = 0;
2755   rtx set;
2756
2757   for (; seq; seq = NEXT_INSN (seq))
2758     {
2759       set = single_set (seq);
2760       if (set)
2761         cost += set_src_cost (SET_SRC (set), speed);
2762       else
2763         cost++;
2764     }
2765
2766   return cost;
2767 }
2768
2769 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2770 static rtx
2771 produce_memory_decl_rtl (tree obj, int *regno)
2772 {
2773   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2774   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2775   rtx x;
2776
2777   gcc_assert (obj);
2778   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2779     {
2780       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2781       x = gen_rtx_SYMBOL_REF (address_mode, name);
2782       SET_SYMBOL_REF_DECL (x, obj);
2783       x = gen_rtx_MEM (DECL_MODE (obj), x);
2784       set_mem_addr_space (x, as);
2785       targetm.encode_section_info (obj, x, true);
2786     }
2787   else
2788     {
2789       x = gen_raw_REG (address_mode, (*regno)++);
2790       x = gen_rtx_MEM (DECL_MODE (obj), x);
2791       set_mem_addr_space (x, as);
2792     }
2793
2794   return x;
2795 }
2796
2797 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2798    walk_tree.  DATA contains the actual fake register number.  */
2799
2800 static tree
2801 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2802 {
2803   tree obj = NULL_TREE;
2804   rtx x = NULL_RTX;
2805   int *regno = (int *) data;
2806
2807   switch (TREE_CODE (*expr_p))
2808     {
2809     case ADDR_EXPR:
2810       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2811            handled_component_p (*expr_p);
2812            expr_p = &TREE_OPERAND (*expr_p, 0))
2813         continue;
2814       obj = *expr_p;
2815       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2816         x = produce_memory_decl_rtl (obj, regno);
2817       break;
2818
2819     case SSA_NAME:
2820       *ws = 0;
2821       obj = SSA_NAME_VAR (*expr_p);
2822       if (!DECL_RTL_SET_P (obj))
2823         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2824       break;
2825
2826     case VAR_DECL:
2827     case PARM_DECL:
2828     case RESULT_DECL:
2829       *ws = 0;
2830       obj = *expr_p;
2831
2832       if (DECL_RTL_SET_P (obj))
2833         break;
2834
2835       if (DECL_MODE (obj) == BLKmode)
2836         x = produce_memory_decl_rtl (obj, regno);
2837       else
2838         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2839
2840       break;
2841
2842     default:
2843       break;
2844     }
2845
2846   if (x)
2847     {
2848       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2849       SET_DECL_RTL (obj, x);
2850     }
2851
2852   return NULL_TREE;
2853 }
2854
2855 /* Determines cost of the computation of EXPR.  */
2856
2857 static unsigned
2858 computation_cost (tree expr, bool speed)
2859 {
2860   rtx seq, rslt;
2861   tree type = TREE_TYPE (expr);
2862   unsigned cost;
2863   /* Avoid using hard regs in ways which may be unsupported.  */
2864   int regno = LAST_VIRTUAL_REGISTER + 1;
2865   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2866   enum node_frequency real_frequency = node->frequency;
2867
2868   node->frequency = NODE_FREQUENCY_NORMAL;
2869   crtl->maybe_hot_insn_p = speed;
2870   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2871   start_sequence ();
2872   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2873   seq = get_insns ();
2874   end_sequence ();
2875   default_rtl_profile ();
2876   node->frequency = real_frequency;
2877
2878   cost = seq_cost (seq, speed);
2879   if (MEM_P (rslt))
2880     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2881                           TYPE_ADDR_SPACE (type), speed);
2882   else if (!REG_P (rslt))
2883     cost += set_src_cost (rslt, speed);
2884
2885   return cost;
2886 }
2887
2888 /* Returns variable containing the value of candidate CAND at statement AT.  */
2889
2890 static tree
2891 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2892 {
2893   if (stmt_after_increment (loop, cand, stmt))
2894     return cand->var_after;
2895   else
2896     return cand->var_before;
2897 }
2898
2899 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2900    same precision that is at least as wide as the precision of TYPE, stores
2901    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2902    type of A and B.  */
2903
2904 static tree
2905 determine_common_wider_type (tree *a, tree *b)
2906 {
2907   tree wider_type = NULL;
2908   tree suba, subb;
2909   tree atype = TREE_TYPE (*a);
2910
2911   if (CONVERT_EXPR_P (*a))
2912     {
2913       suba = TREE_OPERAND (*a, 0);
2914       wider_type = TREE_TYPE (suba);
2915       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2916         return atype;
2917     }
2918   else
2919     return atype;
2920
2921   if (CONVERT_EXPR_P (*b))
2922     {
2923       subb = TREE_OPERAND (*b, 0);
2924       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2925         return atype;
2926     }
2927   else
2928     return atype;
2929
2930   *a = suba;
2931   *b = subb;
2932   return wider_type;
2933 }
2934
2935 /* Determines the expression by that USE is expressed from induction variable
2936    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2937    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2938
2939 static bool
2940 get_computation_aff (struct loop *loop,
2941                      struct iv_use *use, struct iv_cand *cand, gimple at,
2942                      struct affine_tree_combination *aff)
2943 {
2944   tree ubase = use->iv->base;
2945   tree ustep = use->iv->step;
2946   tree cbase = cand->iv->base;
2947   tree cstep = cand->iv->step, cstep_common;
2948   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2949   tree common_type, var;
2950   tree uutype;
2951   aff_tree cbase_aff, var_aff;
2952   double_int rat;
2953
2954   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2955     {
2956       /* We do not have a precision to express the values of use.  */
2957       return false;
2958     }
2959
2960   var = var_at_stmt (loop, cand, at);
2961   uutype = unsigned_type_for (utype);
2962
2963   /* If the conversion is not noop, perform it.  */
2964   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2965     {
2966       cstep = fold_convert (uutype, cstep);
2967       cbase = fold_convert (uutype, cbase);
2968       var = fold_convert (uutype, var);
2969     }
2970
2971   if (!constant_multiple_of (ustep, cstep, &rat))
2972     return false;
2973
2974   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2975      type, we achieve better folding by computing their difference in this
2976      wider type, and cast the result to UUTYPE.  We do not need to worry about
2977      overflows, as all the arithmetics will in the end be performed in UUTYPE
2978      anyway.  */
2979   common_type = determine_common_wider_type (&ubase, &cbase);
2980
2981   /* use = ubase - ratio * cbase + ratio * var.  */
2982   tree_to_aff_combination (ubase, common_type, aff);
2983   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2984   tree_to_aff_combination (var, uutype, &var_aff);
2985
2986   /* We need to shift the value if we are after the increment.  */
2987   if (stmt_after_increment (loop, cand, at))
2988     {
2989       aff_tree cstep_aff;
2990
2991       if (common_type != uutype)
2992         cstep_common = fold_convert (common_type, cstep);
2993       else
2994         cstep_common = cstep;
2995
2996       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2997       aff_combination_add (&cbase_aff, &cstep_aff);
2998     }
2999
3000   aff_combination_scale (&cbase_aff, double_int_neg (rat));
3001   aff_combination_add (aff, &cbase_aff);
3002   if (common_type != uutype)
3003     aff_combination_convert (aff, uutype);
3004
3005   aff_combination_scale (&var_aff, rat);
3006   aff_combination_add (aff, &var_aff);
3007
3008   return true;
3009 }
3010
3011 /* Determines the expression by that USE is expressed from induction variable
3012    CAND at statement AT in LOOP.  The computation is unshared.  */
3013
3014 static tree
3015 get_computation_at (struct loop *loop,
3016                     struct iv_use *use, struct iv_cand *cand, gimple at)
3017 {
3018   aff_tree aff;
3019   tree type = TREE_TYPE (use->iv->base);
3020
3021   if (!get_computation_aff (loop, use, cand, at, &aff))
3022     return NULL_TREE;
3023   unshare_aff_combination (&aff);
3024   return fold_convert (type, aff_combination_to_tree (&aff));
3025 }
3026
3027 /* Determines the expression by that USE is expressed from induction variable
3028    CAND in LOOP.  The computation is unshared.  */
3029
3030 static tree
3031 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3032 {
3033   return get_computation_at (loop, use, cand, use->stmt);
3034 }
3035
3036 /* Adjust the cost COST for being in loop setup rather than loop body.
3037    If we're optimizing for space, the loop setup overhead is constant;
3038    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3039 static unsigned
3040 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3041 {
3042   if (cost == INFTY)
3043     return cost;
3044   else if (optimize_loop_for_speed_p (data->current_loop))
3045     return cost / avg_loop_niter (data->current_loop);
3046   else
3047     return cost;
3048 }
3049
3050 /* Returns cost of addition in MODE.  */
3051
3052 static unsigned
3053 add_cost (enum machine_mode mode, bool speed)
3054 {
3055   static unsigned costs[NUM_MACHINE_MODES];
3056   rtx seq;
3057   unsigned cost;
3058
3059   if (costs[mode])
3060     return costs[mode];
3061
3062   start_sequence ();
3063   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3064                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3065                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3066                  NULL_RTX);
3067   seq = get_insns ();
3068   end_sequence ();
3069
3070   cost = seq_cost (seq, speed);
3071   if (!cost)
3072     cost = 1;
3073
3074   costs[mode] = cost;
3075
3076   if (dump_file && (dump_flags & TDF_DETAILS))
3077     fprintf (dump_file, "Addition in %s costs %d\n",
3078              GET_MODE_NAME (mode), cost);
3079   return cost;
3080 }
3081
3082 /* Entry in a hashtable of already known costs for multiplication.  */
3083 struct mbc_entry
3084 {
3085   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3086   enum machine_mode mode;       /* In mode.  */
3087   unsigned cost;                /* The cost.  */
3088 };
3089
3090 /* Counts hash value for the ENTRY.  */
3091
3092 static hashval_t
3093 mbc_entry_hash (const void *entry)
3094 {
3095   const struct mbc_entry *e = (const struct mbc_entry *) entry;
3096
3097   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3098 }
3099
3100 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3101
3102 static int
3103 mbc_entry_eq (const void *entry1, const void *entry2)
3104 {
3105   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3106   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3107
3108   return (e1->mode == e2->mode
3109           && e1->cst == e2->cst);
3110 }
3111
3112 /* Returns cost of multiplication by constant CST in MODE.  */
3113
3114 unsigned
3115 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3116 {
3117   static htab_t costs;
3118   struct mbc_entry **cached, act;
3119   rtx seq;
3120   unsigned cost;
3121
3122   if (!costs)
3123     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3124
3125   act.mode = mode;
3126   act.cst = cst;
3127   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3128   if (*cached)
3129     return (*cached)->cost;
3130
3131   *cached = XNEW (struct mbc_entry);
3132   (*cached)->mode = mode;
3133   (*cached)->cst = cst;
3134
3135   start_sequence ();
3136   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3137                gen_int_mode (cst, mode), NULL_RTX, 0);
3138   seq = get_insns ();
3139   end_sequence ();
3140
3141   cost = seq_cost (seq, speed);
3142
3143   if (dump_file && (dump_flags & TDF_DETAILS))
3144     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3145              (int) cst, GET_MODE_NAME (mode), cost);
3146
3147   (*cached)->cost = cost;
3148
3149   return cost;
3150 }
3151
3152 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3153    validity for a memory reference accessing memory of mode MODE in
3154    address space AS.  */
3155
3156 DEF_VEC_P (sbitmap);
3157 DEF_VEC_ALLOC_P (sbitmap, heap);
3158
3159 bool
3160 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3161                                  addr_space_t as)
3162 {
3163 #define MAX_RATIO 128
3164   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3165   static VEC (sbitmap, heap) *valid_mult_list;
3166   sbitmap valid_mult;
3167
3168   if (data_index >= VEC_length (sbitmap, valid_mult_list))
3169     VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3170
3171   valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3172   if (!valid_mult)
3173     {
3174       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3175       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3176       rtx addr;
3177       HOST_WIDE_INT i;
3178
3179       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3180       sbitmap_zero (valid_mult);
3181       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3182       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3183         {
3184           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3185           if (memory_address_addr_space_p (mode, addr, as))
3186             SET_BIT (valid_mult, i + MAX_RATIO);
3187         }
3188
3189       if (dump_file && (dump_flags & TDF_DETAILS))
3190         {
3191           fprintf (dump_file, "  allowed multipliers:");
3192           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3193             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3194               fprintf (dump_file, " %d", (int) i);
3195           fprintf (dump_file, "\n");
3196           fprintf (dump_file, "\n");
3197         }
3198
3199       VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3200     }
3201
3202   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3203     return false;
3204
3205   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3206 }
3207
3208 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3209    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3210    variable is omitted.  Compute the cost for a memory reference that accesses
3211    a memory location of mode MEM_MODE in address space AS.
3212
3213    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3214    size of MEM_MODE / RATIO) is available.  To make this determination, we
3215    look at the size of the increment to be made, which is given in CSTEP.
3216    CSTEP may be zero if the step is unknown.
3217    STMT_AFTER_INC is true iff the statement we're looking at is after the
3218    increment of the original biv.
3219
3220    TODO -- there must be some better way.  This all is quite crude.  */
3221
3222 typedef struct
3223 {
3224   HOST_WIDE_INT min_offset, max_offset;
3225   unsigned costs[2][2][2][2];
3226 } *address_cost_data;
3227
3228 DEF_VEC_P (address_cost_data);
3229 DEF_VEC_ALLOC_P (address_cost_data, heap);
3230
3231 static comp_cost
3232 get_address_cost (bool symbol_present, bool var_present,
3233                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3234                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3235                   addr_space_t as, bool speed,
3236                   bool stmt_after_inc, bool *may_autoinc)
3237 {
3238   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3239   static VEC(address_cost_data, heap) *address_cost_data_list;
3240   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3241   address_cost_data data;
3242   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3243   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3244   unsigned cost, acost, complexity;
3245   bool offset_p, ratio_p, autoinc;
3246   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3247   unsigned HOST_WIDE_INT mask;
3248   unsigned bits;
3249
3250   if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3251     VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3252                            data_index + 1);
3253
3254   data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3255   if (!data)
3256     {
3257       HOST_WIDE_INT i;
3258       HOST_WIDE_INT rat, off = 0;
3259       int old_cse_not_expected, width;
3260       unsigned sym_p, var_p, off_p, rat_p, add_c;
3261       rtx seq, addr, base;
3262       rtx reg0, reg1;
3263
3264       data = (address_cost_data) xcalloc (1, sizeof (*data));
3265
3266       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3267
3268       width = GET_MODE_BITSIZE (address_mode) - 1;
3269       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3270         width = HOST_BITS_PER_WIDE_INT - 1;
3271       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3272
3273       for (i = width; i >= 0; i--)
3274         {
3275           off = -((HOST_WIDE_INT) 1 << i);
3276           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3277           if (memory_address_addr_space_p (mem_mode, addr, as))
3278             break;
3279         }
3280       data->min_offset = (i == -1? 0 : off);
3281
3282       for (i = width; i >= 0; i--)
3283         {
3284           off = ((HOST_WIDE_INT) 1 << i) - 1;
3285           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3286           if (memory_address_addr_space_p (mem_mode, addr, as))
3287             break;
3288         }
3289       if (i == -1)
3290         off = 0;
3291       data->max_offset = off;
3292
3293       if (dump_file && (dump_flags & TDF_DETAILS))
3294         {
3295           fprintf (dump_file, "get_address_cost:\n");
3296           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3297                    GET_MODE_NAME (mem_mode),
3298                    data->min_offset);
3299           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3300                    GET_MODE_NAME (mem_mode),
3301                    data->max_offset);
3302         }
3303
3304       rat = 1;
3305       for (i = 2; i <= MAX_RATIO; i++)
3306         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3307           {
3308             rat = i;
3309             break;
3310           }
3311
3312       /* Compute the cost of various addressing modes.  */
3313       acost = 0;
3314       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3315       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3316
3317       if (HAVE_PRE_DECREMENT)
3318         {
3319           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3320           has_predec[mem_mode]
3321             = memory_address_addr_space_p (mem_mode, addr, as);
3322         }
3323       if (HAVE_POST_DECREMENT)
3324         {
3325           addr = gen_rtx_POST_DEC (address_mode, reg0);
3326           has_postdec[mem_mode]
3327             = memory_address_addr_space_p (mem_mode, addr, as);
3328         }
3329       if (HAVE_PRE_INCREMENT)
3330         {
3331           addr = gen_rtx_PRE_INC (address_mode, reg0);
3332           has_preinc[mem_mode]
3333             = memory_address_addr_space_p (mem_mode, addr, as);
3334         }
3335       if (HAVE_POST_INCREMENT)
3336         {
3337           addr = gen_rtx_POST_INC (address_mode, reg0);
3338           has_postinc[mem_mode]
3339             = memory_address_addr_space_p (mem_mode, addr, as);
3340         }
3341       for (i = 0; i < 16; i++)
3342         {
3343           sym_p = i & 1;
3344           var_p = (i >> 1) & 1;
3345           off_p = (i >> 2) & 1;
3346           rat_p = (i >> 3) & 1;
3347
3348           addr = reg0;
3349           if (rat_p)
3350             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3351                                    gen_int_mode (rat, address_mode));
3352
3353           if (var_p)
3354             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3355
3356           if (sym_p)
3357             {
3358               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3359               /* ??? We can run into trouble with some backends by presenting
3360                  it with symbols which haven't been properly passed through
3361                  targetm.encode_section_info.  By setting the local bit, we
3362                  enhance the probability of things working.  */
3363               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3364
3365               if (off_p)
3366                 base = gen_rtx_fmt_e (CONST, address_mode,
3367                                       gen_rtx_fmt_ee
3368                                         (PLUS, address_mode, base,
3369                                          gen_int_mode (off, address_mode)));
3370             }
3371           else if (off_p)
3372             base = gen_int_mode (off, address_mode);
3373           else
3374             base = NULL_RTX;
3375
3376           if (base)
3377             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3378
3379           start_sequence ();
3380           /* To avoid splitting addressing modes, pretend that no cse will
3381              follow.  */
3382           old_cse_not_expected = cse_not_expected;
3383           cse_not_expected = true;
3384           addr = memory_address_addr_space (mem_mode, addr, as);
3385           cse_not_expected = old_cse_not_expected;
3386           seq = get_insns ();
3387           end_sequence ();
3388
3389           acost = seq_cost (seq, speed);
3390           acost += address_cost (addr, mem_mode, as, speed);
3391
3392           if (!acost)
3393             acost = 1;
3394           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3395         }
3396
3397       /* On some targets, it is quite expensive to load symbol to a register,
3398          which makes addresses that contain symbols look much more expensive.
3399          However, the symbol will have to be loaded in any case before the
3400          loop (and quite likely we have it in register already), so it does not
3401          make much sense to penalize them too heavily.  So make some final
3402          tweaks for the SYMBOL_PRESENT modes:
3403
3404          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3405          var is cheaper, use this mode with small penalty.
3406          If VAR_PRESENT is true, try whether the mode with
3407          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3408          if this is the case, use it.  */
3409       add_c = add_cost (address_mode, speed);
3410       for (i = 0; i < 8; i++)
3411         {
3412           var_p = i & 1;
3413           off_p = (i >> 1) & 1;
3414           rat_p = (i >> 2) & 1;
3415
3416           acost = data->costs[0][1][off_p][rat_p] + 1;
3417           if (var_p)
3418             acost += add_c;
3419
3420           if (acost < data->costs[1][var_p][off_p][rat_p])
3421             data->costs[1][var_p][off_p][rat_p] = acost;
3422         }
3423
3424       if (dump_file && (dump_flags & TDF_DETAILS))
3425         {
3426           fprintf (dump_file, "Address costs:\n");
3427
3428           for (i = 0; i < 16; i++)
3429             {
3430               sym_p = i & 1;
3431               var_p = (i >> 1) & 1;
3432               off_p = (i >> 2) & 1;
3433               rat_p = (i >> 3) & 1;
3434
3435               fprintf (dump_file, "  ");
3436               if (sym_p)
3437                 fprintf (dump_file, "sym + ");
3438               if (var_p)
3439                 fprintf (dump_file, "var + ");
3440               if (off_p)
3441                 fprintf (dump_file, "cst + ");
3442               if (rat_p)
3443                 fprintf (dump_file, "rat * ");
3444
3445               acost = data->costs[sym_p][var_p][off_p][rat_p];
3446               fprintf (dump_file, "index costs %d\n", acost);
3447             }
3448           if (has_predec[mem_mode] || has_postdec[mem_mode]
3449               || has_preinc[mem_mode] || has_postinc[mem_mode])
3450             fprintf (dump_file, "  May include autoinc/dec\n");
3451           fprintf (dump_file, "\n");
3452         }
3453
3454       VEC_replace (address_cost_data, address_cost_data_list,
3455                    data_index, data);
3456     }
3457
3458   bits = GET_MODE_BITSIZE (address_mode);
3459   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3460   offset &= mask;
3461   if ((offset >> (bits - 1) & 1))
3462     offset |= ~mask;
3463   s_offset = offset;
3464
3465   autoinc = false;
3466   msize = GET_MODE_SIZE (mem_mode);
3467   autoinc_offset = offset;
3468   if (stmt_after_inc)
3469     autoinc_offset += ratio * cstep;
3470   if (symbol_present || var_present || ratio != 1)
3471     autoinc = false;
3472   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3473                && msize == cstep)
3474            || (has_postdec[mem_mode] && autoinc_offset == 0
3475                && msize == -cstep)
3476            || (has_preinc[mem_mode] && autoinc_offset == msize
3477                && msize == cstep)
3478            || (has_predec[mem_mode] && autoinc_offset == -msize
3479                && msize == -cstep))
3480     autoinc = true;
3481
3482   cost = 0;
3483   offset_p = (s_offset != 0
3484               && data->min_offset <= s_offset
3485               && s_offset <= data->max_offset);
3486   ratio_p = (ratio != 1
3487              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3488
3489   if (ratio != 1 && !ratio_p)
3490     cost += multiply_by_cost (ratio, address_mode, speed);
3491
3492   if (s_offset && !offset_p && !symbol_present)
3493     cost += add_cost (address_mode, speed);
3494
3495   if (may_autoinc)
3496     *may_autoinc = autoinc;
3497   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3498   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3499   return new_cost (cost + acost, complexity);
3500 }
3501
3502  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3503     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3504     calculating the operands of EXPR.  Returns true if successful, and returns
3505     the cost in COST.  */
3506
3507 static bool
3508 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3509                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3510 {
3511   comp_cost res;
3512   tree op1 = TREE_OPERAND (expr, 1);
3513   tree cst = TREE_OPERAND (mult, 1);
3514   tree multop = TREE_OPERAND (mult, 0);
3515   int m = exact_log2 (int_cst_value (cst));
3516   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3517   int sa_cost;
3518
3519   if (!(m >= 0 && m < maxm))
3520     return false;
3521
3522   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3523              ? shiftadd_cost[speed][mode][m]
3524              : (mult == op1
3525                 ? shiftsub1_cost[speed][mode][m]
3526                 : shiftsub0_cost[speed][mode][m]));
3527   res = new_cost (sa_cost, 0);
3528   res = add_costs (res, mult == op1 ? cost0 : cost1);
3529
3530   STRIP_NOPS (multop);
3531   if (!is_gimple_val (multop))
3532     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3533
3534   *cost = res;
3535   return true;
3536 }
3537
3538 /* Estimates cost of forcing expression EXPR into a variable.  */
3539
3540 static comp_cost
3541 force_expr_to_var_cost (tree expr, bool speed)
3542 {
3543   static bool costs_initialized = false;
3544   static unsigned integer_cost [2];
3545   static unsigned symbol_cost [2];
3546   static unsigned address_cost [2];
3547   tree op0, op1;
3548   comp_cost cost0, cost1, cost;
3549   enum machine_mode mode;
3550
3551   if (!costs_initialized)
3552     {
3553       tree type = build_pointer_type (integer_type_node);
3554       tree var, addr;
3555       rtx x;
3556       int i;
3557
3558       var = create_tmp_var_raw (integer_type_node, "test_var");
3559       TREE_STATIC (var) = 1;
3560       x = produce_memory_decl_rtl (var, NULL);
3561       SET_DECL_RTL (var, x);
3562
3563       addr = build1 (ADDR_EXPR, type, var);
3564
3565
3566       for (i = 0; i < 2; i++)
3567         {
3568           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3569                                                              2000), i);
3570
3571           symbol_cost[i] = computation_cost (addr, i) + 1;
3572
3573           address_cost[i]
3574             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3575           if (dump_file && (dump_flags & TDF_DETAILS))
3576             {
3577               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3578               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3579               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3580               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3581               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3582               fprintf (dump_file, "\n");
3583             }
3584         }
3585
3586       costs_initialized = true;
3587     }
3588
3589   STRIP_NOPS (expr);
3590
3591   if (SSA_VAR_P (expr))
3592     return zero_cost;
3593
3594   if (is_gimple_min_invariant (expr))
3595     {
3596       if (TREE_CODE (expr) == INTEGER_CST)
3597         return new_cost (integer_cost [speed], 0);
3598
3599       if (TREE_CODE (expr) == ADDR_EXPR)
3600         {
3601           tree obj = TREE_OPERAND (expr, 0);
3602
3603           if (TREE_CODE (obj) == VAR_DECL
3604               || TREE_CODE (obj) == PARM_DECL
3605               || TREE_CODE (obj) == RESULT_DECL)
3606             return new_cost (symbol_cost [speed], 0);
3607         }
3608
3609       return new_cost (address_cost [speed], 0);
3610     }
3611
3612   switch (TREE_CODE (expr))
3613     {
3614     case POINTER_PLUS_EXPR:
3615     case PLUS_EXPR:
3616     case MINUS_EXPR:
3617     case MULT_EXPR:
3618       op0 = TREE_OPERAND (expr, 0);
3619       op1 = TREE_OPERAND (expr, 1);
3620       STRIP_NOPS (op0);
3621       STRIP_NOPS (op1);
3622
3623       if (is_gimple_val (op0))
3624         cost0 = zero_cost;
3625       else
3626         cost0 = force_expr_to_var_cost (op0, speed);
3627
3628       if (is_gimple_val (op1))
3629         cost1 = zero_cost;
3630       else
3631         cost1 = force_expr_to_var_cost (op1, speed);
3632
3633       break;
3634
3635     case NEGATE_EXPR:
3636       op0 = TREE_OPERAND (expr, 0);
3637       STRIP_NOPS (op0);
3638       op1 = NULL_TREE;
3639
3640       if (is_gimple_val (op0))
3641         cost0 = zero_cost;
3642       else
3643         cost0 = force_expr_to_var_cost (op0, speed);
3644
3645       cost1 = zero_cost;
3646       break;
3647
3648     default:
3649       /* Just an arbitrary value, FIXME.  */
3650       return new_cost (target_spill_cost[speed], 0);
3651     }
3652
3653   mode = TYPE_MODE (TREE_TYPE (expr));
3654   switch (TREE_CODE (expr))
3655     {
3656     case POINTER_PLUS_EXPR:
3657     case PLUS_EXPR:
3658     case MINUS_EXPR:
3659     case NEGATE_EXPR:
3660       cost = new_cost (add_cost (mode, speed), 0);
3661       if (TREE_CODE (expr) != NEGATE_EXPR)
3662         {
3663           tree mult = NULL_TREE;
3664           comp_cost sa_cost;
3665           if (TREE_CODE (op1) == MULT_EXPR)
3666             mult = op1;
3667           else if (TREE_CODE (op0) == MULT_EXPR)
3668             mult = op0;
3669
3670           if (mult != NULL_TREE
3671               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3672               && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3673                                     &sa_cost))
3674             return sa_cost;
3675         }
3676       break;
3677
3678     case MULT_EXPR:
3679       if (cst_and_fits_in_hwi (op0))
3680         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3681       else if (cst_and_fits_in_hwi (op1))
3682         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3683       else
3684         return new_cost (target_spill_cost [speed], 0);
3685       break;
3686
3687     default:
3688       gcc_unreachable ();
3689     }
3690
3691   cost = add_costs (cost, cost0);
3692   cost = add_costs (cost, cost1);
3693
3694   /* Bound the cost by target_spill_cost.  The parts of complicated
3695      computations often are either loop invariant or at least can
3696      be shared between several iv uses, so letting this grow without
3697      limits would not give reasonable results.  */
3698   if (cost.cost > (int) target_spill_cost [speed])
3699     cost.cost = target_spill_cost [speed];
3700
3701   return cost;
3702 }
3703
3704 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3705    invariants the computation depends on.  */
3706
3707 static comp_cost
3708 force_var_cost (struct ivopts_data *data,
3709                 tree expr, bitmap *depends_on)
3710 {
3711   if (depends_on)
3712     {
3713       fd_ivopts_data = data;
3714       walk_tree (&expr, find_depends, depends_on, NULL);
3715     }
3716
3717   return force_expr_to_var_cost (expr, data->speed);
3718 }
3719
3720 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3721    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3722    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3723    invariants the computation depends on.  */
3724
3725 static comp_cost
3726 split_address_cost (struct ivopts_data *data,
3727                     tree addr, bool *symbol_present, bool *var_present,
3728                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3729 {
3730   tree core;
3731   HOST_WIDE_INT bitsize;
3732   HOST_WIDE_INT bitpos;
3733   tree toffset;
3734   enum machine_mode mode;
3735   int unsignedp, volatilep;
3736
3737   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3738                               &unsignedp, &volatilep, false);
3739
3740   if (toffset != 0
3741       || bitpos % BITS_PER_UNIT != 0
3742       || TREE_CODE (core) != VAR_DECL)
3743     {
3744       *symbol_present = false;
3745       *var_present = true;
3746       fd_ivopts_data = data;
3747       walk_tree (&addr, find_depends, depends_on, NULL);
3748       return new_cost (target_spill_cost[data->speed], 0);
3749     }
3750
3751   *offset += bitpos / BITS_PER_UNIT;
3752   if (TREE_STATIC (core)
3753       || DECL_EXTERNAL (core))
3754     {
3755       *symbol_present = true;
3756       *var_present = false;
3757       return zero_cost;
3758     }
3759
3760   *symbol_present = false;
3761   *var_present = true;
3762   return zero_cost;
3763 }
3764
3765 /* Estimates cost of expressing difference of addresses E1 - E2 as
3766    var + symbol + offset.  The value of offset is added to OFFSET,
3767    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3768    part is missing.  DEPENDS_ON is a set of the invariants the computation
3769    depends on.  */
3770
3771 static comp_cost
3772 ptr_difference_cost (struct ivopts_data *data,
3773                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3774                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3775 {
3776   HOST_WIDE_INT diff = 0;
3777   aff_tree aff_e1, aff_e2;
3778   tree type;
3779
3780   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3781
3782   if (ptr_difference_const (e1, e2, &diff))
3783     {
3784       *offset += diff;
3785       *symbol_present = false;
3786       *var_present = false;
3787       return zero_cost;
3788     }
3789
3790   if (integer_zerop (e2))
3791     return split_address_cost (data, TREE_OPERAND (e1, 0),
3792                                symbol_present, var_present, offset, depends_on);
3793
3794   *symbol_present = false;
3795   *var_present = true;
3796
3797   type = signed_type_for (TREE_TYPE (e1));
3798   tree_to_aff_combination (e1, type, &aff_e1);
3799   tree_to_aff_combination (e2, type, &aff_e2);
3800   aff_combination_scale (&aff_e2, double_int_minus_one);
3801   aff_combination_add (&aff_e1, &aff_e2);
3802
3803   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3804 }
3805
3806 /* Estimates cost of expressing difference E1 - E2 as
3807    var + symbol + offset.  The value of offset is added to OFFSET,
3808    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3809    part is missing.  DEPENDS_ON is a set of the invariants the computation
3810    depends on.  */
3811
3812 static comp_cost
3813 difference_cost (struct ivopts_data *data,
3814                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3815                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3816 {
3817   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3818   unsigned HOST_WIDE_INT off1, off2;
3819   aff_tree aff_e1, aff_e2;
3820   tree type;
3821
3822   e1 = strip_offset (e1, &off1);
3823   e2 = strip_offset (e2, &off2);
3824   *offset += off1 - off2;
3825
3826   STRIP_NOPS (e1);
3827   STRIP_NOPS (e2);
3828
3829   if (TREE_CODE (e1) == ADDR_EXPR)
3830     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3831                                 offset, depends_on);
3832   *symbol_present = false;
3833
3834   if (operand_equal_p (e1, e2, 0))
3835     {
3836       *var_present = false;
3837       return zero_cost;
3838     }
3839
3840   *var_present = true;
3841
3842   if (integer_zerop (e2))
3843     return force_var_cost (data, e1, depends_on);
3844
3845   if (integer_zerop (e1))
3846     {
3847       comp_cost cost = force_var_cost (data, e2, depends_on);
3848       cost.cost += multiply_by_cost (-1, mode, data->speed);
3849       return cost;
3850     }
3851
3852   type = signed_type_for (TREE_TYPE (e1));
3853   tree_to_aff_combination (e1, type, &aff_e1);
3854   tree_to_aff_combination (e2, type, &aff_e2);
3855   aff_combination_scale (&aff_e2, double_int_minus_one);
3856   aff_combination_add (&aff_e1, &aff_e2);
3857
3858   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3859 }
3860
3861 /* Returns true if AFF1 and AFF2 are identical.  */
3862
3863 static bool
3864 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3865 {
3866   unsigned i;
3867
3868   if (aff1->n != aff2->n)
3869     return false;
3870
3871   for (i = 0; i < aff1->n; i++)
3872     {
3873       if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3874         return false;
3875
3876       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3877         return false;
3878     }
3879   return true;
3880 }
3881
3882 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3883
3884 static int
3885 get_expr_id (struct ivopts_data *data, tree expr)
3886 {
3887   struct iv_inv_expr_ent ent;
3888   struct iv_inv_expr_ent **slot;
3889
3890   ent.expr = expr;
3891   ent.hash = iterative_hash_expr (expr, 0);
3892   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3893                                                      &ent, INSERT);
3894   if (*slot)
3895     return (*slot)->id;
3896
3897   *slot = XNEW (struct iv_inv_expr_ent);
3898   (*slot)->expr = expr;
3899   (*slot)->hash = ent.hash;
3900   (*slot)->id = data->inv_expr_id++;
3901   return (*slot)->id;
3902 }
3903
3904 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3905    requires a new compiler generated temporary.  Returns -1 otherwise.
3906    ADDRESS_P is a flag indicating if the expression is for address
3907    computation.  */
3908
3909 static int
3910 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3911                             tree cbase, HOST_WIDE_INT ratio,
3912                             bool address_p)
3913 {
3914   aff_tree ubase_aff, cbase_aff;
3915   tree expr, ub, cb;
3916
3917   STRIP_NOPS (ubase);
3918   STRIP_NOPS (cbase);
3919   ub = ubase;
3920   cb = cbase;
3921
3922   if ((TREE_CODE (ubase) == INTEGER_CST)
3923       && (TREE_CODE (cbase) == INTEGER_CST))
3924     return -1;
3925
3926   /* Strips the constant part. */
3927   if (TREE_CODE (ubase) == PLUS_EXPR
3928       || TREE_CODE (ubase) == MINUS_EXPR
3929       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3930     {
3931       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3932         ubase = TREE_OPERAND (ubase, 0);
3933     }
3934
3935   /* Strips the constant part. */
3936   if (TREE_CODE (cbase) == PLUS_EXPR
3937       || TREE_CODE (cbase) == MINUS_EXPR
3938       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3939     {
3940       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3941         cbase = TREE_OPERAND (cbase, 0);
3942     }
3943
3944   if (address_p)
3945     {
3946       if (((TREE_CODE (ubase) == SSA_NAME)
3947            || (TREE_CODE (ubase) == ADDR_EXPR
3948                && is_gimple_min_invariant (ubase)))
3949           && (TREE_CODE (cbase) == INTEGER_CST))
3950         return -1;
3951
3952       if (((TREE_CODE (cbase) == SSA_NAME)
3953            || (TREE_CODE (cbase) == ADDR_EXPR
3954                && is_gimple_min_invariant (cbase)))
3955           && (TREE_CODE (ubase) == INTEGER_CST))
3956         return -1;
3957     }
3958
3959   if (ratio == 1)
3960     {
3961       if(operand_equal_p (ubase, cbase, 0))
3962         return -1;
3963
3964       if (TREE_CODE (ubase) == ADDR_EXPR
3965           && TREE_CODE (cbase) == ADDR_EXPR)
3966         {
3967           tree usym, csym;
3968
3969           usym = TREE_OPERAND (ubase, 0);
3970           csym = TREE_OPERAND (cbase, 0);
3971           if (TREE_CODE (usym) == ARRAY_REF)
3972             {
3973               tree ind = TREE_OPERAND (usym, 1);
3974               if (TREE_CODE (ind) == INTEGER_CST
3975                   && host_integerp (ind, 0)
3976                   && TREE_INT_CST_LOW (ind) == 0)
3977                 usym = TREE_OPERAND (usym, 0);
3978             }
3979           if (TREE_CODE (csym) == ARRAY_REF)
3980             {
3981               tree ind = TREE_OPERAND (csym, 1);
3982               if (TREE_CODE (ind) == INTEGER_CST
3983                   && host_integerp (ind, 0)
3984                   && TREE_INT_CST_LOW (ind) == 0)
3985                 csym = TREE_OPERAND (csym, 0);
3986             }
3987           if (operand_equal_p (usym, csym, 0))
3988             return -1;
3989         }
3990       /* Now do more complex comparison  */
3991       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3992       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3993       if (compare_aff_trees (&ubase_aff, &cbase_aff))
3994         return -1;
3995     }
3996
3997   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3998   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3999
4000   aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
4001   aff_combination_add (&ubase_aff, &cbase_aff);
4002   expr = aff_combination_to_tree (&ubase_aff);
4003   return get_expr_id (data, expr);
4004 }
4005
4006
4007
4008 /* Determines the cost of the computation by that USE is expressed
4009    from induction variable CAND.  If ADDRESS_P is true, we just need
4010    to create an address from it, otherwise we want to get it into
4011    register.  A set of invariants we depend on is stored in
4012    DEPENDS_ON.  AT is the statement at that the value is computed.
4013    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4014    addressing is likely.  */
4015
4016 static comp_cost
4017 get_computation_cost_at (struct ivopts_data *data,
4018                          struct iv_use *use, struct iv_cand *cand,
4019                          bool address_p, bitmap *depends_on, gimple at,
4020                          bool *can_autoinc,
4021                          int *inv_expr_id)
4022 {
4023   tree ubase = use->iv->base, ustep = use->iv->step;
4024   tree cbase, cstep;
4025   tree utype = TREE_TYPE (ubase), ctype;
4026   unsigned HOST_WIDE_INT cstepi, offset = 0;
4027   HOST_WIDE_INT ratio, aratio;
4028   bool var_present, symbol_present, stmt_is_after_inc;
4029   comp_cost cost;
4030   double_int rat;
4031   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4032
4033   *depends_on = NULL;
4034
4035   /* Only consider real candidates.  */
4036   if (!cand->iv)
4037     return infinite_cost;
4038
4039   cbase = cand->iv->base;
4040   cstep = cand->iv->step;
4041   ctype = TREE_TYPE (cbase);
4042
4043   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4044     {
4045       /* We do not have a precision to express the values of use.  */
4046       return infinite_cost;
4047     }
4048
4049   if (address_p
4050       || (use->iv->base_object
4051           && cand->iv->base_object
4052           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4053           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4054     {
4055       /* Do not try to express address of an object with computation based
4056          on address of a different object.  This may cause problems in rtl
4057          level alias analysis (that does not expect this to be happening,
4058          as this is illegal in C), and would be unlikely to be useful
4059          anyway.  */
4060       if (use->iv->base_object
4061           && cand->iv->base_object
4062           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4063         return infinite_cost;
4064     }
4065
4066   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4067     {
4068       /* TODO -- add direct handling of this case.  */
4069       goto fallback;
4070     }
4071
4072   /* CSTEPI is removed from the offset in case statement is after the
4073      increment.  If the step is not constant, we use zero instead.
4074      This is a bit imprecise (there is the extra addition), but
4075      redundancy elimination is likely to transform the code so that
4076      it uses value of the variable before increment anyway,
4077      so it is not that much unrealistic.  */
4078   if (cst_and_fits_in_hwi (cstep))
4079     cstepi = int_cst_value (cstep);
4080   else
4081     cstepi = 0;
4082
4083   if (!constant_multiple_of (ustep, cstep, &rat))
4084     return infinite_cost;
4085
4086   if (double_int_fits_in_shwi_p (rat))
4087     ratio = double_int_to_shwi (rat);
4088   else
4089     return infinite_cost;
4090
4091   STRIP_NOPS (cbase);
4092   ctype = TREE_TYPE (cbase);
4093
4094   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4095
4096   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4097      or ratio == 1, it is better to handle this like
4098
4099      ubase - ratio * cbase + ratio * var
4100
4101      (also holds in the case ratio == -1, TODO.  */
4102
4103   if (cst_and_fits_in_hwi (cbase))
4104     {
4105       offset = - ratio * int_cst_value (cbase);
4106       cost = difference_cost (data,
4107                               ubase, build_int_cst (utype, 0),
4108                               &symbol_present, &var_present, &offset,
4109                               depends_on);
4110       cost.cost /= avg_loop_niter (data->current_loop);
4111     }
4112   else if (ratio == 1)
4113     {
4114       tree real_cbase = cbase;
4115
4116       /* Check to see if any adjustment is needed.  */
4117       if (cstepi == 0 && stmt_is_after_inc)
4118         {
4119           aff_tree real_cbase_aff;
4120           aff_tree cstep_aff;
4121
4122           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4123                                    &real_cbase_aff);
4124           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4125
4126           aff_combination_add (&real_cbase_aff, &cstep_aff);
4127           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4128         }
4129
4130       cost = difference_cost (data,
4131                               ubase, real_cbase,
4132                               &symbol_present, &var_present, &offset,
4133                               depends_on);
4134       cost.cost /= avg_loop_niter (data->current_loop);
4135     }
4136   else if (address_p
4137            && !POINTER_TYPE_P (ctype)
4138            && multiplier_allowed_in_address_p
4139                 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4140                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4141     {
4142       cbase
4143         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4144       cost = difference_cost (data,
4145                               ubase, cbase,
4146                               &symbol_present, &var_present, &offset,
4147                               depends_on);
4148       cost.cost /= avg_loop_niter (data->current_loop);
4149     }
4150   else
4151     {
4152       cost = force_var_cost (data, cbase, depends_on);
4153       cost = add_costs (cost,
4154                         difference_cost (data,
4155                                          ubase, build_int_cst (utype, 0),
4156                                          &symbol_present, &var_present,
4157                                          &offset, depends_on));
4158       cost.cost /= avg_loop_niter (data->current_loop);
4159       cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4160     }
4161
4162   if (inv_expr_id)
4163     {
4164       *inv_expr_id =
4165           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4166       /* Clear depends on.  */
4167       if (*inv_expr_id != -1 && depends_on && *depends_on)
4168         bitmap_clear (*depends_on);
4169     }
4170
4171   /* If we are after the increment, the value of the candidate is higher by
4172      one iteration.  */
4173   if (stmt_is_after_inc)
4174     offset -= ratio * cstepi;
4175
4176   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4177      (symbol/var1/const parts may be omitted).  If we are looking for an
4178      address, find the cost of addressing this.  */
4179   if (address_p)
4180     return add_costs (cost,
4181                       get_address_cost (symbol_present, var_present,
4182                                         offset, ratio, cstepi,
4183                                         TYPE_MODE (TREE_TYPE (utype)),
4184                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4185                                         speed, stmt_is_after_inc,
4186                                         can_autoinc));
4187
4188   /* Otherwise estimate the costs for computing the expression.  */
4189   if (!symbol_present && !var_present && !offset)
4190     {
4191       if (ratio != 1)
4192         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4193       return cost;
4194     }
4195
4196   /* Symbol + offset should be compile-time computable so consider that they
4197       are added once to the variable, if present.  */
4198   if (var_present && (symbol_present || offset))
4199     cost.cost += adjust_setup_cost (data,
4200                                     add_cost (TYPE_MODE (ctype), speed));
4201
4202   /* Having offset does not affect runtime cost in case it is added to
4203      symbol, but it increases complexity.  */
4204   if (offset)
4205     cost.complexity++;
4206
4207   cost.cost += add_cost (TYPE_MODE (ctype), speed);
4208
4209   aratio = ratio > 0 ? ratio : -ratio;
4210   if (aratio != 1)
4211     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4212   return cost;
4213
4214 fallback:
4215   if (can_autoinc)
4216     *can_autoinc = false;
4217
4218   {
4219     /* Just get the expression, expand it and measure the cost.  */
4220     tree comp = get_computation_at (data->current_loop, use, cand, at);
4221
4222     if (!comp)
4223       return infinite_cost;
4224
4225     if (address_p)
4226       comp = build_simple_mem_ref (comp);
4227
4228     return new_cost (computation_cost (comp, speed), 0);
4229   }
4230 }
4231
4232 /* Determines the cost of the computation by that USE is expressed
4233    from induction variable CAND.  If ADDRESS_P is true, we just need
4234    to create an address from it, otherwise we want to get it into
4235    register.  A set of invariants we depend on is stored in
4236    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4237    autoinc addressing is likely.  */
4238
4239 static comp_cost
4240 get_computation_cost (struct ivopts_data *data,
4241                       struct iv_use *use, struct iv_cand *cand,
4242                       bool address_p, bitmap *depends_on,
4243                       bool *can_autoinc, int *inv_expr_id)
4244 {
4245   return get_computation_cost_at (data,
4246                                   use, cand, address_p, depends_on, use->stmt,
4247                                   can_autoinc, inv_expr_id);
4248 }
4249
4250 /* Determines cost of basing replacement of USE on CAND in a generic
4251    expression.  */
4252
4253 static bool
4254 determine_use_iv_cost_generic (struct ivopts_data *data,
4255                                struct iv_use *use, struct iv_cand *cand)
4256 {
4257   bitmap depends_on;
4258   comp_cost cost;
4259   int inv_expr_id = -1;
4260
4261   /* The simple case first -- if we need to express value of the preserved
4262      original biv, the cost is 0.  This also prevents us from counting the
4263      cost of increment twice -- once at this use and once in the cost of
4264      the candidate.  */
4265   if (cand->pos == IP_ORIGINAL
4266       && cand->incremented_at == use->stmt)
4267     {
4268       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE,
4269                        ERROR_MARK, -1);
4270       return true;
4271     }
4272
4273   cost = get_computation_cost (data, use, cand, false, &depends_on,
4274                                NULL, &inv_expr_id);
4275
4276   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4277                    inv_expr_id);
4278
4279   return !infinite_cost_p (cost);
4280 }
4281
4282 /* Determines cost of basing replacement of USE on CAND in an address.  */
4283
4284 static bool
4285 determine_use_iv_cost_address (struct ivopts_data *data,
4286                                struct iv_use *use, struct iv_cand *cand)
4287 {
4288   bitmap depends_on;
4289   bool can_autoinc;
4290   int inv_expr_id = -1;
4291   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4292                                          &can_autoinc, &inv_expr_id);
4293
4294   if (cand->ainc_use == use)
4295     {
4296       if (can_autoinc)
4297         cost.cost -= cand->cost_step;
4298       /* If we generated the candidate solely for exploiting autoincrement
4299          opportunities, and it turns out it can't be used, set the cost to
4300          infinity to make sure we ignore it.  */
4301       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4302         cost = infinite_cost;
4303     }
4304   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4305                    inv_expr_id);
4306
4307   return !infinite_cost_p (cost);
4308 }
4309
4310 /* Computes value of candidate CAND at position AT in iteration NITER, and
4311    stores it to VAL.  */
4312
4313 static void
4314 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4315                aff_tree *val)
4316 {
4317   aff_tree step, delta, nit;
4318   struct iv *iv = cand->iv;
4319   tree type = TREE_TYPE (iv->base);
4320   tree steptype = type;
4321   if (POINTER_TYPE_P (type))
4322     steptype = sizetype;
4323
4324   tree_to_aff_combination (iv->step, steptype, &step);
4325   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4326   aff_combination_convert (&nit, steptype);
4327   aff_combination_mult (&nit, &step, &delta);
4328   if (stmt_after_increment (loop, cand, at))
4329     aff_combination_add (&delta, &step);
4330
4331   tree_to_aff_combination (iv->base, type, val);
4332   aff_combination_add (val, &delta);
4333 }
4334
4335 /* Returns period of induction variable iv.  */
4336
4337 static tree
4338 iv_period (struct iv *iv)
4339 {
4340   tree step = iv->step, period, type;
4341   tree pow2div;
4342
4343   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4344
4345   type = unsigned_type_for (TREE_TYPE (step));
4346   /* Period of the iv is lcm (step, type_range)/step -1,
4347      i.e., N*type_range/step - 1. Since type range is power
4348      of two, N == (step >> num_of_ending_zeros_binary (step),
4349      so the final result is
4350
4351        (type_range >> num_of_ending_zeros_binary (step)) - 1
4352
4353   */
4354   pow2div = num_ending_zeros (step);
4355
4356   period = build_low_bits_mask (type,
4357                                 (TYPE_PRECISION (type)
4358                                  - tree_low_cst (pow2div, 1)));
4359
4360   return period;
4361 }
4362
4363 /* Returns the comparison operator used when eliminating the iv USE.  */
4364
4365 static enum tree_code
4366 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4367 {
4368   struct loop *loop = data->current_loop;
4369   basic_block ex_bb;
4370   edge exit;
4371
4372   ex_bb = gimple_bb (use->stmt);
4373   exit = EDGE_SUCC (ex_bb, 0);
4374   if (flow_bb_inside_loop_p (loop, exit->dest))
4375     exit = EDGE_SUCC (ex_bb, 1);
4376
4377   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4378 }
4379
4380 static tree
4381 strip_wrap_conserving_type_conversions (tree exp)
4382 {
4383   while (tree_ssa_useless_type_conversion (exp)
4384          && (nowrap_type_p (TREE_TYPE (exp))
4385              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4386     exp = TREE_OPERAND (exp, 0);
4387   return exp;
4388 }
4389
4390 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4391    check for an exact match.  */
4392
4393 static bool
4394 expr_equal_p (tree e, tree what)
4395 {
4396   gimple stmt;
4397   enum tree_code code;
4398
4399   e = strip_wrap_conserving_type_conversions (e);
4400   what = strip_wrap_conserving_type_conversions (what);
4401
4402   code = TREE_CODE (what);
4403   if (TREE_TYPE (e) != TREE_TYPE (what))
4404     return false;
4405
4406   if (operand_equal_p (e, what, 0))
4407     return true;
4408
4409   if (TREE_CODE (e) != SSA_NAME)
4410     return false;
4411
4412   stmt = SSA_NAME_DEF_STMT (e);
4413   if (gimple_code (stmt) != GIMPLE_ASSIGN
4414       || gimple_assign_rhs_code (stmt) != code)
4415     return false;
4416
4417   switch (get_gimple_rhs_class (code))
4418     {
4419     case GIMPLE_BINARY_RHS:
4420       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4421         return false;
4422       /* Fallthru.  */
4423
4424     case GIMPLE_UNARY_RHS:
4425     case GIMPLE_SINGLE_RHS:
4426       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4427     default:
4428       return false;
4429     }
4430 }
4431
4432 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4433    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4434    calculation is performed in non-wrapping type.
4435
4436    TODO: More generally, we could test for the situation that
4437          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4438          This would require knowing the sign of OFFSET.
4439
4440          Also, we only look for the first addition in the computation of BASE.
4441          More complex analysis would be better, but introducing it just for
4442          this optimization seems like an overkill.  */
4443
4444 static bool
4445 difference_cannot_overflow_p (tree base, tree offset)
4446 {
4447   enum tree_code code;
4448   tree e1, e2;
4449
4450   if (!nowrap_type_p (TREE_TYPE (base)))
4451     return false;
4452
4453   base = expand_simple_operations (base);
4454
4455   if (TREE_CODE (base) == SSA_NAME)
4456     {
4457       gimple stmt = SSA_NAME_DEF_STMT (base);
4458
4459       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4460         return false;
4461
4462       code = gimple_assign_rhs_code (stmt);
4463       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4464         return false;
4465
4466       e1 = gimple_assign_rhs1 (stmt);
4467       e2 = gimple_assign_rhs2 (stmt);
4468     }
4469   else
4470     {
4471       code = TREE_CODE (base);
4472       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4473         return false;
4474       e1 = TREE_OPERAND (base, 0);
4475       e2 = TREE_OPERAND (base, 1);
4476     }
4477
4478   /* TODO: deeper inspection may be necessary to prove the equality.  */
4479   switch (code)
4480     {
4481     case PLUS_EXPR:
4482       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4483     case POINTER_PLUS_EXPR:
4484       return expr_equal_p (e2, offset);
4485
4486     default:
4487       return false;
4488     }
4489 }
4490
4491 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4492    comparison with CAND.  NITER describes the number of iterations of
4493    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4494
4495    We aim to handle the following situation:
4496
4497    sometype *base, *p;
4498    int a, b, i;
4499
4500    i = a;
4501    p = p_0 = base + a;
4502
4503    do
4504      {
4505        bla (*p);
4506        p++;
4507        i++;
4508      }
4509    while (i < b);
4510
4511    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4512    We aim to optimize this to
4513
4514    p = p_0 = base + a;
4515    do
4516      {
4517        bla (*p);
4518        p++;
4519      }
4520    while (p < p_0 - a + b);
4521
4522    This preserves the correctness, since the pointer arithmetics does not
4523    overflow.  More precisely:
4524
4525    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4526       overflow in computing it or the values of p.
4527    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4528       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4529
4530 static bool
4531 iv_elimination_compare_lt (struct ivopts_data *data,
4532                            struct iv_cand *cand, enum tree_code *comp_p,
4533                            struct tree_niter_desc *niter)
4534 {
4535   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4536   struct affine_tree_combination nit, tmpa, tmpb;
4537   enum tree_code comp;
4538   HOST_WIDE_INT step;
4539
4540   /* We need to know that the candidate induction variable does not overflow.
4541      While more complex analysis may be used to prove this, for now just
4542      check that the variable appears in the original program and that it
4543      is computed in a type that guarantees no overflows.  */
4544   cand_type = TREE_TYPE (cand->iv->base);
4545   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4546     return false;
4547
4548   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4549      the calculation of the BOUND could overflow, making the comparison
4550      invalid.  */
4551   if (!data->loop_single_exit_p)
4552     return false;
4553
4554   /* We need to be able to decide whether candidate is increasing or decreasing
4555      in order to choose the right comparison operator.  */
4556   if (!cst_and_fits_in_hwi (cand->iv->step))
4557     return false;
4558   step = int_cst_value (cand->iv->step);
4559
4560   /* Check that the number of iterations matches the expected pattern:
4561      a + 1 > b ? 0 : b - a - 1.  */
4562   mbz = niter->may_be_zero;
4563   if (TREE_CODE (mbz) == GT_EXPR)
4564     {
4565       /* Handle a + 1 > b.  */
4566       tree op0 = TREE_OPERAND (mbz, 0);
4567       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4568         {
4569           a = TREE_OPERAND (op0, 0);
4570           b = TREE_OPERAND (mbz, 1);
4571         }
4572       else
4573         return false;
4574     }
4575   else if (TREE_CODE (mbz) == LT_EXPR)
4576     {
4577       tree op1 = TREE_OPERAND (mbz, 1);
4578
4579       /* Handle b < a + 1.  */
4580       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4581         {
4582           a = TREE_OPERAND (op1, 0);
4583           b = TREE_OPERAND (mbz, 0);
4584         }
4585       else
4586         return false;
4587     }
4588   else
4589     return false;
4590
4591   /* Expected number of iterations is B - A - 1.  Check that it matches
4592      the actual number, i.e., that B - A - NITER = 1.  */
4593   tree_to_aff_combination (niter->niter, nit_type, &nit);
4594   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4595   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4596   aff_combination_scale (&nit, double_int_minus_one);
4597   aff_combination_scale (&tmpa, double_int_minus_one);
4598   aff_combination_add (&tmpb, &tmpa);
4599   aff_combination_add (&tmpb, &nit);
4600   if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4601     return false;
4602
4603   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4604      overflow.  */
4605   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4606                         cand->iv->step,
4607                         fold_convert (TREE_TYPE (cand->iv->step), a));
4608   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4609     return false;
4610
4611   /* Determine the new comparison operator.  */
4612   comp = step < 0 ? GT_EXPR : LT_EXPR;
4613   if (*comp_p == NE_EXPR)
4614     *comp_p = comp;
4615   else if (*comp_p == EQ_EXPR)
4616     *comp_p = invert_tree_comparison (comp, false);
4617   else
4618     gcc_unreachable ();
4619
4620   return true;
4621 }
4622
4623 /* Check whether it is possible to express the condition in USE by comparison
4624    of candidate CAND.  If so, store the value compared with to BOUND, and the
4625    comparison operator to COMP.  */
4626
4627 static bool
4628 may_eliminate_iv (struct ivopts_data *data,
4629                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4630                   enum tree_code *comp)
4631 {
4632   basic_block ex_bb;
4633   edge exit;
4634   tree period;
4635   struct loop *loop = data->current_loop;
4636   aff_tree bnd;
4637   struct tree_niter_desc *desc = NULL;
4638
4639   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4640     return false;
4641
4642   /* For now works only for exits that dominate the loop latch.
4643      TODO: extend to other conditions inside loop body.  */
4644   ex_bb = gimple_bb (use->stmt);
4645   if (use->stmt != last_stmt (ex_bb)
4646       || gimple_code (use->stmt) != GIMPLE_COND
4647       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4648     return false;
4649
4650   exit = EDGE_SUCC (ex_bb, 0);
4651   if (flow_bb_inside_loop_p (loop, exit->dest))
4652     exit = EDGE_SUCC (ex_bb, 1);
4653   if (flow_bb_inside_loop_p (loop, exit->dest))
4654     return false;
4655
4656   desc = niter_for_exit (data, exit);
4657   if (!desc)
4658     return false;
4659
4660   /* Determine whether we can use the variable to test the exit condition.
4661      This is the case iff the period of the induction variable is greater
4662      than the number of iterations for which the exit condition is true.  */
4663   period = iv_period (cand->iv);
4664
4665   /* If the number of iterations is constant, compare against it directly.  */
4666   if (TREE_CODE (desc->niter) == INTEGER_CST)
4667     {
4668       /* See cand_value_at.  */
4669       if (stmt_after_increment (loop, cand, use->stmt))
4670         {
4671           if (!tree_int_cst_lt (desc->niter, period))
4672             return false;
4673         }
4674       else
4675         {
4676           if (tree_int_cst_lt (period, desc->niter))
4677             return false;
4678         }
4679     }
4680
4681   /* If not, and if this is the only possible exit of the loop, see whether
4682      we can get a conservative estimate on the number of iterations of the
4683      entire loop and compare against that instead.  */
4684   else
4685     {
4686       double_int period_value, max_niter;
4687
4688       max_niter = desc->max;
4689       if (stmt_after_increment (loop, cand, use->stmt))
4690         max_niter = double_int_add (max_niter, double_int_one);
4691       period_value = tree_to_double_int (period);
4692       if (double_int_ucmp (max_niter, period_value) > 0)
4693         {
4694           /* See if we can take advantage of infered loop bound information.  */
4695           if (data->loop_single_exit_p)
4696             {
4697               if (!estimated_loop_iterations (loop, true, &max_niter))
4698                 return false;
4699               /* The loop bound is already adjusted by adding 1.  */
4700               if (double_int_ucmp (max_niter, period_value) > 0)
4701                 return false;
4702             }
4703           else
4704             return false;
4705         }
4706     }
4707
4708   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4709
4710   *bound = aff_combination_to_tree (&bnd);
4711   *comp = iv_elimination_compare (data, use);
4712
4713   /* It is unlikely that computing the number of iterations using division
4714      would be more profitable than keeping the original induction variable.  */
4715   if (expression_expensive_p (*bound))
4716     return false;
4717
4718   /* Sometimes, it is possible to handle the situation that the number of
4719      iterations may be zero unless additional assumtions by using <
4720      instead of != in the exit condition.
4721
4722      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4723            base the exit condition on it.  However, that is often too
4724            expensive.  */
4725   if (!integer_zerop (desc->may_be_zero))
4726     return iv_elimination_compare_lt (data, cand, comp, desc);
4727
4728   return true;
4729 }
4730
4731  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4732     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4733
4734 static int
4735 parm_decl_cost (struct ivopts_data *data, tree bound)
4736 {
4737   tree sbound = bound;
4738   STRIP_NOPS (sbound);
4739
4740   if (TREE_CODE (sbound) == SSA_NAME
4741       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4742       && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4743       && data->body_includes_call)
4744     return COSTS_N_INSNS (1);
4745
4746   return 0;
4747 }
4748
4749 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4750
4751 static bool
4752 determine_use_iv_cost_condition (struct ivopts_data *data,
4753                                  struct iv_use *use, struct iv_cand *cand)
4754 {
4755   tree bound = NULL_TREE;
4756   struct iv *cmp_iv;
4757   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4758   comp_cost elim_cost, express_cost, cost, bound_cost;
4759   bool ok;
4760   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4761   tree *control_var, *bound_cst;
4762   enum tree_code comp = ERROR_MARK;
4763
4764   /* Only consider real candidates.  */
4765   if (!cand->iv)
4766     {
4767       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4768                        ERROR_MARK, -1);
4769       return false;
4770     }
4771
4772   /* Try iv elimination.  */
4773   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4774     {
4775       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4776       if (elim_cost.cost == 0)
4777         elim_cost.cost = parm_decl_cost (data, bound);
4778       else if (TREE_CODE (bound) == INTEGER_CST)
4779         elim_cost.cost = 0;
4780       /* If we replace a loop condition 'i < n' with 'p < base + n',
4781          depends_on_elim will have 'base' and 'n' set, which implies
4782          that both 'base' and 'n' will be live during the loop.  More likely,
4783          'base + n' will be loop invariant, resulting in only one live value
4784          during the loop.  So in that case we clear depends_on_elim and set
4785         elim_inv_expr_id instead.  */
4786       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4787         {
4788           elim_inv_expr_id = get_expr_id (data, bound);
4789           bitmap_clear (depends_on_elim);
4790         }
4791       /* The bound is a loop invariant, so it will be only computed
4792          once.  */
4793       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4794     }
4795   else
4796     elim_cost = infinite_cost;
4797
4798   /* Try expressing the original giv.  If it is compared with an invariant,
4799      note that we cannot get rid of it.  */
4800   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4801                               NULL, &cmp_iv);
4802   gcc_assert (ok);
4803
4804   /* When the condition is a comparison of the candidate IV against
4805      zero, prefer this IV.
4806
4807      TODO: The constant that we're substracting from the cost should
4808      be target-dependent.  This information should be added to the
4809      target costs for each backend.  */
4810   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4811       && integer_zerop (*bound_cst)
4812       && (operand_equal_p (*control_var, cand->var_after, 0)
4813           || operand_equal_p (*control_var, cand->var_before, 0)))
4814     elim_cost.cost -= 1;
4815
4816   express_cost = get_computation_cost (data, use, cand, false,
4817                                        &depends_on_express, NULL,
4818                                        &express_inv_expr_id);
4819   fd_ivopts_data = data;
4820   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4821
4822   /* Count the cost of the original bound as well.  */
4823   bound_cost = force_var_cost (data, *bound_cst, NULL);
4824   if (bound_cost.cost == 0)
4825     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4826   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4827     bound_cost.cost = 0;
4828   express_cost.cost += bound_cost.cost;
4829
4830   /* Choose the better approach, preferring the eliminated IV. */
4831   if (compare_costs (elim_cost, express_cost) <= 0)
4832     {
4833       cost = elim_cost;
4834       depends_on = depends_on_elim;
4835       depends_on_elim = NULL;
4836       inv_expr_id = elim_inv_expr_id;
4837     }
4838   else
4839     {
4840       cost = express_cost;
4841       depends_on = depends_on_express;
4842       depends_on_express = NULL;
4843       bound = NULL_TREE;
4844       comp = ERROR_MARK;
4845       inv_expr_id = express_inv_expr_id;
4846     }
4847
4848   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4849
4850   if (depends_on_elim)
4851     BITMAP_FREE (depends_on_elim);
4852   if (depends_on_express)
4853     BITMAP_FREE (depends_on_express);
4854
4855   return !infinite_cost_p (cost);
4856 }
4857
4858 /* Determines cost of basing replacement of USE on CAND.  Returns false
4859    if USE cannot be based on CAND.  */
4860
4861 static bool
4862 determine_use_iv_cost (struct ivopts_data *data,
4863                        struct iv_use *use, struct iv_cand *cand)
4864 {
4865   switch (use->type)
4866     {
4867     case USE_NONLINEAR_EXPR:
4868       return determine_use_iv_cost_generic (data, use, cand);
4869
4870     case USE_ADDRESS:
4871       return determine_use_iv_cost_address (data, use, cand);
4872
4873     case USE_COMPARE:
4874       return determine_use_iv_cost_condition (data, use, cand);
4875
4876     default:
4877       gcc_unreachable ();
4878     }
4879 }
4880
4881 /* Return true if get_computation_cost indicates that autoincrement is
4882    a possibility for the pair of USE and CAND, false otherwise.  */
4883
4884 static bool
4885 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4886                            struct iv_cand *cand)
4887 {
4888   bitmap depends_on;
4889   bool can_autoinc;
4890   comp_cost cost;
4891
4892   if (use->type != USE_ADDRESS)
4893     return false;
4894
4895   cost = get_computation_cost (data, use, cand, true, &depends_on,
4896                                &can_autoinc, NULL);
4897
4898   BITMAP_FREE (depends_on);
4899
4900   return !infinite_cost_p (cost) && can_autoinc;
4901 }
4902
4903 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4904    use that allows autoincrement, and set their AINC_USE if possible.  */
4905
4906 static void
4907 set_autoinc_for_original_candidates (struct ivopts_data *data)
4908 {
4909   unsigned i, j;
4910
4911   for (i = 0; i < n_iv_cands (data); i++)
4912     {
4913       struct iv_cand *cand = iv_cand (data, i);
4914       struct iv_use *closest = NULL;
4915       if (cand->pos != IP_ORIGINAL)
4916         continue;
4917       for (j = 0; j < n_iv_uses (data); j++)
4918         {
4919           struct iv_use *use = iv_use (data, j);
4920           unsigned uid = gimple_uid (use->stmt);
4921           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4922               || uid > gimple_uid (cand->incremented_at))
4923             continue;
4924           if (closest == NULL || uid > gimple_uid (closest->stmt))
4925             closest = use;
4926         }
4927       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4928         continue;
4929       cand->ainc_use = closest;
4930     }
4931 }
4932
4933 /* Finds the candidates for the induction variables.  */
4934
4935 static void
4936 find_iv_candidates (struct ivopts_data *data)
4937 {
4938   /* Add commonly used ivs.  */
4939   add_standard_iv_candidates (data);
4940
4941   /* Add old induction variables.  */
4942   add_old_ivs_candidates (data);
4943
4944   /* Add induction variables derived from uses.  */
4945   add_derived_ivs_candidates (data);
4946
4947   set_autoinc_for_original_candidates (data);
4948
4949   /* Record the important candidates.  */
4950   record_important_candidates (data);
4951 }
4952
4953 /* Determines costs of basing the use of the iv on an iv candidate.  */
4954
4955 static void
4956 determine_use_iv_costs (struct ivopts_data *data)
4957 {
4958   unsigned i, j;
4959   struct iv_use *use;
4960   struct iv_cand *cand;
4961   bitmap to_clear = BITMAP_ALLOC (NULL);
4962
4963   alloc_use_cost_map (data);
4964
4965   for (i = 0; i < n_iv_uses (data); i++)
4966     {
4967       use = iv_use (data, i);
4968
4969       if (data->consider_all_candidates)
4970         {
4971           for (j = 0; j < n_iv_cands (data); j++)
4972             {
4973               cand = iv_cand (data, j);
4974               determine_use_iv_cost (data, use, cand);
4975             }
4976         }
4977       else
4978         {
4979           bitmap_iterator bi;
4980
4981           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4982             {
4983               cand = iv_cand (data, j);
4984               if (!determine_use_iv_cost (data, use, cand))
4985                 bitmap_set_bit (to_clear, j);
4986             }
4987
4988           /* Remove the candidates for that the cost is infinite from
4989              the list of related candidates.  */
4990           bitmap_and_compl_into (use->related_cands, to_clear);
4991           bitmap_clear (to_clear);
4992         }
4993     }
4994
4995   BITMAP_FREE (to_clear);
4996
4997   if (dump_file && (dump_flags & TDF_DETAILS))
4998     {
4999       fprintf (dump_file, "Use-candidate costs:\n");
5000
5001       for (i = 0; i < n_iv_uses (data); i++)
5002         {
5003           use = iv_use (data, i);
5004
5005           fprintf (dump_file, "Use %d:\n", i);
5006           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5007           for (j = 0; j < use->n_map_members; j++)
5008             {
5009               if (!use->cost_map[j].cand
5010                   || infinite_cost_p (use->cost_map[j].cost))
5011                 continue;
5012
5013               fprintf (dump_file, "  %d\t%d\t%d\t",
5014                        use->cost_map[j].cand->id,
5015                        use->cost_map[j].cost.cost,
5016                        use->cost_map[j].cost.complexity);
5017               if (use->cost_map[j].depends_on)
5018                 bitmap_print (dump_file,
5019                               use->cost_map[j].depends_on, "","");
5020               if (use->cost_map[j].inv_expr_id != -1)
5021                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5022               fprintf (dump_file, "\n");
5023             }
5024
5025           fprintf (dump_file, "\n");
5026         }
5027       fprintf (dump_file, "\n");
5028     }
5029 }
5030
5031 /* Determines cost of the candidate CAND.  */
5032
5033 static void
5034 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5035 {
5036   comp_cost cost_base;
5037   unsigned cost, cost_step;
5038   tree base;
5039
5040   if (!cand->iv)
5041     {
5042       cand->cost = 0;
5043       return;
5044     }
5045
5046   /* There are two costs associated with the candidate -- its increment
5047      and its initialization.  The second is almost negligible for any loop
5048      that rolls enough, so we take it just very little into account.  */
5049
5050   base = cand->iv->base;
5051   cost_base = force_var_cost (data, base, NULL);
5052   /* It will be exceptional that the iv register happens to be initialized with
5053      the proper value at no cost.  In general, there will at least be a regcopy
5054      or a const set.  */
5055   if (cost_base.cost == 0)
5056     cost_base.cost = COSTS_N_INSNS (1);
5057   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
5058
5059   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5060
5061   /* Prefer the original ivs unless we may gain something by replacing it.
5062      The reason is to make debugging simpler; so this is not relevant for
5063      artificial ivs created by other optimization passes.  */
5064   if (cand->pos != IP_ORIGINAL
5065       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5066     cost++;
5067
5068   /* Prefer not to insert statements into latch unless there are some
5069      already (so that we do not create unnecessary jumps).  */
5070   if (cand->pos == IP_END
5071       && empty_block_p (ip_end_pos (data->current_loop)))
5072     cost++;
5073
5074   cand->cost = cost;
5075   cand->cost_step = cost_step;
5076 }
5077
5078 /* Determines costs of computation of the candidates.  */
5079
5080 static void
5081 determine_iv_costs (struct ivopts_data *data)
5082 {
5083   unsigned i;
5084
5085   if (dump_file && (dump_flags & TDF_DETAILS))
5086     {
5087       fprintf (dump_file, "Candidate costs:\n");
5088       fprintf (dump_file, "  cand\tcost\n");
5089     }
5090
5091   for (i = 0; i < n_iv_cands (data); i++)
5092     {
5093       struct iv_cand *cand = iv_cand (data, i);
5094
5095       determine_iv_cost (data, cand);
5096
5097       if (dump_file && (dump_flags & TDF_DETAILS))
5098         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5099     }
5100
5101   if (dump_file && (dump_flags & TDF_DETAILS))
5102     fprintf (dump_file, "\n");
5103 }
5104
5105 /* Calculates cost for having SIZE induction variables.  */
5106
5107 static unsigned
5108 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5109 {
5110   /* We add size to the cost, so that we prefer eliminating ivs
5111      if possible.  */
5112   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5113                                             data->body_includes_call);
5114 }
5115
5116 /* For each size of the induction variable set determine the penalty.  */
5117
5118 static void
5119 determine_set_costs (struct ivopts_data *data)
5120 {
5121   unsigned j, n;
5122   gimple phi;
5123   gimple_stmt_iterator psi;
5124   tree op;
5125   struct loop *loop = data->current_loop;
5126   bitmap_iterator bi;
5127
5128   if (dump_file && (dump_flags & TDF_DETAILS))
5129     {
5130       fprintf (dump_file, "Global costs:\n");
5131       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5132       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5133       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5134       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5135     }
5136
5137   n = 0;
5138   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5139     {
5140       phi = gsi_stmt (psi);
5141       op = PHI_RESULT (phi);
5142
5143       if (!is_gimple_reg (op))
5144         continue;
5145
5146       if (get_iv (data, op))
5147         continue;
5148
5149       n++;
5150     }
5151
5152   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5153     {
5154       struct version_info *info = ver_info (data, j);
5155
5156       if (info->inv_id && info->has_nonlin_use)
5157         n++;
5158     }
5159
5160   data->regs_used = n;
5161   if (dump_file && (dump_flags & TDF_DETAILS))
5162     fprintf (dump_file, "  regs_used %d\n", n);
5163
5164   if (dump_file && (dump_flags & TDF_DETAILS))
5165     {
5166       fprintf (dump_file, "  cost for size:\n");
5167       fprintf (dump_file, "  ivs\tcost\n");
5168       for (j = 0; j <= 2 * target_avail_regs; j++)
5169         fprintf (dump_file, "  %d\t%d\n", j,
5170                  ivopts_global_cost_for_size (data, j));
5171       fprintf (dump_file, "\n");
5172     }
5173 }
5174
5175 /* Returns true if A is a cheaper cost pair than B.  */
5176
5177 static bool
5178 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5179 {
5180   int cmp;
5181
5182   if (!a)
5183     return false;
5184
5185   if (!b)
5186     return true;
5187
5188   cmp = compare_costs (a->cost, b->cost);
5189   if (cmp < 0)
5190     return true;
5191
5192   if (cmp > 0)
5193     return false;
5194
5195   /* In case the costs are the same, prefer the cheaper candidate.  */
5196   if (a->cand->cost < b->cand->cost)
5197     return true;
5198
5199   return false;
5200 }
5201
5202
5203 /* Returns candidate by that USE is expressed in IVS.  */
5204
5205 static struct cost_pair *
5206 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5207 {
5208   return ivs->cand_for_use[use->id];
5209 }
5210
5211 /* Computes the cost field of IVS structure.  */
5212
5213 static void
5214 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5215 {
5216   comp_cost cost = ivs->cand_use_cost;
5217
5218   cost.cost += ivs->cand_cost;
5219
5220   cost.cost += ivopts_global_cost_for_size (data,
5221                                             ivs->n_regs + ivs->num_used_inv_expr);
5222
5223   ivs->cost = cost;
5224 }
5225
5226 /* Remove invariants in set INVS to set IVS.  */
5227
5228 static void
5229 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5230 {
5231   bitmap_iterator bi;
5232   unsigned iid;
5233
5234   if (!invs)
5235     return;
5236
5237   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5238     {
5239       ivs->n_invariant_uses[iid]--;
5240       if (ivs->n_invariant_uses[iid] == 0)
5241         ivs->n_regs--;
5242     }
5243 }
5244
5245 /* Set USE not to be expressed by any candidate in IVS.  */
5246
5247 static void
5248 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5249                  struct iv_use *use)
5250 {
5251   unsigned uid = use->id, cid;
5252   struct cost_pair *cp;
5253
5254   cp = ivs->cand_for_use[uid];
5255   if (!cp)
5256     return;
5257   cid = cp->cand->id;
5258
5259   ivs->bad_uses++;
5260   ivs->cand_for_use[uid] = NULL;
5261   ivs->n_cand_uses[cid]--;
5262
5263   if (ivs->n_cand_uses[cid] == 0)
5264     {
5265       bitmap_clear_bit (ivs->cands, cid);
5266       /* Do not count the pseudocandidates.  */
5267       if (cp->cand->iv)
5268         ivs->n_regs--;
5269       ivs->n_cands--;
5270       ivs->cand_cost -= cp->cand->cost;
5271
5272       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5273     }
5274
5275   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5276
5277   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5278
5279   if (cp->inv_expr_id != -1)
5280     {
5281       ivs->used_inv_expr[cp->inv_expr_id]--;
5282       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5283         ivs->num_used_inv_expr--;
5284     }
5285   iv_ca_recount_cost (data, ivs);
5286 }
5287
5288 /* Add invariants in set INVS to set IVS.  */
5289
5290 static void
5291 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5292 {
5293   bitmap_iterator bi;
5294   unsigned iid;
5295
5296   if (!invs)
5297     return;
5298
5299   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5300     {
5301       ivs->n_invariant_uses[iid]++;
5302       if (ivs->n_invariant_uses[iid] == 1)
5303         ivs->n_regs++;
5304     }
5305 }
5306
5307 /* Set cost pair for USE in set IVS to CP.  */
5308
5309 static void
5310 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5311               struct iv_use *use, struct cost_pair *cp)
5312 {
5313   unsigned uid = use->id, cid;
5314
5315   if (ivs->cand_for_use[uid] == cp)
5316     return;
5317
5318   if (ivs->cand_for_use[uid])
5319     iv_ca_set_no_cp (data, ivs, use);
5320
5321   if (cp)
5322     {
5323       cid = cp->cand->id;
5324
5325       ivs->bad_uses--;
5326       ivs->cand_for_use[uid] = cp;
5327       ivs->n_cand_uses[cid]++;
5328       if (ivs->n_cand_uses[cid] == 1)
5329         {
5330           bitmap_set_bit (ivs->cands, cid);
5331           /* Do not count the pseudocandidates.  */
5332           if (cp->cand->iv)
5333             ivs->n_regs++;
5334           ivs->n_cands++;
5335           ivs->cand_cost += cp->cand->cost;
5336
5337           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5338         }
5339
5340       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5341       iv_ca_set_add_invariants (ivs, cp->depends_on);
5342
5343       if (cp->inv_expr_id != -1)
5344         {
5345           ivs->used_inv_expr[cp->inv_expr_id]++;
5346           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5347             ivs->num_used_inv_expr++;
5348         }
5349       iv_ca_recount_cost (data, ivs);
5350     }
5351 }
5352
5353 /* Extend set IVS by expressing USE by some of the candidates in it
5354    if possible. All important candidates will be considered
5355    if IMPORTANT_CANDIDATES is true.  */
5356
5357 static void
5358 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5359                struct iv_use *use, bool important_candidates)
5360 {
5361   struct cost_pair *best_cp = NULL, *cp;
5362   bitmap_iterator bi;
5363   bitmap cands;
5364   unsigned i;
5365
5366   gcc_assert (ivs->upto >= use->id);
5367
5368   if (ivs->upto == use->id)
5369     {
5370       ivs->upto++;
5371       ivs->bad_uses++;
5372     }
5373
5374   cands = (important_candidates ? data->important_candidates : ivs->cands);
5375   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5376     {
5377       struct iv_cand *cand = iv_cand (data, i);
5378
5379       cp = get_use_iv_cost (data, use, cand);
5380
5381       if (cheaper_cost_pair (cp, best_cp))
5382         best_cp = cp;
5383     }
5384
5385   iv_ca_set_cp (data, ivs, use, best_cp);
5386 }
5387
5388 /* Get cost for assignment IVS.  */
5389
5390 static comp_cost
5391 iv_ca_cost (struct iv_ca *ivs)
5392 {
5393   /* This was a conditional expression but it triggered a bug in
5394      Sun C 5.5.  */
5395   if (ivs->bad_uses)
5396     return infinite_cost;
5397   else
5398     return ivs->cost;
5399 }
5400
5401 /* Returns true if all dependences of CP are among invariants in IVS.  */
5402
5403 static bool
5404 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5405 {
5406   unsigned i;
5407   bitmap_iterator bi;
5408
5409   if (!cp->depends_on)
5410     return true;
5411
5412   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5413     {
5414       if (ivs->n_invariant_uses[i] == 0)
5415         return false;
5416     }
5417
5418   return true;
5419 }
5420
5421 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5422    it before NEXT_CHANGE.  */
5423
5424 static struct iv_ca_delta *
5425 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5426                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5427 {
5428   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5429
5430   change->use = use;
5431   change->old_cp = old_cp;
5432   change->new_cp = new_cp;
5433   change->next_change = next_change;
5434
5435   return change;
5436 }
5437
5438 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5439    are rewritten.  */
5440
5441 static struct iv_ca_delta *
5442 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5443 {
5444   struct iv_ca_delta *last;
5445
5446   if (!l2)
5447     return l1;
5448
5449   if (!l1)
5450     return l2;
5451
5452   for (last = l1; last->next_change; last = last->next_change)
5453     continue;
5454   last->next_change = l2;
5455
5456   return l1;
5457 }
5458
5459 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5460
5461 static struct iv_ca_delta *
5462 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5463 {
5464   struct iv_ca_delta *act, *next, *prev = NULL;
5465   struct cost_pair *tmp;
5466
5467   for (act = delta; act; act = next)
5468     {
5469       next = act->next_change;
5470       act->next_change = prev;
5471       prev = act;
5472
5473       tmp = act->old_cp;
5474       act->old_cp = act->new_cp;
5475       act->new_cp = tmp;
5476     }
5477
5478   return prev;
5479 }
5480
5481 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5482    reverted instead.  */
5483
5484 static void
5485 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5486                     struct iv_ca_delta *delta, bool forward)
5487 {
5488   struct cost_pair *from, *to;
5489   struct iv_ca_delta *act;
5490
5491   if (!forward)
5492     delta = iv_ca_delta_reverse (delta);
5493
5494   for (act = delta; act; act = act->next_change)
5495     {
5496       from = act->old_cp;
5497       to = act->new_cp;
5498       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5499       iv_ca_set_cp (data, ivs, act->use, to);
5500     }
5501
5502   if (!forward)
5503     iv_ca_delta_reverse (delta);
5504 }
5505
5506 /* Returns true if CAND is used in IVS.  */
5507
5508 static bool
5509 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5510 {
5511   return ivs->n_cand_uses[cand->id] > 0;
5512 }
5513
5514 /* Returns number of induction variable candidates in the set IVS.  */
5515
5516 static unsigned
5517 iv_ca_n_cands (struct iv_ca *ivs)
5518 {
5519   return ivs->n_cands;
5520 }
5521
5522 /* Free the list of changes DELTA.  */
5523
5524 static void
5525 iv_ca_delta_free (struct iv_ca_delta **delta)
5526 {
5527   struct iv_ca_delta *act, *next;
5528
5529   for (act = *delta; act; act = next)
5530     {
5531       next = act->next_change;
5532       free (act);
5533     }
5534
5535   *delta = NULL;
5536 }
5537
5538 /* Allocates new iv candidates assignment.  */
5539
5540 static struct iv_ca *
5541 iv_ca_new (struct ivopts_data *data)
5542 {
5543   struct iv_ca *nw = XNEW (struct iv_ca);
5544
5545   nw->upto = 0;
5546   nw->bad_uses = 0;
5547   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5548   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5549   nw->cands = BITMAP_ALLOC (NULL);
5550   nw->n_cands = 0;
5551   nw->n_regs = 0;
5552   nw->cand_use_cost = zero_cost;
5553   nw->cand_cost = 0;
5554   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5555   nw->cost = zero_cost;
5556   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5557   nw->num_used_inv_expr = 0;
5558
5559   return nw;
5560 }
5561
5562 /* Free memory occupied by the set IVS.  */
5563
5564 static void
5565 iv_ca_free (struct iv_ca **ivs)
5566 {
5567   free ((*ivs)->cand_for_use);
5568   free ((*ivs)->n_cand_uses);
5569   BITMAP_FREE ((*ivs)->cands);
5570   free ((*ivs)->n_invariant_uses);
5571   free ((*ivs)->used_inv_expr);
5572   free (*ivs);
5573   *ivs = NULL;
5574 }
5575
5576 /* Dumps IVS to FILE.  */
5577
5578 static void
5579 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5580 {
5581   const char *pref = "  invariants ";
5582   unsigned i;
5583   comp_cost cost = iv_ca_cost (ivs);
5584
5585   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5586   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5587            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5588   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5589
5590    for (i = 0; i < ivs->upto; i++)
5591     {
5592       struct iv_use *use = iv_use (data, i);
5593       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5594       if (cp)
5595         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5596                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5597       else
5598         fprintf (file, "   use:%d --> ??\n", use->id);
5599     }
5600
5601   for (i = 1; i <= data->max_inv_id; i++)
5602     if (ivs->n_invariant_uses[i])
5603       {
5604         fprintf (file, "%s%d", pref, i);
5605         pref = ", ";
5606       }
5607   fprintf (file, "\n\n");
5608 }
5609
5610 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5611    new set, and store differences in DELTA.  Number of induction variables
5612    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5613    the function will try to find a solution with mimimal iv candidates.  */
5614
5615 static comp_cost
5616 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5617               struct iv_cand *cand, struct iv_ca_delta **delta,
5618               unsigned *n_ivs, bool min_ncand)
5619 {
5620   unsigned i;
5621   comp_cost cost;
5622   struct iv_use *use;
5623   struct cost_pair *old_cp, *new_cp;
5624
5625   *delta = NULL;
5626   for (i = 0; i < ivs->upto; i++)
5627     {
5628       use = iv_use (data, i);
5629       old_cp = iv_ca_cand_for_use (ivs, use);
5630
5631       if (old_cp
5632           && old_cp->cand == cand)
5633         continue;
5634
5635       new_cp = get_use_iv_cost (data, use, cand);
5636       if (!new_cp)
5637         continue;
5638
5639       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5640         continue;
5641
5642       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5643         continue;
5644
5645       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5646     }
5647
5648   iv_ca_delta_commit (data, ivs, *delta, true);
5649   cost = iv_ca_cost (ivs);
5650   if (n_ivs)
5651     *n_ivs = iv_ca_n_cands (ivs);
5652   iv_ca_delta_commit (data, ivs, *delta, false);
5653
5654   return cost;
5655 }
5656
5657 /* Try narrowing set IVS by removing CAND.  Return the cost of
5658    the new set and store the differences in DELTA.  */
5659
5660 static comp_cost
5661 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5662               struct iv_cand *cand, struct iv_ca_delta **delta)
5663 {
5664   unsigned i, ci;
5665   struct iv_use *use;
5666   struct cost_pair *old_cp, *new_cp, *cp;
5667   bitmap_iterator bi;
5668   struct iv_cand *cnd;
5669   comp_cost cost;
5670
5671   *delta = NULL;
5672   for (i = 0; i < n_iv_uses (data); i++)
5673     {
5674       use = iv_use (data, i);
5675
5676       old_cp = iv_ca_cand_for_use (ivs, use);
5677       if (old_cp->cand != cand)
5678         continue;
5679
5680       new_cp = NULL;
5681
5682       if (data->consider_all_candidates)
5683         {
5684           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5685             {
5686               if (ci == cand->id)
5687                 continue;
5688
5689               cnd = iv_cand (data, ci);
5690
5691               cp = get_use_iv_cost (data, use, cnd);
5692               if (!cp)
5693                 continue;
5694
5695               if (!iv_ca_has_deps (ivs, cp))
5696                 continue;
5697
5698               if (!cheaper_cost_pair (cp, new_cp))
5699                 continue;
5700
5701               new_cp = cp;
5702             }
5703         }
5704       else
5705         {
5706           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5707             {
5708               if (ci == cand->id)
5709                 continue;
5710
5711               cnd = iv_cand (data, ci);
5712
5713               cp = get_use_iv_cost (data, use, cnd);
5714               if (!cp)
5715                 continue;
5716               if (!iv_ca_has_deps (ivs, cp))
5717                 continue;
5718
5719               if (!cheaper_cost_pair (cp, new_cp))
5720                 continue;
5721
5722               new_cp = cp;
5723             }
5724         }
5725
5726       if (!new_cp)
5727         {
5728           iv_ca_delta_free (delta);
5729           return infinite_cost;
5730         }
5731
5732       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5733     }
5734
5735   iv_ca_delta_commit (data, ivs, *delta, true);
5736   cost = iv_ca_cost (ivs);
5737   iv_ca_delta_commit (data, ivs, *delta, false);
5738
5739   return cost;
5740 }
5741
5742 /* Try optimizing the set of candidates IVS by removing candidates different
5743    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5744    differences in DELTA.  */
5745
5746 static comp_cost
5747 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5748              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5749 {
5750   bitmap_iterator bi;
5751   struct iv_ca_delta *act_delta, *best_delta;
5752   unsigned i;
5753   comp_cost best_cost, acost;
5754   struct iv_cand *cand;
5755
5756   best_delta = NULL;
5757   best_cost = iv_ca_cost (ivs);
5758
5759   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5760     {
5761       cand = iv_cand (data, i);
5762
5763       if (cand == except_cand)
5764         continue;
5765
5766       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5767
5768       if (compare_costs (acost, best_cost) < 0)
5769         {
5770           best_cost = acost;
5771           iv_ca_delta_free (&best_delta);
5772           best_delta = act_delta;
5773         }
5774       else
5775         iv_ca_delta_free (&act_delta);
5776     }
5777
5778   if (!best_delta)
5779     {
5780       *delta = NULL;
5781       return best_cost;
5782     }
5783
5784   /* Recurse to possibly remove other unnecessary ivs.  */
5785   iv_ca_delta_commit (data, ivs, best_delta, true);
5786   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5787   iv_ca_delta_commit (data, ivs, best_delta, false);
5788   *delta = iv_ca_delta_join (best_delta, *delta);
5789   return best_cost;
5790 }
5791
5792 /* Tries to extend the sets IVS in the best possible way in order
5793    to express the USE.  If ORIGINALP is true, prefer candidates from
5794    the original set of IVs, otherwise favor important candidates not
5795    based on any memory object.  */
5796
5797 static bool
5798 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5799                   struct iv_use *use, bool originalp)
5800 {
5801   comp_cost best_cost, act_cost;
5802   unsigned i;
5803   bitmap_iterator bi;
5804   struct iv_cand *cand;
5805   struct iv_ca_delta *best_delta = NULL, *act_delta;
5806   struct cost_pair *cp;
5807
5808   iv_ca_add_use (data, ivs, use, false);
5809   best_cost = iv_ca_cost (ivs);
5810
5811   cp = iv_ca_cand_for_use (ivs, use);
5812   if (!cp)
5813     {
5814       ivs->upto--;
5815       ivs->bad_uses--;
5816       iv_ca_add_use (data, ivs, use, true);
5817       best_cost = iv_ca_cost (ivs);
5818       cp = iv_ca_cand_for_use (ivs, use);
5819     }
5820   if (cp)
5821     {
5822       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5823       iv_ca_set_no_cp (data, ivs, use);
5824     }
5825
5826   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5827      first try important candidates not based on any memory object.  Only if
5828      this fails, try the specific ones.  Rationale -- in loops with many
5829      variables the best choice often is to use just one generic biv.  If we
5830      added here many ivs specific to the uses, the optimization algorithm later
5831      would be likely to get stuck in a local minimum, thus causing us to create
5832      too many ivs.  The approach from few ivs to more seems more likely to be
5833      successful -- starting from few ivs, replacing an expensive use by a
5834      specific iv should always be a win.  */
5835   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5836     {
5837       cand = iv_cand (data, i);
5838
5839       if (originalp && cand->pos !=IP_ORIGINAL)
5840         continue;
5841
5842       if (!originalp && cand->iv->base_object != NULL_TREE)
5843         continue;
5844
5845       if (iv_ca_cand_used_p (ivs, cand))
5846         continue;
5847
5848       cp = get_use_iv_cost (data, use, cand);
5849       if (!cp)
5850         continue;
5851
5852       iv_ca_set_cp (data, ivs, use, cp);
5853       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5854                                true);
5855       iv_ca_set_no_cp (data, ivs, use);
5856       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5857
5858       if (compare_costs (act_cost, best_cost) < 0)
5859         {
5860           best_cost = act_cost;
5861
5862           iv_ca_delta_free (&best_delta);
5863           best_delta = act_delta;
5864         }
5865       else
5866         iv_ca_delta_free (&act_delta);
5867     }
5868
5869   if (infinite_cost_p (best_cost))
5870     {
5871       for (i = 0; i < use->n_map_members; i++)
5872         {
5873           cp = use->cost_map + i;
5874           cand = cp->cand;
5875           if (!cand)
5876             continue;
5877
5878           /* Already tried this.  */
5879           if (cand->important)
5880             {
5881               if (originalp && cand->pos == IP_ORIGINAL)
5882                 continue;
5883               if (!originalp && cand->iv->base_object == NULL_TREE)
5884                 continue;
5885             }
5886
5887           if (iv_ca_cand_used_p (ivs, cand))
5888             continue;
5889
5890           act_delta = NULL;
5891           iv_ca_set_cp (data, ivs, use, cp);
5892           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5893           iv_ca_set_no_cp (data, ivs, use);
5894           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5895                                        cp, act_delta);
5896
5897           if (compare_costs (act_cost, best_cost) < 0)
5898             {
5899               best_cost = act_cost;
5900
5901               if (best_delta)
5902                 iv_ca_delta_free (&best_delta);
5903               best_delta = act_delta;
5904             }
5905           else
5906             iv_ca_delta_free (&act_delta);
5907         }
5908     }
5909
5910   iv_ca_delta_commit (data, ivs, best_delta, true);
5911   iv_ca_delta_free (&best_delta);
5912
5913   return !infinite_cost_p (best_cost);
5914 }
5915
5916 /* Finds an initial assignment of candidates to uses.  */
5917
5918 static struct iv_ca *
5919 get_initial_solution (struct ivopts_data *data, bool originalp)
5920 {
5921   struct iv_ca *ivs = iv_ca_new (data);
5922   unsigned i;
5923
5924   for (i = 0; i < n_iv_uses (data); i++)
5925     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5926       {
5927         iv_ca_free (&ivs);
5928         return NULL;
5929       }
5930
5931   return ivs;
5932 }
5933
5934 /* Tries to improve set of induction variables IVS.  */
5935
5936 static bool
5937 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5938 {
5939   unsigned i, n_ivs;
5940   comp_cost acost, best_cost = iv_ca_cost (ivs);
5941   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5942   struct iv_cand *cand;
5943
5944   /* Try extending the set of induction variables by one.  */
5945   for (i = 0; i < n_iv_cands (data); i++)
5946     {
5947       cand = iv_cand (data, i);
5948
5949       if (iv_ca_cand_used_p (ivs, cand))
5950         continue;
5951
5952       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5953       if (!act_delta)
5954         continue;
5955
5956       /* If we successfully added the candidate and the set is small enough,
5957          try optimizing it by removing other candidates.  */
5958       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5959         {
5960           iv_ca_delta_commit (data, ivs, act_delta, true);
5961           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5962           iv_ca_delta_commit (data, ivs, act_delta, false);
5963           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5964         }
5965
5966       if (compare_costs (acost, best_cost) < 0)
5967         {
5968           best_cost = acost;
5969           iv_ca_delta_free (&best_delta);
5970           best_delta = act_delta;
5971         }
5972       else
5973         iv_ca_delta_free (&act_delta);
5974     }
5975
5976   if (!best_delta)
5977     {
5978       /* Try removing the candidates from the set instead.  */
5979       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5980
5981       /* Nothing more we can do.  */
5982       if (!best_delta)
5983         return false;
5984     }
5985
5986   iv_ca_delta_commit (data, ivs, best_delta, true);
5987   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5988   iv_ca_delta_free (&best_delta);
5989   return true;
5990 }
5991
5992 /* Attempts to find the optimal set of induction variables.  We do simple
5993    greedy heuristic -- we try to replace at most one candidate in the selected
5994    solution and remove the unused ivs while this improves the cost.  */
5995
5996 static struct iv_ca *
5997 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5998 {
5999   struct iv_ca *set;
6000
6001   /* Get the initial solution.  */
6002   set = get_initial_solution (data, originalp);
6003   if (!set)
6004     {
6005       if (dump_file && (dump_flags & TDF_DETAILS))
6006         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6007       return NULL;
6008     }
6009
6010   if (dump_file && (dump_flags & TDF_DETAILS))
6011     {
6012       fprintf (dump_file, "Initial set of candidates:\n");
6013       iv_ca_dump (data, dump_file, set);
6014     }
6015
6016   while (try_improve_iv_set (data, set))
6017     {
6018       if (dump_file && (dump_flags & TDF_DETAILS))
6019         {
6020           fprintf (dump_file, "Improved to:\n");
6021           iv_ca_dump (data, dump_file, set);
6022         }
6023     }
6024
6025   return set;
6026 }
6027
6028 static struct iv_ca *
6029 find_optimal_iv_set (struct ivopts_data *data)
6030 {
6031   unsigned i;
6032   struct iv_ca *set, *origset;
6033   struct iv_use *use;
6034   comp_cost cost, origcost;
6035
6036   /* Determine the cost based on a strategy that starts with original IVs,
6037      and try again using a strategy that prefers candidates not based
6038      on any IVs.  */
6039   origset = find_optimal_iv_set_1 (data, true);
6040   set = find_optimal_iv_set_1 (data, false);
6041
6042   if (!origset && !set)
6043     return NULL;
6044
6045   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6046   cost = set ? iv_ca_cost (set) : infinite_cost;
6047
6048   if (dump_file && (dump_flags & TDF_DETAILS))
6049     {
6050       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6051                origcost.cost, origcost.complexity);
6052       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6053                cost.cost, cost.complexity);
6054     }
6055
6056   /* Choose the one with the best cost.  */
6057   if (compare_costs (origcost, cost) <= 0)
6058     {
6059       if (set)
6060         iv_ca_free (&set);
6061       set = origset;
6062     }
6063   else if (origset)
6064     iv_ca_free (&origset);
6065
6066   for (i = 0; i < n_iv_uses (data); i++)
6067     {
6068       use = iv_use (data, i);
6069       use->selected = iv_ca_cand_for_use (set, use)->cand;
6070     }
6071
6072   return set;
6073 }
6074
6075 /* Creates a new induction variable corresponding to CAND.  */
6076
6077 static void
6078 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6079 {
6080   gimple_stmt_iterator incr_pos;
6081   tree base;
6082   bool after = false;
6083
6084   if (!cand->iv)
6085     return;
6086
6087   switch (cand->pos)
6088     {
6089     case IP_NORMAL:
6090       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6091       break;
6092
6093     case IP_END:
6094       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6095       after = true;
6096       break;
6097
6098     case IP_AFTER_USE:
6099       after = true;
6100       /* fall through */
6101     case IP_BEFORE_USE:
6102       incr_pos = gsi_for_stmt (cand->incremented_at);
6103       break;
6104
6105     case IP_ORIGINAL:
6106       /* Mark that the iv is preserved.  */
6107       name_info (data, cand->var_before)->preserve_biv = true;
6108       name_info (data, cand->var_after)->preserve_biv = true;
6109
6110       /* Rewrite the increment so that it uses var_before directly.  */
6111       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6112       return;
6113     }
6114
6115   gimple_add_tmp_var (cand->var_before);
6116   add_referenced_var (cand->var_before);
6117
6118   base = unshare_expr (cand->iv->base);
6119
6120   create_iv (base, unshare_expr (cand->iv->step),
6121              cand->var_before, data->current_loop,
6122              &incr_pos, after, &cand->var_before, &cand->var_after);
6123 }
6124
6125 /* Creates new induction variables described in SET.  */
6126
6127 static void
6128 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6129 {
6130   unsigned i;
6131   struct iv_cand *cand;
6132   bitmap_iterator bi;
6133
6134   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6135     {
6136       cand = iv_cand (data, i);
6137       create_new_iv (data, cand);
6138     }
6139
6140   if (dump_file && (dump_flags & TDF_DETAILS))
6141     {
6142       fprintf (dump_file, "\nSelected IV set: \n");
6143       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6144         {
6145           cand = iv_cand (data, i);
6146           dump_cand (dump_file, cand);
6147         }
6148       fprintf (dump_file, "\n");
6149     }
6150 }
6151
6152 /* Rewrites USE (definition of iv used in a nonlinear expression)
6153    using candidate CAND.  */
6154
6155 static void
6156 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6157                             struct iv_use *use, struct iv_cand *cand)
6158 {
6159   tree comp;
6160   tree op, tgt;
6161   gimple ass;
6162   gimple_stmt_iterator bsi;
6163
6164   /* An important special case -- if we are asked to express value of
6165      the original iv by itself, just exit; there is no need to
6166      introduce a new computation (that might also need casting the
6167      variable to unsigned and back).  */
6168   if (cand->pos == IP_ORIGINAL
6169       && cand->incremented_at == use->stmt)
6170     {
6171       tree step, ctype, utype;
6172       enum tree_code incr_code = PLUS_EXPR, old_code;
6173
6174       gcc_assert (is_gimple_assign (use->stmt));
6175       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6176
6177       step = cand->iv->step;
6178       ctype = TREE_TYPE (step);
6179       utype = TREE_TYPE (cand->var_after);
6180       if (TREE_CODE (step) == NEGATE_EXPR)
6181         {
6182           incr_code = MINUS_EXPR;
6183           step = TREE_OPERAND (step, 0);
6184         }
6185
6186       /* Check whether we may leave the computation unchanged.
6187          This is the case only if it does not rely on other
6188          computations in the loop -- otherwise, the computation
6189          we rely upon may be removed in remove_unused_ivs,
6190          thus leading to ICE.  */
6191       old_code = gimple_assign_rhs_code (use->stmt);
6192       if (old_code == PLUS_EXPR
6193           || old_code == MINUS_EXPR
6194           || old_code == POINTER_PLUS_EXPR)
6195         {
6196           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6197             op = gimple_assign_rhs2 (use->stmt);
6198           else if (old_code != MINUS_EXPR
6199                    && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6200             op = gimple_assign_rhs1 (use->stmt);
6201           else
6202             op = NULL_TREE;
6203         }
6204       else
6205         op = NULL_TREE;
6206
6207       if (op
6208           && (TREE_CODE (op) == INTEGER_CST
6209               || operand_equal_p (op, step, 0)))
6210         return;
6211
6212       /* Otherwise, add the necessary computations to express
6213          the iv.  */
6214       op = fold_convert (ctype, cand->var_before);
6215       comp = fold_convert (utype,
6216                            build2 (incr_code, ctype, op,
6217                                    unshare_expr (step)));
6218     }
6219   else
6220     {
6221       comp = get_computation (data->current_loop, use, cand);
6222       gcc_assert (comp != NULL_TREE);
6223     }
6224
6225   switch (gimple_code (use->stmt))
6226     {
6227     case GIMPLE_PHI:
6228       tgt = PHI_RESULT (use->stmt);
6229
6230       /* If we should keep the biv, do not replace it.  */
6231       if (name_info (data, tgt)->preserve_biv)
6232         return;
6233
6234       bsi = gsi_after_labels (gimple_bb (use->stmt));
6235       break;
6236
6237     case GIMPLE_ASSIGN:
6238       tgt = gimple_assign_lhs (use->stmt);
6239       bsi = gsi_for_stmt (use->stmt);
6240       break;
6241
6242     default:
6243       gcc_unreachable ();
6244     }
6245
6246   if (!valid_gimple_rhs_p (comp)
6247       || (gimple_code (use->stmt) != GIMPLE_PHI
6248           /* We can't allow re-allocating the stmt as it might be pointed
6249              to still.  */
6250           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6251               >= gimple_num_ops (gsi_stmt (bsi)))))
6252     {
6253       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6254                                        true, GSI_SAME_STMT);
6255       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6256         {
6257           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6258           /* As this isn't a plain copy we have to reset alignment
6259              information.  */
6260           if (SSA_NAME_PTR_INFO (comp))
6261             {
6262               SSA_NAME_PTR_INFO (comp)->align = 1;
6263               SSA_NAME_PTR_INFO (comp)->misalign = 0;
6264             }
6265         }
6266     }
6267
6268   if (gimple_code (use->stmt) == GIMPLE_PHI)
6269     {
6270       ass = gimple_build_assign (tgt, comp);
6271       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6272
6273       bsi = gsi_for_stmt (use->stmt);
6274       remove_phi_node (&bsi, false);
6275     }
6276   else
6277     {
6278       gimple_assign_set_rhs_from_tree (&bsi, comp);
6279       use->stmt = gsi_stmt (bsi);
6280     }
6281 }
6282
6283 /* Performs a peephole optimization to reorder the iv update statement with
6284    a mem ref to enable instruction combining in later phases. The mem ref uses
6285    the iv value before the update, so the reordering transformation requires
6286    adjustment of the offset. CAND is the selected IV_CAND.
6287
6288    Example:
6289
6290    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6291    iv2 = iv1 + 1;
6292
6293    if (t < val)      (1)
6294      goto L;
6295    goto Head;
6296
6297
6298    directly propagating t over to (1) will introduce overlapping live range
6299    thus increase register pressure. This peephole transform it into:
6300
6301
6302    iv2 = iv1 + 1;
6303    t = MEM_REF (base, iv2, 8, 8);
6304    if (t < val)
6305      goto L;
6306    goto Head;
6307 */
6308
6309 static void
6310 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6311 {
6312   tree var_after;
6313   gimple iv_update, stmt;
6314   basic_block bb;
6315   gimple_stmt_iterator gsi, gsi_iv;
6316
6317   if (cand->pos != IP_NORMAL)
6318     return;
6319
6320   var_after = cand->var_after;
6321   iv_update = SSA_NAME_DEF_STMT (var_after);
6322
6323   bb = gimple_bb (iv_update);
6324   gsi = gsi_last_nondebug_bb (bb);
6325   stmt = gsi_stmt (gsi);
6326
6327   /* Only handle conditional statement for now.  */
6328   if (gimple_code (stmt) != GIMPLE_COND)
6329     return;
6330
6331   gsi_prev_nondebug (&gsi);
6332   stmt = gsi_stmt (gsi);
6333   if (stmt != iv_update)
6334     return;
6335
6336   gsi_prev_nondebug (&gsi);
6337   if (gsi_end_p (gsi))
6338     return;
6339
6340   stmt = gsi_stmt (gsi);
6341   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6342     return;
6343
6344   if (stmt != use->stmt)
6345     return;
6346
6347   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6348     return;
6349
6350   if (dump_file && (dump_flags & TDF_DETAILS))
6351     {
6352       fprintf (dump_file, "Reordering \n");
6353       print_gimple_stmt (dump_file, iv_update, 0, 0);
6354       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6355       fprintf (dump_file, "\n");
6356     }
6357
6358   gsi = gsi_for_stmt (use->stmt);
6359   gsi_iv = gsi_for_stmt (iv_update);
6360   gsi_move_before (&gsi_iv, &gsi);
6361
6362   cand->pos = IP_BEFORE_USE;
6363   cand->incremented_at = use->stmt;
6364 }
6365
6366 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6367
6368 static void
6369 rewrite_use_address (struct ivopts_data *data,
6370                      struct iv_use *use, struct iv_cand *cand)
6371 {
6372   aff_tree aff;
6373   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6374   tree base_hint = NULL_TREE;
6375   tree ref, iv;
6376   bool ok;
6377
6378   adjust_iv_update_pos (cand, use);
6379   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6380   gcc_assert (ok);
6381   unshare_aff_combination (&aff);
6382
6383   /* To avoid undefined overflow problems, all IV candidates use unsigned
6384      integer types.  The drawback is that this makes it impossible for
6385      create_mem_ref to distinguish an IV that is based on a memory object
6386      from one that represents simply an offset.
6387
6388      To work around this problem, we pass a hint to create_mem_ref that
6389      indicates which variable (if any) in aff is an IV based on a memory
6390      object.  Note that we only consider the candidate.  If this is not
6391      based on an object, the base of the reference is in some subexpression
6392      of the use -- but these will use pointer types, so they are recognized
6393      by the create_mem_ref heuristics anyway.  */
6394   if (cand->iv->base_object)
6395     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6396
6397   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6398   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6399                         reference_alias_ptr_type (*use->op_p),
6400                         iv, base_hint, data->speed);
6401   copy_ref_info (ref, *use->op_p);
6402   *use->op_p = ref;
6403 }
6404
6405 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6406    candidate CAND.  */
6407
6408 static void
6409 rewrite_use_compare (struct ivopts_data *data,
6410                      struct iv_use *use, struct iv_cand *cand)
6411 {
6412   tree comp, *var_p, op, bound;
6413   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6414   enum tree_code compare;
6415   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6416   bool ok;
6417
6418   bound = cp->value;
6419   if (bound)
6420     {
6421       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6422       tree var_type = TREE_TYPE (var);
6423       gimple_seq stmts;
6424
6425       if (dump_file && (dump_flags & TDF_DETAILS))
6426         {
6427           fprintf (dump_file, "Replacing exit test: ");
6428           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6429         }
6430       compare = cp->comp;
6431       bound = unshare_expr (fold_convert (var_type, bound));
6432       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6433       if (stmts)
6434         gsi_insert_seq_on_edge_immediate (
6435                 loop_preheader_edge (data->current_loop),
6436                 stmts);
6437
6438       gimple_cond_set_lhs (use->stmt, var);
6439       gimple_cond_set_code (use->stmt, compare);
6440       gimple_cond_set_rhs (use->stmt, op);
6441       return;
6442     }
6443
6444   /* The induction variable elimination failed; just express the original
6445      giv.  */
6446   comp = get_computation (data->current_loop, use, cand);
6447   gcc_assert (comp != NULL_TREE);
6448
6449   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6450   gcc_assert (ok);
6451
6452   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6453                                      true, GSI_SAME_STMT);
6454 }
6455
6456 /* Rewrites USE using candidate CAND.  */
6457
6458 static void
6459 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6460 {
6461   switch (use->type)
6462     {
6463       case USE_NONLINEAR_EXPR:
6464         rewrite_use_nonlinear_expr (data, use, cand);
6465         break;
6466
6467       case USE_ADDRESS:
6468         rewrite_use_address (data, use, cand);
6469         break;
6470
6471       case USE_COMPARE:
6472         rewrite_use_compare (data, use, cand);
6473         break;
6474
6475       default:
6476         gcc_unreachable ();
6477     }
6478
6479   update_stmt (use->stmt);
6480 }
6481
6482 /* Rewrite the uses using the selected induction variables.  */
6483
6484 static void
6485 rewrite_uses (struct ivopts_data *data)
6486 {
6487   unsigned i;
6488   struct iv_cand *cand;
6489   struct iv_use *use;
6490
6491   for (i = 0; i < n_iv_uses (data); i++)
6492     {
6493       use = iv_use (data, i);
6494       cand = use->selected;
6495       gcc_assert (cand);
6496
6497       rewrite_use (data, use, cand);
6498     }
6499 }
6500
6501 /* Removes the ivs that are not used after rewriting.  */
6502
6503 static void
6504 remove_unused_ivs (struct ivopts_data *data)
6505 {
6506   unsigned j;
6507   bitmap_iterator bi;
6508   bitmap toremove = BITMAP_ALLOC (NULL);
6509
6510   /* Figure out an order in which to release SSA DEFs so that we don't
6511      release something that we'd have to propagate into a debug stmt
6512      afterwards.  */
6513   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6514     {
6515       struct version_info *info;
6516
6517       info = ver_info (data, j);
6518       if (info->iv
6519           && !integer_zerop (info->iv->step)
6520           && !info->inv_id
6521           && !info->iv->have_use_for
6522           && !info->preserve_biv)
6523         bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6524     }
6525
6526   release_defs_bitset (toremove);
6527
6528   BITMAP_FREE (toremove);
6529 }
6530
6531 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6532    for pointer_map_traverse.  */
6533
6534 static bool
6535 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6536                       void *data ATTRIBUTE_UNUSED)
6537 {
6538   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6539
6540   free (niter);
6541   return true;
6542 }
6543
6544 /* Frees data allocated by the optimization of a single loop.  */
6545
6546 static void
6547 free_loop_data (struct ivopts_data *data)
6548 {
6549   unsigned i, j;
6550   bitmap_iterator bi;
6551   tree obj;
6552
6553   if (data->niters)
6554     {
6555       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6556       pointer_map_destroy (data->niters);
6557       data->niters = NULL;
6558     }
6559
6560   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6561     {
6562       struct version_info *info;
6563
6564       info = ver_info (data, i);
6565       free (info->iv);
6566       info->iv = NULL;
6567       info->has_nonlin_use = false;
6568       info->preserve_biv = false;
6569       info->inv_id = 0;
6570     }
6571   bitmap_clear (data->relevant);
6572   bitmap_clear (data->important_candidates);
6573
6574   for (i = 0; i < n_iv_uses (data); i++)
6575     {
6576       struct iv_use *use = iv_use (data, i);
6577
6578       free (use->iv);
6579       BITMAP_FREE (use->related_cands);
6580       for (j = 0; j < use->n_map_members; j++)
6581         if (use->cost_map[j].depends_on)
6582           BITMAP_FREE (use->cost_map[j].depends_on);
6583       free (use->cost_map);
6584       free (use);
6585     }
6586   VEC_truncate (iv_use_p, data->iv_uses, 0);
6587
6588   for (i = 0; i < n_iv_cands (data); i++)
6589     {
6590       struct iv_cand *cand = iv_cand (data, i);
6591
6592       free (cand->iv);
6593       if (cand->depends_on)
6594         BITMAP_FREE (cand->depends_on);
6595       free (cand);
6596     }
6597   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6598
6599   if (data->version_info_size < num_ssa_names)
6600     {
6601       data->version_info_size = 2 * num_ssa_names;
6602       free (data->version_info);
6603       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6604     }
6605
6606   data->max_inv_id = 0;
6607
6608   FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6609     SET_DECL_RTL (obj, NULL_RTX);
6610
6611   VEC_truncate (tree, decl_rtl_to_reset, 0);
6612
6613   htab_empty (data->inv_expr_tab);
6614   data->inv_expr_id = 0;
6615 }
6616
6617 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6618    loop tree.  */
6619
6620 static void
6621 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6622 {
6623   free_loop_data (data);
6624   free (data->version_info);
6625   BITMAP_FREE (data->relevant);
6626   BITMAP_FREE (data->important_candidates);
6627
6628   VEC_free (tree, heap, decl_rtl_to_reset);
6629   VEC_free (iv_use_p, heap, data->iv_uses);
6630   VEC_free (iv_cand_p, heap, data->iv_candidates);
6631   htab_delete (data->inv_expr_tab);
6632 }
6633
6634 /* Returns true if the loop body BODY includes any function calls.  */
6635
6636 static bool
6637 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6638 {
6639   gimple_stmt_iterator gsi;
6640   unsigned i;
6641
6642   for (i = 0; i < num_nodes; i++)
6643     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6644       {
6645         gimple stmt = gsi_stmt (gsi);
6646         if (is_gimple_call (stmt)
6647             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6648           return true;
6649       }
6650   return false;
6651 }
6652
6653 /* Optimizes the LOOP.  Returns true if anything changed.  */
6654
6655 static bool
6656 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6657 {
6658   bool changed = false;
6659   struct iv_ca *iv_ca;
6660   edge exit = single_dom_exit (loop);
6661   basic_block *body;
6662
6663   gcc_assert (!data->niters);
6664   data->current_loop = loop;
6665   data->speed = optimize_loop_for_speed_p (loop);
6666
6667   if (dump_file && (dump_flags & TDF_DETAILS))
6668     {
6669       fprintf (dump_file, "Processing loop %d\n", loop->num);
6670
6671       if (exit)
6672         {
6673           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6674                    exit->src->index, exit->dest->index);
6675           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6676           fprintf (dump_file, "\n");
6677         }
6678
6679       fprintf (dump_file, "\n");
6680     }
6681
6682   body = get_loop_body (loop);
6683   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6684   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6685   free (body);
6686
6687   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6688
6689   /* For each ssa name determines whether it behaves as an induction variable
6690      in some loop.  */
6691   if (!find_induction_variables (data))
6692     goto finish;
6693
6694   /* Finds interesting uses (item 1).  */
6695   find_interesting_uses (data);
6696   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6697     goto finish;
6698
6699   /* Finds candidates for the induction variables (item 2).  */
6700   find_iv_candidates (data);
6701
6702   /* Calculates the costs (item 3, part 1).  */
6703   determine_iv_costs (data);
6704   determine_use_iv_costs (data);
6705   determine_set_costs (data);
6706
6707   /* Find the optimal set of induction variables (item 3, part 2).  */
6708   iv_ca = find_optimal_iv_set (data);
6709   if (!iv_ca)
6710     goto finish;
6711   changed = true;
6712
6713   /* Create the new induction variables (item 4, part 1).  */
6714   create_new_ivs (data, iv_ca);
6715   iv_ca_free (&iv_ca);
6716
6717   /* Rewrite the uses (item 4, part 2).  */
6718   rewrite_uses (data);
6719
6720   /* Remove the ivs that are unused after rewriting.  */
6721   remove_unused_ivs (data);
6722
6723   /* We have changed the structure of induction variables; it might happen
6724      that definitions in the scev database refer to some of them that were
6725      eliminated.  */
6726   scev_reset ();
6727
6728 finish:
6729   free_loop_data (data);
6730
6731   return changed;
6732 }
6733
6734 /* Main entry point.  Optimizes induction variables in loops.  */
6735
6736 void
6737 tree_ssa_iv_optimize (void)
6738 {
6739   struct loop *loop;
6740   struct ivopts_data data;
6741   loop_iterator li;
6742
6743   tree_ssa_iv_optimize_init (&data);
6744
6745   /* Optimize the loops starting with the innermost ones.  */
6746   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6747     {
6748       if (dump_file && (dump_flags & TDF_DETAILS))
6749         flow_loop_dump (loop, dump_file, NULL, 1);
6750
6751       tree_ssa_iv_optimize_loop (&data, loop);
6752     }
6753
6754   tree_ssa_iv_optimize_finalize (&data);
6755 }