gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2015 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
  39    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  40       cost function assigns a cost to sets of induction variables and consists
  41       of three parts:
  42
  43       -- The use costs.  Each of the interesting uses chooses the best induction
  44          variable in the set and adds its cost to the sum.  The cost reflects
  45          the time spent on modifying the induction variables value to be usable
  46          for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "input.h"
  69 #include "alias.h"
  70 #include "symtab.h"
  71 #include "tree.h"
  72 #include "fold-const.h"
  73 #include "stor-layout.h"
  74 #include "tm_p.h"
  75 #include "predict.h"
  76 #include "hard-reg-set.h"
  77 #include "function.h"
  78 #include "dominance.h"
  79 #include "cfg.h"
  80 #include "basic-block.h"
  81 #include "gimple-pretty-print.h"
  82 #include "tree-ssa-alias.h"
  83 #include "internal-fn.h"
  84 #include "tree-eh.h"
  85 #include "gimple-expr.h"
  86 #include "is-a.h"
  87 #include "gimple.h"
  88 #include "gimplify.h"
  89 #include "gimple-iterator.h"
  90 #include "gimplify-me.h"
  91 #include "gimple-ssa.h"
  92 #include "plugin-api.h"
  93 #include "ipa-ref.h"
  94 #include "cgraph.h"
  95 #include "tree-cfg.h"
  96 #include "tree-phinodes.h"
  97 #include "ssa-iterators.h"
  98 #include "stringpool.h"
  99 #include "tree-ssanames.h"
 100 #include "tree-ssa-loop-ivopts.h"
 101 #include "tree-ssa-loop-manip.h"
 102 #include "tree-ssa-loop-niter.h"
 103 #include "tree-ssa-loop.h"
 104 #include "rtl.h"
 105 #include "flags.h"
 106 #include "insn-config.h"
 107 #include "expmed.h"
 108 #include "dojump.h"
 109 #include "explow.h"
 110 #include "calls.h"
 111 #include "emit-rtl.h"
 112 #include "varasm.h"
 113 #include "stmt.h"
 114 #include "expr.h"
 115 #include "tree-dfa.h"
 116 #include "tree-ssa.h"
 117 #include "cfgloop.h"
 118 #include "tree-pass.h"
 119 #include "tree-chrec.h"
 120 #include "tree-scalar-evolution.h"
 121 #include "params.h"
 122 #include "langhooks.h"
 123 #include "tree-affine.h"
 124 #include "target.h"
 125 #include "tree-inline.h"
 126 #include "tree-ssa-propagate.h"
 127 #include "tree-ssa-address.h"
 128 #include "builtins.h"
 129 #include "tree-vectorizer.h"
 130
 131 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 132    cost of different addressing modes.  This should be moved to a TBD
 133    interface between the GIMPLE and RTL worlds.  */
 134 #include "recog.h"
 135
 136 /* The infinite cost.  */
 137 #define INFTY 10000000
 138
 139 #define AVG_LOOP_NITER(LOOP) 5
 140
 141 /* Returns the expected number of loop iterations for LOOP.
 142    The average trip count is computed from profile data if it
 143    exists. */
 144
 145 static inline HOST_WIDE_INT
 146 avg_loop_niter (struct loop *loop)
 147 {
 148   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 149   if (niter == -1)
 150     return AVG_LOOP_NITER (loop);
 151
 152   return niter;
 153 }
 154
 155 /* Representation of the induction variable.  */
 156 struct iv
 157 {
 158   tree base;            /* Initial value of the iv.  */
 159   tree base_object;     /* A memory object to that the induction variable points.  */
 160   tree step;            /* Step of the iv (constant only).  */
 161   tree ssa_name;        /* The ssa name with the value.  */
 162   unsigned use_id;      /* The identifier in the use if it is the case.  */
 163   bool biv_p;           /* Is it a biv?  */
 164   bool have_use_for;    /* Do we already have a use for it?  */
 165   bool no_overflow;     /* True if the iv doesn't overflow.  */
 166 };
 167
 168 /* Per-ssa version information (induction variable descriptions, etc.).  */
 169 struct version_info
 170 {
 171   tree name;            /* The ssa name.  */
 172   struct iv *iv;        /* Induction variable description.  */
 173   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 174                            an expression that is not an induction variable.  */
 175   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 176   unsigned inv_id;      /* Id of an invariant.  */
 177 };
 178
 179 /* Types of uses.  */
 180 enum use_type
 181 {
 182   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 183   USE_ADDRESS,          /* Use in an address.  */
 184   USE_COMPARE           /* Use is a compare.  */
 185 };
 186
 187 /* Cost of a computation.  */
 188 typedef struct
 189 {
 190   int cost;             /* The runtime cost.  */
 191   unsigned complexity;  /* The estimate of the complexity of the code for
 192                            the computation (in no concrete units --
 193                            complexity field should be larger for more
 194                            complex expressions and addressing modes).  */
 195 } comp_cost;
 196
 197 static const comp_cost no_cost = {0, 0};
 198 static const comp_cost infinite_cost = {INFTY, INFTY};
 199
 200 /* The candidate - cost pair.  */
 201 struct cost_pair
 202 {
 203   struct iv_cand *cand; /* The candidate.  */
 204   comp_cost cost;       /* The cost.  */
 205   bitmap depends_on;    /* The list of invariants that have to be
 206                            preserved.  */
 207   tree value;           /* For final value elimination, the expression for
 208                            the final value of the iv.  For iv elimination,
 209                            the new bound to compare with.  */
 210   enum tree_code comp;  /* For iv elimination, the comparison.  */
 211   int inv_expr_id;      /* Loop invariant expression id.  */
 212 };
 213
 214 /* Use.  */
 215 struct iv_use
 216 {
 217   unsigned id;          /* The id of the use.  */
 218   unsigned sub_id;      /* The id of the sub use.  */
 219   enum use_type type;   /* Type of the use.  */
 220   struct iv *iv;        /* The induction variable it is based on.  */
 221   gimple stmt;          /* Statement in that it occurs.  */
 222   tree *op_p;           /* The place where it occurs.  */
 223   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 224                            important ones.  */
 225
 226   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 227   struct cost_pair *cost_map;
 228                         /* The costs wrto the iv candidates.  */
 229
 230   struct iv_cand *selected;
 231                         /* The selected candidate.  */
 232
 233   struct iv_use *next;  /* The next sub use.  */
 234   tree addr_base;       /* Base address with const offset stripped.  */
 235   unsigned HOST_WIDE_INT addr_offset;
 236                         /* Const offset stripped from base address.  */
 237 };
 238
 239 /* The position where the iv is computed.  */
 240 enum iv_position
 241 {
 242   IP_NORMAL,            /* At the end, just before the exit condition.  */
 243   IP_END,               /* At the end of the latch block.  */
 244   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 245   IP_AFTER_USE,         /* Immediately after a specific use.  */
 246   IP_ORIGINAL           /* The original biv.  */
 247 };
 248
 249 /* The induction variable candidate.  */
 250 struct iv_cand
 251 {
 252   unsigned id;          /* The number of the candidate.  */
 253   bool important;       /* Whether this is an "important" candidate, i.e. such
 254                            that it should be considered by all uses.  */
 255   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 256   gimple incremented_at;/* For original biv, the statement where it is
 257                            incremented.  */
 258   tree var_before;      /* The variable used for it before increment.  */
 259   tree var_after;       /* The variable used for it after increment.  */
 260   struct iv *iv;        /* The value of the candidate.  NULL for
 261                            "pseudocandidate" used to indicate the possibility
 262                            to replace the final value of an iv by direct
 263                            computation of the value.  */
 264   unsigned cost;        /* Cost of the candidate.  */
 265   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 266   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 267                               where it is incremented.  */
 268   bitmap depends_on;    /* The list of invariants that are used in step of the
 269                            biv.  */
 270 };
 271
 272 /* Loop invariant expression hashtable entry.  */
 273 struct iv_inv_expr_ent
 274 {
 275   tree expr;
 276   int id;
 277   hashval_t hash;
 278 };
 279
 280 /* The data used by the induction variable optimizations.  */
 281
 282 typedef struct iv_use *iv_use_p;
 283
 284 typedef struct iv_cand *iv_cand_p;
 285
 286 /* Hashtable helpers.  */
 287
 288 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
 289 {
 290   typedef iv_inv_expr_ent *value_type;
 291   typedef iv_inv_expr_ent *compare_type;
 292   static inline hashval_t hash (const iv_inv_expr_ent *);
 293   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 294 };
 295
 296 /* Hash function for loop invariant expressions.  */
 297
 298 inline hashval_t
 299 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 300 {
 301   return expr->hash;
 302 }
 303
 304 /* Hash table equality function for expressions.  */
 305
 306 inline bool
 307 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 308                            const iv_inv_expr_ent *expr2)
 309 {
 310   return expr1->hash == expr2->hash
 311          && operand_equal_p (expr1->expr, expr2->expr, 0);
 312 }
 313
 314 struct ivopts_data
 315 {
 316   /* The currently optimized loop.  */
 317   struct loop *current_loop;
 318   source_location loop_loc;
 319
 320   /* Numbers of iterations for all exits of the current loop.  */
 321   hash_map<edge, tree_niter_desc *> *niters;
 322
 323   /* Number of registers used in it.  */
 324   unsigned regs_used;
 325
 326   /* The size of version_info array allocated.  */
 327   unsigned version_info_size;
 328
 329   /* The array of information for the ssa names.  */
 330   struct version_info *version_info;
 331
 332   /* The hashtable of loop invariant expressions created
 333      by ivopt.  */
 334   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 335
 336   /* Loop invariant expression id.  */
 337   int inv_expr_id;
 338
 339   /* The bitmap of indices in version_info whose value was changed.  */
 340   bitmap relevant;
 341
 342   /* The uses of induction variables.  */
 343   vec<iv_use_p> iv_uses;
 344
 345   /* The candidates.  */
 346   vec<iv_cand_p> iv_candidates;
 347
 348   /* A bitmap of important candidates.  */
 349   bitmap important_candidates;
 350
 351   /* Cache used by tree_to_aff_combination_expand.  */
 352   hash_map<tree, name_expansion *> *name_expansion_cache;
 353
 354   /* The maximum invariant id.  */
 355   unsigned max_inv_id;
 356
 357   /* Whether to consider just related and important candidates when replacing a
 358      use.  */
 359   bool consider_all_candidates;
 360
 361   /* Are we optimizing for speed?  */
 362   bool speed;
 363
 364   /* Whether the loop body includes any function calls.  */
 365   bool body_includes_call;
 366
 367   /* Whether the loop body can only be exited via single exit.  */
 368   bool loop_single_exit_p;
 369 };
 370
 371 /* An assignment of iv candidates to uses.  */
 372
 373 struct iv_ca
 374 {
 375   /* The number of uses covered by the assignment.  */
 376   unsigned upto;
 377
 378   /* Number of uses that cannot be expressed by the candidates in the set.  */
 379   unsigned bad_uses;
 380
 381   /* Candidate assigned to a use, together with the related costs.  */
 382   struct cost_pair **cand_for_use;
 383
 384   /* Number of times each candidate is used.  */
 385   unsigned *n_cand_uses;
 386
 387   /* The candidates used.  */
 388   bitmap cands;
 389
 390   /* The number of candidates in the set.  */
 391   unsigned n_cands;
 392
 393   /* Total number of registers needed.  */
 394   unsigned n_regs;
 395
 396   /* Total cost of expressing uses.  */
 397   comp_cost cand_use_cost;
 398
 399   /* Total cost of candidates.  */
 400   unsigned cand_cost;
 401
 402   /* Number of times each invariant is used.  */
 403   unsigned *n_invariant_uses;
 404
 405   /* The array holding the number of uses of each loop
 406      invariant expressions created by ivopt.  */
 407   unsigned *used_inv_expr;
 408
 409   /* The number of created loop invariants.  */
 410   unsigned num_used_inv_expr;
 411
 412   /* Total cost of the assignment.  */
 413   comp_cost cost;
 414 };
 415
 416 /* Difference of two iv candidate assignments.  */
 417
 418 struct iv_ca_delta
 419 {
 420   /* Changed use.  */
 421   struct iv_use *use;
 422
 423   /* An old assignment (for rollback purposes).  */
 424   struct cost_pair *old_cp;
 425
 426   /* A new assignment.  */
 427   struct cost_pair *new_cp;
 428
 429   /* Next change in the list.  */
 430   struct iv_ca_delta *next_change;
 431 };
 432
 433 /* Bound on number of candidates below that all candidates are considered.  */
 434
 435 #define CONSIDER_ALL_CANDIDATES_BOUND \
 436   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 437
 438 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 439    optimizing such a loop would help, and it would take ages).  */
 440
 441 #define MAX_CONSIDERED_USES \
 442   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 443
 444 /* If there are at most this number of ivs in the set, try removing unnecessary
 445    ivs from the set always.  */
 446
 447 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 448   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 449
 450 /* The list of trees for that the decl_rtl field must be reset is stored
 451    here.  */
 452
 453 static vec<tree> decl_rtl_to_reset;
 454
 455 static comp_cost force_expr_to_var_cost (tree, bool);
 456
 457 /* Number of uses recorded in DATA.  */
 458
 459 static inline unsigned
 460 n_iv_uses (struct ivopts_data *data)
 461 {
 462   return data->iv_uses.length ();
 463 }
 464
 465 /* Ith use recorded in DATA.  */
 466
 467 static inline struct iv_use *
 468 iv_use (struct ivopts_data *data, unsigned i)
 469 {
 470   return data->iv_uses[i];
 471 }
 472
 473 /* Number of candidates recorded in DATA.  */
 474
 475 static inline unsigned
 476 n_iv_cands (struct ivopts_data *data)
 477 {
 478   return data->iv_candidates.length ();
 479 }
 480
 481 /* Ith candidate recorded in DATA.  */
 482
 483 static inline struct iv_cand *
 484 iv_cand (struct ivopts_data *data, unsigned i)
 485 {
 486   return data->iv_candidates[i];
 487 }
 488
 489 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 490
 491 edge
 492 single_dom_exit (struct loop *loop)
 493 {
 494   edge exit = single_exit (loop);
 495
 496   if (!exit)
 497     return NULL;
 498
 499   if (!just_once_each_iteration_p (loop, exit->src))
 500     return NULL;
 501
 502   return exit;
 503 }
 504
 505 /* Dumps information about the induction variable IV to FILE.  */
 506
 507 void
 508 dump_iv (FILE *file, struct iv *iv, bool dump_name)
 509 {
 510   if (iv->ssa_name && dump_name)
 511     {
 512       fprintf (file, "ssa name ");
 513       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 514       fprintf (file, "\n");
 515     }
 516
 517   fprintf (file, "  type ");
 518   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 519   fprintf (file, "\n");
 520
 521   if (iv->step)
 522     {
 523       fprintf (file, "  base ");
 524       print_generic_expr (file, iv->base, TDF_SLIM);
 525       fprintf (file, "\n");
 526
 527       fprintf (file, "  step ");
 528       print_generic_expr (file, iv->step, TDF_SLIM);
 529       fprintf (file, "\n");
 530     }
 531   else
 532     {
 533       fprintf (file, "  invariant ");
 534       print_generic_expr (file, iv->base, TDF_SLIM);
 535       fprintf (file, "\n");
 536     }
 537
 538   if (iv->base_object)
 539     {
 540       fprintf (file, "  base object ");
 541       print_generic_expr (file, iv->base_object, TDF_SLIM);
 542       fprintf (file, "\n");
 543     }
 544
 545   if (iv->biv_p)
 546     fprintf (file, "  is a biv\n");
 547 }
 548
 549 /* Dumps information about the USE to FILE.  */
 550
 551 void
 552 dump_use (FILE *file, struct iv_use *use)
 553 {
 554   fprintf (file, "use %d", use->id);
 555   if (use->sub_id)
 556     fprintf (file, ".%d", use->sub_id);
 557
 558   fprintf (file, "\n");
 559
 560   switch (use->type)
 561     {
 562     case USE_NONLINEAR_EXPR:
 563       fprintf (file, "  generic\n");
 564       break;
 565
 566     case USE_ADDRESS:
 567       fprintf (file, "  address\n");
 568       break;
 569
 570     case USE_COMPARE:
 571       fprintf (file, "  compare\n");
 572       break;
 573
 574     default:
 575       gcc_unreachable ();
 576     }
 577
 578   fprintf (file, "  in statement ");
 579   print_gimple_stmt (file, use->stmt, 0, 0);
 580   fprintf (file, "\n");
 581
 582   fprintf (file, "  at position ");
 583   if (use->op_p)
 584     print_generic_expr (file, *use->op_p, TDF_SLIM);
 585   fprintf (file, "\n");
 586
 587   dump_iv (file, use->iv, false);
 588
 589   if (use->related_cands)
 590     {
 591       fprintf (file, "  related candidates ");
 592       dump_bitmap (file, use->related_cands);
 593     }
 594 }
 595
 596 /* Dumps information about the uses to FILE.  */
 597
 598 void
 599 dump_uses (FILE *file, struct ivopts_data *data)
 600 {
 601   unsigned i;
 602   struct iv_use *use;
 603
 604   for (i = 0; i < n_iv_uses (data); i++)
 605     {
 606       use = iv_use (data, i);
 607       do
 608         {
 609           dump_use (file, use);
 610           use = use->next;
 611         }
 612       while (use);
 613       fprintf (file, "\n");
 614     }
 615 }
 616
 617 /* Dumps information about induction variable candidate CAND to FILE.  */
 618
 619 void
 620 dump_cand (FILE *file, struct iv_cand *cand)
 621 {
 622   struct iv *iv = cand->iv;
 623
 624   fprintf (file, "candidate %d%s\n",
 625            cand->id, cand->important ? " (important)" : "");
 626
 627   if (cand->depends_on)
 628     {
 629       fprintf (file, "  depends on ");
 630       dump_bitmap (file, cand->depends_on);
 631     }
 632
 633   if (!iv)
 634     {
 635       fprintf (file, "  final value replacement\n");
 636       return;
 637     }
 638
 639   if (cand->var_before)
 640     {
 641       fprintf (file, "  var_before ");
 642       print_generic_expr (file, cand->var_before, TDF_SLIM);
 643       fprintf (file, "\n");
 644     }
 645   if (cand->var_after)
 646     {
 647       fprintf (file, "  var_after ");
 648       print_generic_expr (file, cand->var_after, TDF_SLIM);
 649       fprintf (file, "\n");
 650     }
 651
 652   switch (cand->pos)
 653     {
 654     case IP_NORMAL:
 655       fprintf (file, "  incremented before exit test\n");
 656       break;
 657
 658     case IP_BEFORE_USE:
 659       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 660       break;
 661
 662     case IP_AFTER_USE:
 663       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 664       break;
 665
 666     case IP_END:
 667       fprintf (file, "  incremented at end\n");
 668       break;
 669
 670     case IP_ORIGINAL:
 671       fprintf (file, "  original biv\n");
 672       break;
 673     }
 674
 675   dump_iv (file, iv, false);
 676 }
 677
 678 /* Returns the info for ssa version VER.  */
 679
 680 static inline struct version_info *
 681 ver_info (struct ivopts_data *data, unsigned ver)
 682 {
 683   return data->version_info + ver;
 684 }
 685
 686 /* Returns the info for ssa name NAME.  */
 687
 688 static inline struct version_info *
 689 name_info (struct ivopts_data *data, tree name)
 690 {
 691   return ver_info (data, SSA_NAME_VERSION (name));
 692 }
 693
 694 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 695    emitted in LOOP.  */
 696
 697 static bool
 698 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 699 {
 700   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 701
 702   gcc_assert (bb);
 703
 704   if (sbb == loop->latch)
 705     return true;
 706
 707   if (sbb != bb)
 708     return false;
 709
 710   return stmt == last_stmt (bb);
 711 }
 712
 713 /* Returns true if STMT if after the place where the original induction
 714    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 715    if the positions are identical.  */
 716
 717 static bool
 718 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 719 {
 720   basic_block cand_bb = gimple_bb (cand->incremented_at);
 721   basic_block stmt_bb = gimple_bb (stmt);
 722
 723   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 724     return false;
 725
 726   if (stmt_bb != cand_bb)
 727     return true;
 728
 729   if (true_if_equal
 730       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 731     return true;
 732   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 733 }
 734
 735 /* Returns true if STMT if after the place where the induction variable
 736    CAND is incremented in LOOP.  */
 737
 738 static bool
 739 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 740 {
 741   switch (cand->pos)
 742     {
 743     case IP_END:
 744       return false;
 745
 746     case IP_NORMAL:
 747       return stmt_after_ip_normal_pos (loop, stmt);
 748
 749     case IP_ORIGINAL:
 750     case IP_AFTER_USE:
 751       return stmt_after_inc_pos (cand, stmt, false);
 752
 753     case IP_BEFORE_USE:
 754       return stmt_after_inc_pos (cand, stmt, true);
 755
 756     default:
 757       gcc_unreachable ();
 758     }
 759 }
 760
 761 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 762
 763 static bool
 764 abnormal_ssa_name_p (tree exp)
 765 {
 766   if (!exp)
 767     return false;
 768
 769   if (TREE_CODE (exp) != SSA_NAME)
 770     return false;
 771
 772   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 773 }
 774
 775 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 776    abnormal phi node.  Callback for for_each_index.  */
 777
 778 static bool
 779 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 780                                   void *data ATTRIBUTE_UNUSED)
 781 {
 782   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 783     {
 784       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 785         return false;
 786       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 787         return false;
 788     }
 789
 790   return !abnormal_ssa_name_p (*index);
 791 }
 792
 793 /* Returns true if EXPR contains a ssa name that occurs in an
 794    abnormal phi node.  */
 795
 796 bool
 797 contains_abnormal_ssa_name_p (tree expr)
 798 {
 799   enum tree_code code;
 800   enum tree_code_class codeclass;
 801
 802   if (!expr)
 803     return false;
 804
 805   code = TREE_CODE (expr);
 806   codeclass = TREE_CODE_CLASS (code);
 807
 808   if (code == SSA_NAME)
 809     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 810
 811   if (code == INTEGER_CST
 812       || is_gimple_min_invariant (expr))
 813     return false;
 814
 815   if (code == ADDR_EXPR)
 816     return !for_each_index (&TREE_OPERAND (expr, 0),
 817                             idx_contains_abnormal_ssa_name_p,
 818                             NULL);
 819
 820   if (code == COND_EXPR)
 821     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 822       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 823       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 824
 825   switch (codeclass)
 826     {
 827     case tcc_binary:
 828     case tcc_comparison:
 829       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 830         return true;
 831
 832       /* Fallthru.  */
 833     case tcc_unary:
 834       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 835         return true;
 836
 837       break;
 838
 839     default:
 840       gcc_unreachable ();
 841     }
 842
 843   return false;
 844 }
 845
 846 /*  Returns the structure describing number of iterations determined from
 847     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 848
 849 static struct tree_niter_desc *
 850 niter_for_exit (struct ivopts_data *data, edge exit)
 851 {
 852   struct tree_niter_desc *desc;
 853   tree_niter_desc **slot;
 854
 855   if (!data->niters)
 856     {
 857       data->niters = new hash_map<edge, tree_niter_desc *>;
 858       slot = NULL;
 859     }
 860   else
 861     slot = data->niters->get (exit);
 862
 863   if (!slot)
 864     {
 865       /* Try to determine number of iterations.  We cannot safely work with ssa
 866          names that appear in phi nodes on abnormal edges, so that we do not
 867          create overlapping life ranges for them (PR 27283).  */
 868       desc = XNEW (struct tree_niter_desc);
 869       if (!number_of_iterations_exit (data->current_loop,
 870                                       exit, desc, true)
 871           || contains_abnormal_ssa_name_p (desc->niter))
 872         {
 873           XDELETE (desc);
 874           desc = NULL;
 875         }
 876       data->niters->put (exit, desc);
 877     }
 878   else
 879     desc = *slot;
 880
 881   return desc;
 882 }
 883
 884 /* Returns the structure describing number of iterations determined from
 885    single dominating exit of DATA->current_loop, or NULL if something
 886    goes wrong.  */
 887
 888 static struct tree_niter_desc *
 889 niter_for_single_dom_exit (struct ivopts_data *data)
 890 {
 891   edge exit = single_dom_exit (data->current_loop);
 892
 893   if (!exit)
 894     return NULL;
 895
 896   return niter_for_exit (data, exit);
 897 }
 898
 899 /* Initializes data structures used by the iv optimization pass, stored
 900    in DATA.  */
 901
 902 static void
 903 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 904 {
 905   data->version_info_size = 2 * num_ssa_names;
 906   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 907   data->relevant = BITMAP_ALLOC (NULL);
 908   data->important_candidates = BITMAP_ALLOC (NULL);
 909   data->max_inv_id = 0;
 910   data->niters = NULL;
 911   data->iv_uses.create (20);
 912   data->iv_candidates.create (20);
 913   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
 914   data->inv_expr_id = 0;
 915   data->name_expansion_cache = NULL;
 916   decl_rtl_to_reset.create (20);
 917 }
 918
 919 /* Returns a memory object to that EXPR points.  In case we are able to
 920    determine that it does not point to any such object, NULL is returned.  */
 921
 922 static tree
 923 determine_base_object (tree expr)
 924 {
 925   enum tree_code code = TREE_CODE (expr);
 926   tree base, obj;
 927
 928   /* If this is a pointer casted to any type, we need to determine
 929      the base object for the pointer; so handle conversions before
 930      throwing away non-pointer expressions.  */
 931   if (CONVERT_EXPR_P (expr))
 932     return determine_base_object (TREE_OPERAND (expr, 0));
 933
 934   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 935     return NULL_TREE;
 936
 937   switch (code)
 938     {
 939     case INTEGER_CST:
 940       return NULL_TREE;
 941
 942     case ADDR_EXPR:
 943       obj = TREE_OPERAND (expr, 0);
 944       base = get_base_address (obj);
 945
 946       if (!base)
 947         return expr;
 948
 949       if (TREE_CODE (base) == MEM_REF)
 950         return determine_base_object (TREE_OPERAND (base, 0));
 951
 952       return fold_convert (ptr_type_node,
 953                            build_fold_addr_expr (base));
 954
 955     case POINTER_PLUS_EXPR:
 956       return determine_base_object (TREE_OPERAND (expr, 0));
 957
 958     case PLUS_EXPR:
 959     case MINUS_EXPR:
 960       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 961       gcc_unreachable ();
 962
 963     default:
 964       return fold_convert (ptr_type_node, expr);
 965     }
 966 }
 967
 968 /* Return true if address expression with non-DECL_P operand appears
 969    in EXPR.  */
 970
 971 static bool
 972 contain_complex_addr_expr (tree expr)
 973 {
 974   bool res = false;
 975
 976   STRIP_NOPS (expr);
 977   switch (TREE_CODE (expr))
 978     {
 979     case POINTER_PLUS_EXPR:
 980     case PLUS_EXPR:
 981     case MINUS_EXPR:
 982       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
 983       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
 984       break;
 985
 986     case ADDR_EXPR:
 987       return (!DECL_P (TREE_OPERAND (expr, 0)));
 988
 989     default:
 990       return false;
 991     }
 992
 993   return res;
 994 }
 995
 996 /* Allocates an induction variable with given initial value BASE and step STEP
 997    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
 998
 999 static struct iv *
1000 alloc_iv (tree base, tree step, bool no_overflow = false)
1001 {
1002   tree expr = base;
1003   struct iv *iv = XCNEW (struct iv);
1004   gcc_assert (step != NULL_TREE);
1005
1006   /* Lower address expression in base except ones with DECL_P as operand.
1007      By doing this:
1008        1) More accurate cost can be computed for address expressions;
1009        2) Duplicate candidates won't be created for bases in different
1010           forms, like &a[0] and &a.  */
1011   STRIP_NOPS (expr);
1012   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1013       || contain_complex_addr_expr (expr))
1014     {
1015       aff_tree comb;
1016       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1017       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1018     }
1019
1020   iv->base = base;
1021   iv->base_object = determine_base_object (base);
1022   iv->step = step;
1023   iv->biv_p = false;
1024   iv->have_use_for = false;
1025   iv->use_id = 0;
1026   iv->ssa_name = NULL_TREE;
1027   iv->no_overflow = no_overflow;
1028
1029   return iv;
1030 }
1031
1032 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1033    doesn't overflow.  */
1034
1035 static void
1036 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1037         bool no_overflow)
1038 {
1039   struct version_info *info = name_info (data, iv);
1040
1041   gcc_assert (!info->iv);
1042
1043   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1044   info->iv = alloc_iv (base, step, no_overflow);
1045   info->iv->ssa_name = iv;
1046 }
1047
1048 /* Finds induction variable declaration for VAR.  */
1049
1050 static struct iv *
1051 get_iv (struct ivopts_data *data, tree var)
1052 {
1053   basic_block bb;
1054   tree type = TREE_TYPE (var);
1055
1056   if (!POINTER_TYPE_P (type)
1057       && !INTEGRAL_TYPE_P (type))
1058     return NULL;
1059
1060   if (!name_info (data, var)->iv)
1061     {
1062       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1063
1064       if (!bb
1065           || !flow_bb_inside_loop_p (data->current_loop, bb))
1066         set_iv (data, var, var, build_int_cst (type, 0), true);
1067     }
1068
1069   return name_info (data, var)->iv;
1070 }
1071
1072 /* Return the first non-invariant ssa var found in EXPR.  */
1073
1074 static tree
1075 extract_single_var_from_expr (tree expr)
1076 {
1077   int i, n;
1078   tree tmp;
1079   enum tree_code code;
1080
1081   if (!expr || is_gimple_min_invariant (expr))
1082     return NULL;
1083
1084   code = TREE_CODE (expr);
1085   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1086     {
1087       n = TREE_OPERAND_LENGTH (expr);
1088       for (i = 0; i < n; i++)
1089         {
1090           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1091
1092           if (tmp)
1093             return tmp;
1094         }
1095     }
1096   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1097 }
1098
1099 /* Finds basic ivs.  */
1100
1101 static bool
1102 find_bivs (struct ivopts_data *data)
1103 {
1104   gphi *phi;
1105   affine_iv iv;
1106   tree step, type, base, stop;
1107   bool found = false;
1108   struct loop *loop = data->current_loop;
1109   gphi_iterator psi;
1110
1111   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1112     {
1113       phi = psi.phi ();
1114
1115       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1116         continue;
1117
1118       if (virtual_operand_p (PHI_RESULT (phi)))
1119         continue;
1120
1121       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1122         continue;
1123
1124       if (integer_zerop (iv.step))
1125         continue;
1126
1127       step = iv.step;
1128       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1129       /* Stop expanding iv base at the first ssa var referred by iv step.
1130          Ideally we should stop at any ssa var, because that's expensive
1131          and unusual to happen, we just do it on the first one.
1132
1133          See PR64705 for the rationale.  */
1134       stop = extract_single_var_from_expr (step);
1135       base = expand_simple_operations (base, stop);
1136       if (contains_abnormal_ssa_name_p (base)
1137           || contains_abnormal_ssa_name_p (step))
1138         continue;
1139
1140       type = TREE_TYPE (PHI_RESULT (phi));
1141       base = fold_convert (type, base);
1142       if (step)
1143         {
1144           if (POINTER_TYPE_P (type))
1145             step = convert_to_ptrofftype (step);
1146           else
1147             step = fold_convert (type, step);
1148         }
1149
1150       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1151       found = true;
1152     }
1153
1154   return found;
1155 }
1156
1157 /* Marks basic ivs.  */
1158
1159 static void
1160 mark_bivs (struct ivopts_data *data)
1161 {
1162   gphi *phi;
1163   gimple def;
1164   tree var;
1165   struct iv *iv, *incr_iv;
1166   struct loop *loop = data->current_loop;
1167   basic_block incr_bb;
1168   gphi_iterator psi;
1169
1170   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1171     {
1172       phi = psi.phi ();
1173
1174       iv = get_iv (data, PHI_RESULT (phi));
1175       if (!iv)
1176         continue;
1177
1178       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1179       def = SSA_NAME_DEF_STMT (var);
1180       /* Don't mark iv peeled from other one as biv.  */
1181       if (def
1182           && gimple_code (def) == GIMPLE_PHI
1183           && gimple_bb (def) == loop->header)
1184         continue;
1185
1186       incr_iv = get_iv (data, var);
1187       if (!incr_iv)
1188         continue;
1189
1190       /* If the increment is in the subloop, ignore it.  */
1191       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1192       if (incr_bb->loop_father != data->current_loop
1193           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1194         continue;
1195
1196       iv->biv_p = true;
1197       incr_iv->biv_p = true;
1198     }
1199 }
1200
1201 /* Checks whether STMT defines a linear induction variable and stores its
1202    parameters to IV.  */
1203
1204 static bool
1205 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1206 {
1207   tree lhs, stop;
1208   struct loop *loop = data->current_loop;
1209
1210   iv->base = NULL_TREE;
1211   iv->step = NULL_TREE;
1212
1213   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1214     return false;
1215
1216   lhs = gimple_assign_lhs (stmt);
1217   if (TREE_CODE (lhs) != SSA_NAME)
1218     return false;
1219
1220   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1221     return false;
1222
1223   /* Stop expanding iv base at the first ssa var referred by iv step.
1224      Ideally we should stop at any ssa var, because that's expensive
1225      and unusual to happen, we just do it on the first one.
1226
1227      See PR64705 for the rationale.  */
1228   stop = extract_single_var_from_expr (iv->step);
1229   iv->base = expand_simple_operations (iv->base, stop);
1230   if (contains_abnormal_ssa_name_p (iv->base)
1231       || contains_abnormal_ssa_name_p (iv->step))
1232     return false;
1233
1234   /* If STMT could throw, then do not consider STMT as defining a GIV.
1235      While this will suppress optimizations, we can not safely delete this
1236      GIV and associated statements, even if it appears it is not used.  */
1237   if (stmt_could_throw_p (stmt))
1238     return false;
1239
1240   return true;
1241 }
1242
1243 /* Finds general ivs in statement STMT.  */
1244
1245 static void
1246 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1247 {
1248   affine_iv iv;
1249
1250   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1251     return;
1252
1253   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1254 }
1255
1256 /* Finds general ivs in basic block BB.  */
1257
1258 static void
1259 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1260 {
1261   gimple_stmt_iterator bsi;
1262
1263   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1264     find_givs_in_stmt (data, gsi_stmt (bsi));
1265 }
1266
1267 /* Finds general ivs.  */
1268
1269 static void
1270 find_givs (struct ivopts_data *data)
1271 {
1272   struct loop *loop = data->current_loop;
1273   basic_block *body = get_loop_body_in_dom_order (loop);
1274   unsigned i;
1275
1276   for (i = 0; i < loop->num_nodes; i++)
1277     find_givs_in_bb (data, body[i]);
1278   free (body);
1279 }
1280
1281 /* For each ssa name defined in LOOP determines whether it is an induction
1282    variable and if so, its initial value and step.  */
1283
1284 static bool
1285 find_induction_variables (struct ivopts_data *data)
1286 {
1287   unsigned i;
1288   bitmap_iterator bi;
1289
1290   if (!find_bivs (data))
1291     return false;
1292
1293   find_givs (data);
1294   mark_bivs (data);
1295
1296   if (dump_file && (dump_flags & TDF_DETAILS))
1297     {
1298       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1299
1300       if (niter)
1301         {
1302           fprintf (dump_file, "  number of iterations ");
1303           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1304           if (!integer_zerop (niter->may_be_zero))
1305             {
1306               fprintf (dump_file, "; zero if ");
1307               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1308             }
1309           fprintf (dump_file, "\n\n");
1310         };
1311
1312       fprintf (dump_file, "Induction variables:\n\n");
1313
1314       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1315         {
1316           if (ver_info (data, i)->iv)
1317             dump_iv (dump_file, ver_info (data, i)->iv, true);
1318         }
1319     }
1320
1321   return true;
1322 }
1323
1324 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1325    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1326    is the const offset stripped from IV base.  For uses of other types,
1327    ADDR_BASE and ADDR_OFFSET are zero by default.  */
1328
1329 static struct iv_use *
1330 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1331             gimple stmt, enum use_type use_type, tree addr_base = NULL,
1332             unsigned HOST_WIDE_INT addr_offset = 0)
1333 {
1334   struct iv_use *use = XCNEW (struct iv_use);
1335
1336   use->id = n_iv_uses (data);
1337   use->sub_id = 0;
1338   use->type = use_type;
1339   use->iv = iv;
1340   use->stmt = stmt;
1341   use->op_p = use_p;
1342   use->related_cands = BITMAP_ALLOC (NULL);
1343   use->next = NULL;
1344   use->addr_base = addr_base;
1345   use->addr_offset = addr_offset;
1346
1347   data->iv_uses.safe_push (use);
1348
1349   return use;
1350 }
1351
1352 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1353    The sub use is recorded under the one whose use id is ID_GROUP.  */
1354
1355 static struct iv_use *
1356 record_sub_use (struct ivopts_data *data, tree *use_p,
1357                     struct iv *iv, gimple stmt, enum use_type use_type,
1358                     tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1359                     unsigned int id_group)
1360 {
1361   struct iv_use *use = XCNEW (struct iv_use);
1362   struct iv_use *group = iv_use (data, id_group);
1363
1364   use->id = group->id;
1365   use->sub_id = 0;
1366   use->type = use_type;
1367   use->iv = iv;
1368   use->stmt = stmt;
1369   use->op_p = use_p;
1370   use->related_cands = NULL;
1371   use->addr_base = addr_base;
1372   use->addr_offset = addr_offset;
1373
1374   /* Sub use list is maintained in offset ascending order.  */
1375   if (addr_offset <= group->addr_offset)
1376     {
1377       use->related_cands = group->related_cands;
1378       group->related_cands = NULL;
1379       use->next = group;
1380       data->iv_uses[id_group] = use;
1381     }
1382   else
1383     {
1384       struct iv_use *pre;
1385       do
1386         {
1387           pre = group;
1388           group = group->next;
1389         }
1390       while (group && addr_offset > group->addr_offset);
1391       use->next = pre->next;
1392       pre->next = use;
1393     }
1394
1395   /* To avoid showing ssa name in the dumps, if it was not reset by the
1396      caller.  */
1397   iv->ssa_name = NULL_TREE;
1398
1399   return use;
1400 }
1401
1402 /* Checks whether OP is a loop-level invariant and if so, records it.
1403    NONLINEAR_USE is true if the invariant is used in a way we do not
1404    handle specially.  */
1405
1406 static void
1407 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1408 {
1409   basic_block bb;
1410   struct version_info *info;
1411
1412   if (TREE_CODE (op) != SSA_NAME
1413       || virtual_operand_p (op))
1414     return;
1415
1416   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1417   if (bb
1418       && flow_bb_inside_loop_p (data->current_loop, bb))
1419     return;
1420
1421   info = name_info (data, op);
1422   info->name = op;
1423   info->has_nonlin_use |= nonlinear_use;
1424   if (!info->inv_id)
1425     info->inv_id = ++data->max_inv_id;
1426   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1427 }
1428
1429 /* Checks whether the use OP is interesting and if so, records it.  */
1430
1431 static struct iv_use *
1432 find_interesting_uses_op (struct ivopts_data *data, tree op)
1433 {
1434   struct iv *iv;
1435   struct iv *civ;
1436   gimple stmt;
1437   struct iv_use *use;
1438
1439   if (TREE_CODE (op) != SSA_NAME)
1440     return NULL;
1441
1442   iv = get_iv (data, op);
1443   if (!iv)
1444     return NULL;
1445
1446   if (iv->have_use_for)
1447     {
1448       use = iv_use (data, iv->use_id);
1449
1450       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1451       return use;
1452     }
1453
1454   if (integer_zerop (iv->step))
1455     {
1456       record_invariant (data, op, true);
1457       return NULL;
1458     }
1459   iv->have_use_for = true;
1460
1461   civ = XNEW (struct iv);
1462   *civ = *iv;
1463
1464   stmt = SSA_NAME_DEF_STMT (op);
1465   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1466               || is_gimple_assign (stmt));
1467
1468   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1469   iv->use_id = use->id;
1470
1471   return use;
1472 }
1473
1474 /* Given a condition in statement STMT, checks whether it is a compare
1475    of an induction variable and an invariant.  If this is the case,
1476    CONTROL_VAR is set to location of the iv, BOUND to the location of
1477    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1478    induction variable descriptions, and true is returned.  If this is not
1479    the case, CONTROL_VAR and BOUND are set to the arguments of the
1480    condition and false is returned.  */
1481
1482 static bool
1483 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1484                        tree **control_var, tree **bound,
1485                        struct iv **iv_var, struct iv **iv_bound)
1486 {
1487   /* The objects returned when COND has constant operands.  */
1488   static struct iv const_iv;
1489   static tree zero;
1490   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1491   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1492   bool ret = false;
1493
1494   if (gimple_code (stmt) == GIMPLE_COND)
1495     {
1496       gcond *cond_stmt = as_a <gcond *> (stmt);
1497       op0 = gimple_cond_lhs_ptr (cond_stmt);
1498       op1 = gimple_cond_rhs_ptr (cond_stmt);
1499     }
1500   else
1501     {
1502       op0 = gimple_assign_rhs1_ptr (stmt);
1503       op1 = gimple_assign_rhs2_ptr (stmt);
1504     }
1505
1506   zero = integer_zero_node;
1507   const_iv.step = integer_zero_node;
1508
1509   if (TREE_CODE (*op0) == SSA_NAME)
1510     iv0 = get_iv (data, *op0);
1511   if (TREE_CODE (*op1) == SSA_NAME)
1512     iv1 = get_iv (data, *op1);
1513
1514   /* Exactly one of the compared values must be an iv, and the other one must
1515      be an invariant.  */
1516   if (!iv0 || !iv1)
1517     goto end;
1518
1519   if (integer_zerop (iv0->step))
1520     {
1521       /* Control variable may be on the other side.  */
1522       tmp_op = op0; op0 = op1; op1 = tmp_op;
1523       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1524     }
1525   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1526
1527 end:
1528   if (control_var)
1529     *control_var = op0;
1530   if (iv_var)
1531     *iv_var = iv0;
1532   if (bound)
1533     *bound = op1;
1534   if (iv_bound)
1535     *iv_bound = iv1;
1536
1537   return ret;
1538 }
1539
1540 /* Checks whether the condition in STMT is interesting and if so,
1541    records it.  */
1542
1543 static void
1544 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1545 {
1546   tree *var_p, *bound_p;
1547   struct iv *var_iv, *civ;
1548
1549   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1550     {
1551       find_interesting_uses_op (data, *var_p);
1552       find_interesting_uses_op (data, *bound_p);
1553       return;
1554     }
1555
1556   civ = XNEW (struct iv);
1557   *civ = *var_iv;
1558   record_use (data, NULL, civ, stmt, USE_COMPARE);
1559 }
1560
1561 /* Returns the outermost loop EXPR is obviously invariant in
1562    relative to the loop LOOP, i.e. if all its operands are defined
1563    outside of the returned loop.  Returns NULL if EXPR is not
1564    even obviously invariant in LOOP.  */
1565
1566 struct loop *
1567 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1568 {
1569   basic_block def_bb;
1570   unsigned i, len;
1571
1572   if (is_gimple_min_invariant (expr))
1573     return current_loops->tree_root;
1574
1575   if (TREE_CODE (expr) == SSA_NAME)
1576     {
1577       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1578       if (def_bb)
1579         {
1580           if (flow_bb_inside_loop_p (loop, def_bb))
1581             return NULL;
1582           return superloop_at_depth (loop,
1583                                      loop_depth (def_bb->loop_father) + 1);
1584         }
1585
1586       return current_loops->tree_root;
1587     }
1588
1589   if (!EXPR_P (expr))
1590     return NULL;
1591
1592   unsigned maxdepth = 0;
1593   len = TREE_OPERAND_LENGTH (expr);
1594   for (i = 0; i < len; i++)
1595     {
1596       struct loop *ivloop;
1597       if (!TREE_OPERAND (expr, i))
1598         continue;
1599
1600       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1601       if (!ivloop)
1602         return NULL;
1603       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1604     }
1605
1606   return superloop_at_depth (loop, maxdepth);
1607 }
1608
1609 /* Returns true if expression EXPR is obviously invariant in LOOP,
1610    i.e. if all its operands are defined outside of the LOOP.  LOOP
1611    should not be the function body.  */
1612
1613 bool
1614 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1615 {
1616   basic_block def_bb;
1617   unsigned i, len;
1618
1619   gcc_assert (loop_depth (loop) > 0);
1620
1621   if (is_gimple_min_invariant (expr))
1622     return true;
1623
1624   if (TREE_CODE (expr) == SSA_NAME)
1625     {
1626       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1627       if (def_bb
1628           && flow_bb_inside_loop_p (loop, def_bb))
1629         return false;
1630
1631       return true;
1632     }
1633
1634   if (!EXPR_P (expr))
1635     return false;
1636
1637   len = TREE_OPERAND_LENGTH (expr);
1638   for (i = 0; i < len; i++)
1639     if (TREE_OPERAND (expr, i)
1640         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1641       return false;
1642
1643   return true;
1644 }
1645
1646 /* Cumulates the steps of indices into DATA and replaces their values with the
1647    initial ones.  Returns false when the value of the index cannot be determined.
1648    Callback for for_each_index.  */
1649
1650 struct ifs_ivopts_data
1651 {
1652   struct ivopts_data *ivopts_data;
1653   gimple stmt;
1654   tree step;
1655 };
1656
1657 static bool
1658 idx_find_step (tree base, tree *idx, void *data)
1659 {
1660   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1661   struct iv *iv;
1662   bool use_overflow_semantics = false;
1663   tree step, iv_base, iv_step, lbound, off;
1664   struct loop *loop = dta->ivopts_data->current_loop;
1665
1666   /* If base is a component ref, require that the offset of the reference
1667      be invariant.  */
1668   if (TREE_CODE (base) == COMPONENT_REF)
1669     {
1670       off = component_ref_field_offset (base);
1671       return expr_invariant_in_loop_p (loop, off);
1672     }
1673
1674   /* If base is array, first check whether we will be able to move the
1675      reference out of the loop (in order to take its address in strength
1676      reduction).  In order for this to work we need both lower bound
1677      and step to be loop invariants.  */
1678   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1679     {
1680       /* Moreover, for a range, the size needs to be invariant as well.  */
1681       if (TREE_CODE (base) == ARRAY_RANGE_REF
1682           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1683         return false;
1684
1685       step = array_ref_element_size (base);
1686       lbound = array_ref_low_bound (base);
1687
1688       if (!expr_invariant_in_loop_p (loop, step)
1689           || !expr_invariant_in_loop_p (loop, lbound))
1690         return false;
1691     }
1692
1693   if (TREE_CODE (*idx) != SSA_NAME)
1694     return true;
1695
1696   iv = get_iv (dta->ivopts_data, *idx);
1697   if (!iv)
1698     return false;
1699
1700   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1701           *&x[0], which is not folded and does not trigger the
1702           ARRAY_REF path below.  */
1703   *idx = iv->base;
1704
1705   if (integer_zerop (iv->step))
1706     return true;
1707
1708   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1709     {
1710       step = array_ref_element_size (base);
1711
1712       /* We only handle addresses whose step is an integer constant.  */
1713       if (TREE_CODE (step) != INTEGER_CST)
1714         return false;
1715     }
1716   else
1717     /* The step for pointer arithmetics already is 1 byte.  */
1718     step = size_one_node;
1719
1720   iv_base = iv->base;
1721   iv_step = iv->step;
1722   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1723     use_overflow_semantics = true;
1724
1725   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1726                             sizetype, &iv_base, &iv_step, dta->stmt,
1727                             use_overflow_semantics))
1728     {
1729       /* The index might wrap.  */
1730       return false;
1731     }
1732
1733   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1734   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1735
1736   return true;
1737 }
1738
1739 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1740    object is passed to it in DATA.  */
1741
1742 static bool
1743 idx_record_use (tree base, tree *idx,
1744                 void *vdata)
1745 {
1746   struct ivopts_data *data = (struct ivopts_data *) vdata;
1747   find_interesting_uses_op (data, *idx);
1748   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1749     {
1750       find_interesting_uses_op (data, array_ref_element_size (base));
1751       find_interesting_uses_op (data, array_ref_low_bound (base));
1752     }
1753   return true;
1754 }
1755
1756 /* If we can prove that TOP = cst * BOT for some constant cst,
1757    store cst to MUL and return true.  Otherwise return false.
1758    The returned value is always sign-extended, regardless of the
1759    signedness of TOP and BOT.  */
1760
1761 static bool
1762 constant_multiple_of (tree top, tree bot, widest_int *mul)
1763 {
1764   tree mby;
1765   enum tree_code code;
1766   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1767   widest_int res, p0, p1;
1768
1769   STRIP_NOPS (top);
1770   STRIP_NOPS (bot);
1771
1772   if (operand_equal_p (top, bot, 0))
1773     {
1774       *mul = 1;
1775       return true;
1776     }
1777
1778   code = TREE_CODE (top);
1779   switch (code)
1780     {
1781     case MULT_EXPR:
1782       mby = TREE_OPERAND (top, 1);
1783       if (TREE_CODE (mby) != INTEGER_CST)
1784         return false;
1785
1786       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1787         return false;
1788
1789       *mul = wi::sext (res * wi::to_widest (mby), precision);
1790       return true;
1791
1792     case PLUS_EXPR:
1793     case MINUS_EXPR:
1794       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1795           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1796         return false;
1797
1798       if (code == MINUS_EXPR)
1799         p1 = -p1;
1800       *mul = wi::sext (p0 + p1, precision);
1801       return true;
1802
1803     case INTEGER_CST:
1804       if (TREE_CODE (bot) != INTEGER_CST)
1805         return false;
1806
1807       p0 = widest_int::from (top, SIGNED);
1808       p1 = widest_int::from (bot, SIGNED);
1809       if (p1 == 0)
1810         return false;
1811       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1812       return res == 0;
1813
1814     default:
1815       return false;
1816     }
1817 }
1818
1819 /* Return true if memory reference REF with step STEP may be unaligned.  */
1820
1821 static bool
1822 may_be_unaligned_p (tree ref, tree step)
1823 {
1824   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1825      thus they are not misaligned.  */
1826   if (TREE_CODE (ref) == TARGET_MEM_REF)
1827     return false;
1828
1829   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1830   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1831     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1832
1833   unsigned HOST_WIDE_INT bitpos;
1834   unsigned int ref_align;
1835   get_object_alignment_1 (ref, &ref_align, &bitpos);
1836   if (ref_align < align
1837       || (bitpos % align) != 0
1838       || (bitpos % BITS_PER_UNIT) != 0)
1839     return true;
1840
1841   unsigned int trailing_zeros = tree_ctz (step);
1842   if (trailing_zeros < HOST_BITS_PER_INT
1843       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1844     return true;
1845
1846   return false;
1847 }
1848
1849 /* Return true if EXPR may be non-addressable.   */
1850
1851 bool
1852 may_be_nonaddressable_p (tree expr)
1853 {
1854   switch (TREE_CODE (expr))
1855     {
1856     case TARGET_MEM_REF:
1857       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1858          target, thus they are always addressable.  */
1859       return false;
1860
1861     case COMPONENT_REF:
1862       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1863              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1864
1865     case VIEW_CONVERT_EXPR:
1866       /* This kind of view-conversions may wrap non-addressable objects
1867          and make them look addressable.  After some processing the
1868          non-addressability may be uncovered again, causing ADDR_EXPRs
1869          of inappropriate objects to be built.  */
1870       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1871           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1872         return true;
1873
1874       /* ... fall through ... */
1875
1876     case ARRAY_REF:
1877     case ARRAY_RANGE_REF:
1878       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1879
1880     CASE_CONVERT:
1881       return true;
1882
1883     default:
1884       break;
1885     }
1886
1887   return false;
1888 }
1889
1890 static tree
1891 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1892
1893 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1894    If there is an existing use which has same stripped iv base and step,
1895    this function records this one as a sub use to that; otherwise records
1896    it as a normal one.  */
1897
1898 static struct iv_use *
1899 record_group_use (struct ivopts_data *data, tree *use_p,
1900                   struct iv *iv, gimple stmt, enum use_type use_type)
1901 {
1902   unsigned int i;
1903   struct iv_use *use;
1904   tree addr_base;
1905   unsigned HOST_WIDE_INT addr_offset;
1906
1907   /* Only support sub use for address type uses, that is, with base
1908      object.  */
1909   if (!iv->base_object)
1910     return record_use (data, use_p, iv, stmt, use_type);
1911
1912   addr_base = strip_offset (iv->base, &addr_offset);
1913   for (i = 0; i < n_iv_uses (data); i++)
1914     {
1915       use = iv_use (data, i);
1916       if (use->type != USE_ADDRESS || !use->iv->base_object)
1917         continue;
1918
1919       /* Check if it has the same stripped base and step.  */
1920       if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1921           && operand_equal_p (iv->step, use->iv->step, 0)
1922           && operand_equal_p (addr_base, use->addr_base, 0))
1923         break;
1924     }
1925
1926   if (i == n_iv_uses (data))
1927     return record_use (data, use_p, iv, stmt,
1928                        use_type, addr_base, addr_offset);
1929   else
1930     return record_sub_use (data, use_p, iv, stmt,
1931                            use_type, addr_base, addr_offset, i);
1932 }
1933
1934 /* Finds addresses in *OP_P inside STMT.  */
1935
1936 static void
1937 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1938 {
1939   tree base = *op_p, step = size_zero_node;
1940   struct iv *civ;
1941   struct ifs_ivopts_data ifs_ivopts_data;
1942
1943   /* Do not play with volatile memory references.  A bit too conservative,
1944      perhaps, but safe.  */
1945   if (gimple_has_volatile_ops (stmt))
1946     goto fail;
1947
1948   /* Ignore bitfields for now.  Not really something terribly complicated
1949      to handle.  TODO.  */
1950   if (TREE_CODE (base) == BIT_FIELD_REF)
1951     goto fail;
1952
1953   base = unshare_expr (base);
1954
1955   if (TREE_CODE (base) == TARGET_MEM_REF)
1956     {
1957       tree type = build_pointer_type (TREE_TYPE (base));
1958       tree astep;
1959
1960       if (TMR_BASE (base)
1961           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1962         {
1963           civ = get_iv (data, TMR_BASE (base));
1964           if (!civ)
1965             goto fail;
1966
1967           TMR_BASE (base) = civ->base;
1968           step = civ->step;
1969         }
1970       if (TMR_INDEX2 (base)
1971           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1972         {
1973           civ = get_iv (data, TMR_INDEX2 (base));
1974           if (!civ)
1975             goto fail;
1976
1977           TMR_INDEX2 (base) = civ->base;
1978           step = civ->step;
1979         }
1980       if (TMR_INDEX (base)
1981           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1982         {
1983           civ = get_iv (data, TMR_INDEX (base));
1984           if (!civ)
1985             goto fail;
1986
1987           TMR_INDEX (base) = civ->base;
1988           astep = civ->step;
1989
1990           if (astep)
1991             {
1992               if (TMR_STEP (base))
1993                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1994
1995               step = fold_build2 (PLUS_EXPR, type, step, astep);
1996             }
1997         }
1998
1999       if (integer_zerop (step))
2000         goto fail;
2001       base = tree_mem_ref_addr (type, base);
2002     }
2003   else
2004     {
2005       ifs_ivopts_data.ivopts_data = data;
2006       ifs_ivopts_data.stmt = stmt;
2007       ifs_ivopts_data.step = size_zero_node;
2008       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2009           || integer_zerop (ifs_ivopts_data.step))
2010         goto fail;
2011       step = ifs_ivopts_data.step;
2012
2013       /* Check that the base expression is addressable.  This needs
2014          to be done after substituting bases of IVs into it.  */
2015       if (may_be_nonaddressable_p (base))
2016         goto fail;
2017
2018       /* Moreover, on strict alignment platforms, check that it is
2019          sufficiently aligned.  */
2020       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2021         goto fail;
2022
2023       base = build_fold_addr_expr (base);
2024
2025       /* Substituting bases of IVs into the base expression might
2026          have caused folding opportunities.  */
2027       if (TREE_CODE (base) == ADDR_EXPR)
2028         {
2029           tree *ref = &TREE_OPERAND (base, 0);
2030           while (handled_component_p (*ref))
2031             ref = &TREE_OPERAND (*ref, 0);
2032           if (TREE_CODE (*ref) == MEM_REF)
2033             {
2034               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2035                                       TREE_OPERAND (*ref, 0),
2036                                       TREE_OPERAND (*ref, 1));
2037               if (tem)
2038                 *ref = tem;
2039             }
2040         }
2041     }
2042
2043   civ = alloc_iv (base, step);
2044   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2045   return;
2046
2047 fail:
2048   for_each_index (op_p, idx_record_use, data);
2049 }
2050
2051 /* Finds and records invariants used in STMT.  */
2052
2053 static void
2054 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2055 {
2056   ssa_op_iter iter;
2057   use_operand_p use_p;
2058   tree op;
2059
2060   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2061     {
2062       op = USE_FROM_PTR (use_p);
2063       record_invariant (data, op, false);
2064     }
2065 }
2066
2067 /* Finds interesting uses of induction variables in the statement STMT.  */
2068
2069 static void
2070 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2071 {
2072   struct iv *iv;
2073   tree op, *lhs, *rhs;
2074   ssa_op_iter iter;
2075   use_operand_p use_p;
2076   enum tree_code code;
2077
2078   find_invariants_stmt (data, stmt);
2079
2080   if (gimple_code (stmt) == GIMPLE_COND)
2081     {
2082       find_interesting_uses_cond (data, stmt);
2083       return;
2084     }
2085
2086   if (is_gimple_assign (stmt))
2087     {
2088       lhs = gimple_assign_lhs_ptr (stmt);
2089       rhs = gimple_assign_rhs1_ptr (stmt);
2090
2091       if (TREE_CODE (*lhs) == SSA_NAME)
2092         {
2093           /* If the statement defines an induction variable, the uses are not
2094              interesting by themselves.  */
2095
2096           iv = get_iv (data, *lhs);
2097
2098           if (iv && !integer_zerop (iv->step))
2099             return;
2100         }
2101
2102       code = gimple_assign_rhs_code (stmt);
2103       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2104           && (REFERENCE_CLASS_P (*rhs)
2105               || is_gimple_val (*rhs)))
2106         {
2107           if (REFERENCE_CLASS_P (*rhs))
2108             find_interesting_uses_address (data, stmt, rhs);
2109           else
2110             find_interesting_uses_op (data, *rhs);
2111
2112           if (REFERENCE_CLASS_P (*lhs))
2113             find_interesting_uses_address (data, stmt, lhs);
2114           return;
2115         }
2116       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2117         {
2118           find_interesting_uses_cond (data, stmt);
2119           return;
2120         }
2121
2122       /* TODO -- we should also handle address uses of type
2123
2124          memory = call (whatever);
2125
2126          and
2127
2128          call (memory).  */
2129     }
2130
2131   if (gimple_code (stmt) == GIMPLE_PHI
2132       && gimple_bb (stmt) == data->current_loop->header)
2133     {
2134       iv = get_iv (data, PHI_RESULT (stmt));
2135
2136       if (iv && !integer_zerop (iv->step))
2137         return;
2138     }
2139
2140   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2141     {
2142       op = USE_FROM_PTR (use_p);
2143
2144       if (TREE_CODE (op) != SSA_NAME)
2145         continue;
2146
2147       iv = get_iv (data, op);
2148       if (!iv)
2149         continue;
2150
2151       find_interesting_uses_op (data, op);
2152     }
2153 }
2154
2155 /* Finds interesting uses of induction variables outside of loops
2156    on loop exit edge EXIT.  */
2157
2158 static void
2159 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2160 {
2161   gphi *phi;
2162   gphi_iterator psi;
2163   tree def;
2164
2165   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2166     {
2167       phi = psi.phi ();
2168       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2169       if (!virtual_operand_p (def))
2170         find_interesting_uses_op (data, def);
2171     }
2172 }
2173
2174 /* Finds uses of the induction variables that are interesting.  */
2175
2176 static void
2177 find_interesting_uses (struct ivopts_data *data)
2178 {
2179   basic_block bb;
2180   gimple_stmt_iterator bsi;
2181   basic_block *body = get_loop_body (data->current_loop);
2182   unsigned i;
2183   struct version_info *info;
2184   edge e;
2185
2186   if (dump_file && (dump_flags & TDF_DETAILS))
2187     fprintf (dump_file, "Uses:\n\n");
2188
2189   for (i = 0; i < data->current_loop->num_nodes; i++)
2190     {
2191       edge_iterator ei;
2192       bb = body[i];
2193
2194       FOR_EACH_EDGE (e, ei, bb->succs)
2195         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2196             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2197           find_interesting_uses_outside (data, e);
2198
2199       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2200         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2201       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2202         if (!is_gimple_debug (gsi_stmt (bsi)))
2203           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2204     }
2205
2206   if (dump_file && (dump_flags & TDF_DETAILS))
2207     {
2208       bitmap_iterator bi;
2209
2210       fprintf (dump_file, "\n");
2211
2212       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2213         {
2214           info = ver_info (data, i);
2215           if (info->inv_id)
2216             {
2217               fprintf (dump_file, "  ");
2218               print_generic_expr (dump_file, info->name, TDF_SLIM);
2219               fprintf (dump_file, " is invariant (%d)%s\n",
2220                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2221             }
2222         }
2223
2224       fprintf (dump_file, "\n");
2225     }
2226
2227   free (body);
2228 }
2229
2230 /* Compute maximum offset of [base + offset] addressing mode
2231    for memory reference represented by USE.  */
2232
2233 static HOST_WIDE_INT
2234 compute_max_addr_offset (struct iv_use *use)
2235 {
2236   int width;
2237   rtx reg, addr;
2238   HOST_WIDE_INT i, off;
2239   unsigned list_index, num;
2240   addr_space_t as;
2241   machine_mode mem_mode, addr_mode;
2242   static vec<HOST_WIDE_INT> max_offset_list;
2243
2244   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2245   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2246
2247   num = max_offset_list.length ();
2248   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2249   if (list_index >= num)
2250     {
2251       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2252       for (; num < max_offset_list.length (); num++)
2253         max_offset_list[num] = -1;
2254     }
2255
2256   off = max_offset_list[list_index];
2257   if (off != -1)
2258     return off;
2259
2260   addr_mode = targetm.addr_space.address_mode (as);
2261   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2262   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2263
2264   width = GET_MODE_BITSIZE (addr_mode) - 1;
2265   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2266     width = HOST_BITS_PER_WIDE_INT - 1;
2267
2268   for (i = width; i > 0; i--)
2269     {
2270       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2271       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2272       if (memory_address_addr_space_p (mem_mode, addr, as))
2273         break;
2274
2275       /* For some strict-alignment targets, the offset must be naturally
2276          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2277       off = ((unsigned HOST_WIDE_INT) 1 << i);
2278       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2279         {
2280           off -= GET_MODE_SIZE (mem_mode);
2281           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2282           if (memory_address_addr_space_p (mem_mode, addr, as))
2283             break;
2284         }
2285     }
2286   if (i == 0)
2287     off = 0;
2288
2289   max_offset_list[list_index] = off;
2290   return off;
2291 }
2292
2293 /* Check if all small groups should be split.  Return true if and
2294    only if:
2295
2296      1) At least one groups contain two uses with different offsets.
2297      2) No group contains more than two uses with different offsets.
2298
2299    Return false otherwise.  We want to split such groups because:
2300
2301      1) Small groups don't have much benefit and may interfer with
2302         general candidate selection.
2303      2) Size for problem with only small groups is usually small and
2304         general algorithm can handle it well.
2305
2306    TODO -- Above claim may not hold when auto increment is supported.  */
2307
2308 static bool
2309 split_all_small_groups (struct ivopts_data *data)
2310 {
2311   bool split_p = false;
2312   unsigned int i, n, distinct;
2313   struct iv_use *pre, *use;
2314
2315   n = n_iv_uses (data);
2316   for (i = 0; i < n; i++)
2317     {
2318       use = iv_use (data, i);
2319       if (!use->next)
2320         continue;
2321
2322       distinct = 1;
2323       gcc_assert (use->type == USE_ADDRESS);
2324       for (pre = use, use = use->next; use; pre = use, use = use->next)
2325         {
2326           if (pre->addr_offset != use->addr_offset)
2327             distinct++;
2328
2329           if (distinct > 2)
2330             return false;
2331         }
2332       if (distinct == 2)
2333         split_p = true;
2334     }
2335
2336   return split_p;
2337 }
2338
2339 /* For each group of address type uses, this function further groups
2340    these uses according to the maximum offset supported by target's
2341    [base + offset] addressing mode.  */
2342
2343 static void
2344 group_address_uses (struct ivopts_data *data)
2345 {
2346   HOST_WIDE_INT max_offset = -1;
2347   unsigned int i, n, sub_id;
2348   struct iv_use *pre, *use;
2349   unsigned HOST_WIDE_INT addr_offset_first;
2350
2351   /* Reset max offset to split all small groups.  */
2352   if (split_all_small_groups (data))
2353     max_offset = 0;
2354
2355   n = n_iv_uses (data);
2356   for (i = 0; i < n; i++)
2357     {
2358       use = iv_use (data, i);
2359       if (!use->next)
2360         continue;
2361
2362       gcc_assert (use->type == USE_ADDRESS);
2363       if (max_offset != 0)
2364         max_offset = compute_max_addr_offset (use);
2365
2366       while (use)
2367         {
2368           sub_id = 0;
2369           addr_offset_first = use->addr_offset;
2370           /* Only uses with offset that can fit in offset part against
2371              the first use can be grouped together.  */
2372           for (pre = use, use = use->next;
2373                use && (use->addr_offset - addr_offset_first
2374                        <= (unsigned HOST_WIDE_INT) max_offset);
2375                pre = use, use = use->next)
2376             {
2377               use->id = pre->id;
2378               use->sub_id = ++sub_id;
2379             }
2380
2381           /* Break the list and create new group.  */
2382           if (use)
2383             {
2384               pre->next = NULL;
2385               use->id = n_iv_uses (data);
2386               use->related_cands = BITMAP_ALLOC (NULL);
2387               data->iv_uses.safe_push (use);
2388             }
2389         }
2390     }
2391
2392   if (dump_file && (dump_flags & TDF_DETAILS))
2393     dump_uses (dump_file, data);
2394 }
2395
2396 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2397    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2398    we are at the top-level of the processed address.  */
2399
2400 static tree
2401 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2402                 HOST_WIDE_INT *offset)
2403 {
2404   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2405   enum tree_code code;
2406   tree type, orig_type = TREE_TYPE (expr);
2407   HOST_WIDE_INT off0, off1, st;
2408   tree orig_expr = expr;
2409
2410   STRIP_NOPS (expr);
2411
2412   type = TREE_TYPE (expr);
2413   code = TREE_CODE (expr);
2414   *offset = 0;
2415
2416   switch (code)
2417     {
2418     case INTEGER_CST:
2419       if (!cst_and_fits_in_hwi (expr)
2420           || integer_zerop (expr))
2421         return orig_expr;
2422
2423       *offset = int_cst_value (expr);
2424       return build_int_cst (orig_type, 0);
2425
2426     case POINTER_PLUS_EXPR:
2427     case PLUS_EXPR:
2428     case MINUS_EXPR:
2429       op0 = TREE_OPERAND (expr, 0);
2430       op1 = TREE_OPERAND (expr, 1);
2431
2432       op0 = strip_offset_1 (op0, false, false, &off0);
2433       op1 = strip_offset_1 (op1, false, false, &off1);
2434
2435       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2436       if (op0 == TREE_OPERAND (expr, 0)
2437           && op1 == TREE_OPERAND (expr, 1))
2438         return orig_expr;
2439
2440       if (integer_zerop (op1))
2441         expr = op0;
2442       else if (integer_zerop (op0))
2443         {
2444           if (code == MINUS_EXPR)
2445             expr = fold_build1 (NEGATE_EXPR, type, op1);
2446           else
2447             expr = op1;
2448         }
2449       else
2450         expr = fold_build2 (code, type, op0, op1);
2451
2452       return fold_convert (orig_type, expr);
2453
2454     case MULT_EXPR:
2455       op1 = TREE_OPERAND (expr, 1);
2456       if (!cst_and_fits_in_hwi (op1))
2457         return orig_expr;
2458
2459       op0 = TREE_OPERAND (expr, 0);
2460       op0 = strip_offset_1 (op0, false, false, &off0);
2461       if (op0 == TREE_OPERAND (expr, 0))
2462         return orig_expr;
2463
2464       *offset = off0 * int_cst_value (op1);
2465       if (integer_zerop (op0))
2466         expr = op0;
2467       else
2468         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2469
2470       return fold_convert (orig_type, expr);
2471
2472     case ARRAY_REF:
2473     case ARRAY_RANGE_REF:
2474       if (!inside_addr)
2475         return orig_expr;
2476
2477       step = array_ref_element_size (expr);
2478       if (!cst_and_fits_in_hwi (step))
2479         break;
2480
2481       st = int_cst_value (step);
2482       op1 = TREE_OPERAND (expr, 1);
2483       op1 = strip_offset_1 (op1, false, false, &off1);
2484       *offset = off1 * st;
2485
2486       if (top_compref
2487           && integer_zerop (op1))
2488         {
2489           /* Strip the component reference completely.  */
2490           op0 = TREE_OPERAND (expr, 0);
2491           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2492           *offset += off0;
2493           return op0;
2494         }
2495       break;
2496
2497     case COMPONENT_REF:
2498       {
2499         tree field;
2500
2501         if (!inside_addr)
2502           return orig_expr;
2503
2504         tmp = component_ref_field_offset (expr);
2505         field = TREE_OPERAND (expr, 1);
2506         if (top_compref
2507             && cst_and_fits_in_hwi (tmp)
2508             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2509           {
2510             HOST_WIDE_INT boffset, abs_off;
2511
2512             /* Strip the component reference completely.  */
2513             op0 = TREE_OPERAND (expr, 0);
2514             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2515             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2516             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2517             if (boffset < 0)
2518               abs_off = -abs_off;
2519
2520             *offset = off0 + int_cst_value (tmp) + abs_off;
2521             return op0;
2522           }
2523       }
2524       break;
2525
2526     case ADDR_EXPR:
2527       op0 = TREE_OPERAND (expr, 0);
2528       op0 = strip_offset_1 (op0, true, true, &off0);
2529       *offset += off0;
2530
2531       if (op0 == TREE_OPERAND (expr, 0))
2532         return orig_expr;
2533
2534       expr = build_fold_addr_expr (op0);
2535       return fold_convert (orig_type, expr);
2536
2537     case MEM_REF:
2538       /* ???  Offset operand?  */
2539       inside_addr = false;
2540       break;
2541
2542     default:
2543       return orig_expr;
2544     }
2545
2546   /* Default handling of expressions for that we want to recurse into
2547      the first operand.  */
2548   op0 = TREE_OPERAND (expr, 0);
2549   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2550   *offset += off0;
2551
2552   if (op0 == TREE_OPERAND (expr, 0)
2553       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2554     return orig_expr;
2555
2556   expr = copy_node (expr);
2557   TREE_OPERAND (expr, 0) = op0;
2558   if (op1)
2559     TREE_OPERAND (expr, 1) = op1;
2560
2561   /* Inside address, we might strip the top level component references,
2562      thus changing type of the expression.  Handling of ADDR_EXPR
2563      will fix that.  */
2564   expr = fold_convert (orig_type, expr);
2565
2566   return expr;
2567 }
2568
2569 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2570
2571 static tree
2572 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2573 {
2574   HOST_WIDE_INT off;
2575   tree core = strip_offset_1 (expr, false, false, &off);
2576   *offset = off;
2577   return core;
2578 }
2579
2580 /* Returns variant of TYPE that can be used as base for different uses.
2581    We return unsigned type with the same precision, which avoids problems
2582    with overflows.  */
2583
2584 static tree
2585 generic_type_for (tree type)
2586 {
2587   if (POINTER_TYPE_P (type))
2588     return unsigned_type_for (type);
2589
2590   if (TYPE_UNSIGNED (type))
2591     return type;
2592
2593   return unsigned_type_for (type);
2594 }
2595
2596 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2597    the bitmap to that we should store it.  */
2598
2599 static struct ivopts_data *fd_ivopts_data;
2600 static tree
2601 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2602 {
2603   bitmap *depends_on = (bitmap *) data;
2604   struct version_info *info;
2605
2606   if (TREE_CODE (*expr_p) != SSA_NAME)
2607     return NULL_TREE;
2608   info = name_info (fd_ivopts_data, *expr_p);
2609
2610   if (!info->inv_id || info->has_nonlin_use)
2611     return NULL_TREE;
2612
2613   if (!*depends_on)
2614     *depends_on = BITMAP_ALLOC (NULL);
2615   bitmap_set_bit (*depends_on, info->inv_id);
2616
2617   return NULL_TREE;
2618 }
2619
2620 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2621    position to POS.  If USE is not NULL, the candidate is set as related to
2622    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2623    replacement of the final value of the iv by a direct computation.  */
2624
2625 static struct iv_cand *
2626 add_candidate_1 (struct ivopts_data *data,
2627                  tree base, tree step, bool important, enum iv_position pos,
2628                  struct iv_use *use, gimple incremented_at)
2629 {
2630   unsigned i;
2631   struct iv_cand *cand = NULL;
2632   tree type, orig_type;
2633
2634   /* For non-original variables, make sure their values are computed in a type
2635      that does not invoke undefined behavior on overflows (since in general,
2636      we cannot prove that these induction variables are non-wrapping).  */
2637   if (pos != IP_ORIGINAL)
2638     {
2639       orig_type = TREE_TYPE (base);
2640       type = generic_type_for (orig_type);
2641       if (type != orig_type)
2642         {
2643           base = fold_convert (type, base);
2644           step = fold_convert (type, step);
2645         }
2646     }
2647
2648   for (i = 0; i < n_iv_cands (data); i++)
2649     {
2650       cand = iv_cand (data, i);
2651
2652       if (cand->pos != pos)
2653         continue;
2654
2655       if (cand->incremented_at != incremented_at
2656           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2657               && cand->ainc_use != use))
2658         continue;
2659
2660       if (!cand->iv)
2661         {
2662           if (!base && !step)
2663             break;
2664
2665           continue;
2666         }
2667
2668       if (!base && !step)
2669         continue;
2670
2671       if (operand_equal_p (base, cand->iv->base, 0)
2672           && operand_equal_p (step, cand->iv->step, 0)
2673           && (TYPE_PRECISION (TREE_TYPE (base))
2674               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2675         break;
2676     }
2677
2678   if (i == n_iv_cands (data))
2679     {
2680       cand = XCNEW (struct iv_cand);
2681       cand->id = i;
2682
2683       if (!base && !step)
2684         cand->iv = NULL;
2685       else
2686         cand->iv = alloc_iv (base, step);
2687
2688       cand->pos = pos;
2689       if (pos != IP_ORIGINAL && cand->iv)
2690         {
2691           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2692           cand->var_after = cand->var_before;
2693         }
2694       cand->important = important;
2695       cand->incremented_at = incremented_at;
2696       data->iv_candidates.safe_push (cand);
2697
2698       if (step
2699           && TREE_CODE (step) != INTEGER_CST)
2700         {
2701           fd_ivopts_data = data;
2702           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2703         }
2704
2705       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2706         cand->ainc_use = use;
2707       else
2708         cand->ainc_use = NULL;
2709
2710       if (dump_file && (dump_flags & TDF_DETAILS))
2711         dump_cand (dump_file, cand);
2712     }
2713
2714   if (important && !cand->important)
2715     {
2716       cand->important = true;
2717       if (dump_file && (dump_flags & TDF_DETAILS))
2718         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2719     }
2720
2721   if (use)
2722     {
2723       bitmap_set_bit (use->related_cands, i);
2724       if (dump_file && (dump_flags & TDF_DETAILS))
2725         fprintf (dump_file, "Candidate %d is related to use %d\n",
2726                  cand->id, use->id);
2727     }
2728
2729   return cand;
2730 }
2731
2732 /* Returns true if incrementing the induction variable at the end of the LOOP
2733    is allowed.
2734
2735    The purpose is to avoid splitting latch edge with a biv increment, thus
2736    creating a jump, possibly confusing other optimization passes and leaving
2737    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2738    is not available (so we do not have a better alternative), or if the latch
2739    edge is already nonempty.  */
2740
2741 static bool
2742 allow_ip_end_pos_p (struct loop *loop)
2743 {
2744   if (!ip_normal_pos (loop))
2745     return true;
2746
2747   if (!empty_block_p (ip_end_pos (loop)))
2748     return true;
2749
2750   return false;
2751 }
2752
2753 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2754    Important field is set to IMPORTANT.  */
2755
2756 static void
2757 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2758                         bool important, struct iv_use *use)
2759 {
2760   basic_block use_bb = gimple_bb (use->stmt);
2761   machine_mode mem_mode;
2762   unsigned HOST_WIDE_INT cstepi;
2763
2764   /* If we insert the increment in any position other than the standard
2765      ones, we must ensure that it is incremented once per iteration.
2766      It must not be in an inner nested loop, or one side of an if
2767      statement.  */
2768   if (use_bb->loop_father != data->current_loop
2769       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2770       || stmt_could_throw_p (use->stmt)
2771       || !cst_and_fits_in_hwi (step))
2772     return;
2773
2774   cstepi = int_cst_value (step);
2775
2776   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2777   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2778         || USE_STORE_PRE_INCREMENT (mem_mode))
2779        && GET_MODE_SIZE (mem_mode) == cstepi)
2780       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2781            || USE_STORE_PRE_DECREMENT (mem_mode))
2782           && GET_MODE_SIZE (mem_mode) == -cstepi))
2783     {
2784       enum tree_code code = MINUS_EXPR;
2785       tree new_base;
2786       tree new_step = step;
2787
2788       if (POINTER_TYPE_P (TREE_TYPE (base)))
2789         {
2790           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2791           code = POINTER_PLUS_EXPR;
2792         }
2793       else
2794         new_step = fold_convert (TREE_TYPE (base), new_step);
2795       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2796       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2797                        use->stmt);
2798     }
2799   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2800         || USE_STORE_POST_INCREMENT (mem_mode))
2801        && GET_MODE_SIZE (mem_mode) == cstepi)
2802       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2803            || USE_STORE_POST_DECREMENT (mem_mode))
2804           && GET_MODE_SIZE (mem_mode) == -cstepi))
2805     {
2806       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2807                        use->stmt);
2808     }
2809 }
2810
2811 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2812    position to POS.  If USE is not NULL, the candidate is set as related to
2813    it.  The candidate computation is scheduled on all available positions.  */
2814
2815 static void
2816 add_candidate (struct ivopts_data *data,
2817                tree base, tree step, bool important, struct iv_use *use)
2818 {
2819   gcc_assert (use == NULL || use->sub_id == 0);
2820
2821   if (ip_normal_pos (data->current_loop))
2822     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2823   if (ip_end_pos (data->current_loop)
2824       && allow_ip_end_pos_p (data->current_loop))
2825     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2826
2827   if (use != NULL && use->type == USE_ADDRESS)
2828     add_autoinc_candidates (data, base, step, important, use);
2829 }
2830
2831 /* Adds standard iv candidates.  */
2832
2833 static void
2834 add_standard_iv_candidates (struct ivopts_data *data)
2835 {
2836   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2837
2838   /* The same for a double-integer type if it is still fast enough.  */
2839   if (TYPE_PRECISION
2840         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2841       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2842     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2843                    build_int_cst (long_integer_type_node, 1), true, NULL);
2844
2845   /* The same for a double-integer type if it is still fast enough.  */
2846   if (TYPE_PRECISION
2847         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2848       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2849     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2850                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2851 }
2852
2853
2854 /* Adds candidates bases on the old induction variable IV.  */
2855
2856 static void
2857 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2858 {
2859   gimple phi;
2860   tree def;
2861   struct iv_cand *cand;
2862
2863   add_candidate (data, iv->base, iv->step, true, NULL);
2864
2865   /* The same, but with initial value zero.  */
2866   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2867     add_candidate (data, size_int (0), iv->step, true, NULL);
2868   else
2869     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2870                    iv->step, true, NULL);
2871
2872   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2873   if (gimple_code (phi) == GIMPLE_PHI)
2874     {
2875       /* Additionally record the possibility of leaving the original iv
2876          untouched.  */
2877       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2878       /* Don't add candidate if it's from another PHI node because
2879          it's an affine iv appearing in the form of PEELED_CHREC.  */
2880       phi = SSA_NAME_DEF_STMT (def);
2881       if (gimple_code (phi) != GIMPLE_PHI)
2882         {
2883           cand = add_candidate_1 (data,
2884                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
2885                                   SSA_NAME_DEF_STMT (def));
2886           cand->var_before = iv->ssa_name;
2887           cand->var_after = def;
2888         }
2889       else
2890         gcc_assert (gimple_bb (phi) == data->current_loop->header);
2891     }
2892 }
2893
2894 /* Adds candidates based on the old induction variables.  */
2895
2896 static void
2897 add_old_ivs_candidates (struct ivopts_data *data)
2898 {
2899   unsigned i;
2900   struct iv *iv;
2901   bitmap_iterator bi;
2902
2903   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2904     {
2905       iv = ver_info (data, i)->iv;
2906       if (iv && iv->biv_p && !integer_zerop (iv->step))
2907         add_old_iv_candidates (data, iv);
2908     }
2909 }
2910
2911 /* Adds candidates based on the value of the induction variable IV and USE.  */
2912
2913 static void
2914 add_iv_value_candidates (struct ivopts_data *data,
2915                          struct iv *iv, struct iv_use *use)
2916 {
2917   unsigned HOST_WIDE_INT offset;
2918   tree base;
2919   tree basetype;
2920
2921   add_candidate (data, iv->base, iv->step, false, use);
2922
2923   /* The same, but with initial value zero.  Make such variable important,
2924      since it is generic enough so that possibly many uses may be based
2925      on it.  */
2926   basetype = TREE_TYPE (iv->base);
2927   if (POINTER_TYPE_P (basetype))
2928     basetype = sizetype;
2929   add_candidate (data, build_int_cst (basetype, 0),
2930                  iv->step, true, use);
2931
2932   /* Third, try removing the constant offset.  Make sure to even
2933      add a candidate for &a[0] vs. (T *)&a.  */
2934   base = strip_offset (iv->base, &offset);
2935   if (offset
2936       || base != iv->base)
2937     add_candidate (data, base, iv->step, false, use);
2938 }
2939
2940 /* Adds candidates based on the uses.  */
2941
2942 static void
2943 add_derived_ivs_candidates (struct ivopts_data *data)
2944 {
2945   unsigned i;
2946
2947   for (i = 0; i < n_iv_uses (data); i++)
2948     {
2949       struct iv_use *use = iv_use (data, i);
2950
2951       if (!use)
2952         continue;
2953
2954       switch (use->type)
2955         {
2956         case USE_NONLINEAR_EXPR:
2957         case USE_COMPARE:
2958         case USE_ADDRESS:
2959           /* Just add the ivs based on the value of the iv used here.  */
2960           add_iv_value_candidates (data, use->iv, use);
2961           break;
2962
2963         default:
2964           gcc_unreachable ();
2965         }
2966     }
2967 }
2968
2969 /* Record important candidates and add them to related_cands bitmaps
2970    if needed.  */
2971
2972 static void
2973 record_important_candidates (struct ivopts_data *data)
2974 {
2975   unsigned i;
2976   struct iv_use *use;
2977
2978   for (i = 0; i < n_iv_cands (data); i++)
2979     {
2980       struct iv_cand *cand = iv_cand (data, i);
2981
2982       if (cand->important)
2983         bitmap_set_bit (data->important_candidates, i);
2984     }
2985
2986   data->consider_all_candidates = (n_iv_cands (data)
2987                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2988
2989   if (data->consider_all_candidates)
2990     {
2991       /* We will not need "related_cands" bitmaps in this case,
2992          so release them to decrease peak memory consumption.  */
2993       for (i = 0; i < n_iv_uses (data); i++)
2994         {
2995           use = iv_use (data, i);
2996           BITMAP_FREE (use->related_cands);
2997         }
2998     }
2999   else
3000     {
3001       /* Add important candidates to the related_cands bitmaps.  */
3002       for (i = 0; i < n_iv_uses (data); i++)
3003         bitmap_ior_into (iv_use (data, i)->related_cands,
3004                          data->important_candidates);
3005     }
3006 }
3007
3008 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3009    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3010    we allocate a simple list to every use.  */
3011
3012 static void
3013 alloc_use_cost_map (struct ivopts_data *data)
3014 {
3015   unsigned i, size, s;
3016
3017   for (i = 0; i < n_iv_uses (data); i++)
3018     {
3019       struct iv_use *use = iv_use (data, i);
3020
3021       if (data->consider_all_candidates)
3022         size = n_iv_cands (data);
3023       else
3024         {
3025           s = bitmap_count_bits (use->related_cands);
3026
3027           /* Round up to the power of two, so that moduling by it is fast.  */
3028           size = s ? (1 << ceil_log2 (s)) : 1;
3029         }
3030
3031       use->n_map_members = size;
3032       use->cost_map = XCNEWVEC (struct cost_pair, size);
3033     }
3034 }
3035
3036 /* Returns description of computation cost of expression whose runtime
3037    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
3038
3039 static comp_cost
3040 new_cost (unsigned runtime, unsigned complexity)
3041 {
3042   comp_cost cost;
3043
3044   cost.cost = runtime;
3045   cost.complexity = complexity;
3046
3047   return cost;
3048 }
3049
3050 /* Returns true if COST is infinite.  */
3051
3052 static bool
3053 infinite_cost_p (comp_cost cost)
3054 {
3055   return cost.cost == INFTY;
3056 }
3057
3058 /* Adds costs COST1 and COST2.  */
3059
3060 static comp_cost
3061 add_costs (comp_cost cost1, comp_cost cost2)
3062 {
3063   if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3064     return infinite_cost;
3065
3066   cost1.cost += cost2.cost;
3067   cost1.complexity += cost2.complexity;
3068
3069   return cost1;
3070 }
3071 /* Subtracts costs COST1 and COST2.  */
3072
3073 static comp_cost
3074 sub_costs (comp_cost cost1, comp_cost cost2)
3075 {
3076   cost1.cost -= cost2.cost;
3077   cost1.complexity -= cost2.complexity;
3078
3079   return cost1;
3080 }
3081
3082 /* Returns a negative number if COST1 < COST2, a positive number if
3083    COST1 > COST2, and 0 if COST1 = COST2.  */
3084
3085 static int
3086 compare_costs (comp_cost cost1, comp_cost cost2)
3087 {
3088   if (cost1.cost == cost2.cost)
3089     return cost1.complexity - cost2.complexity;
3090
3091   return cost1.cost - cost2.cost;
3092 }
3093
3094 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3095    on invariants DEPENDS_ON and that the value used in expressing it
3096    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3097
3098 static void
3099 set_use_iv_cost (struct ivopts_data *data,
3100                  struct iv_use *use, struct iv_cand *cand,
3101                  comp_cost cost, bitmap depends_on, tree value,
3102                  enum tree_code comp, int inv_expr_id)
3103 {
3104   unsigned i, s;
3105
3106   if (infinite_cost_p (cost))
3107     {
3108       BITMAP_FREE (depends_on);
3109       return;
3110     }
3111
3112   if (data->consider_all_candidates)
3113     {
3114       use->cost_map[cand->id].cand = cand;
3115       use->cost_map[cand->id].cost = cost;
3116       use->cost_map[cand->id].depends_on = depends_on;
3117       use->cost_map[cand->id].value = value;
3118       use->cost_map[cand->id].comp = comp;
3119       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3120       return;
3121     }
3122
3123   /* n_map_members is a power of two, so this computes modulo.  */
3124   s = cand->id & (use->n_map_members - 1);
3125   for (i = s; i < use->n_map_members; i++)
3126     if (!use->cost_map[i].cand)
3127       goto found;
3128   for (i = 0; i < s; i++)
3129     if (!use->cost_map[i].cand)
3130       goto found;
3131
3132   gcc_unreachable ();
3133
3134 found:
3135   use->cost_map[i].cand = cand;
3136   use->cost_map[i].cost = cost;
3137   use->cost_map[i].depends_on = depends_on;
3138   use->cost_map[i].value = value;
3139   use->cost_map[i].comp = comp;
3140   use->cost_map[i].inv_expr_id = inv_expr_id;
3141 }
3142
3143 /* Gets cost of (USE, CANDIDATE) pair.  */
3144
3145 static struct cost_pair *
3146 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3147                  struct iv_cand *cand)
3148 {
3149   unsigned i, s;
3150   struct cost_pair *ret;
3151
3152   if (!cand)
3153     return NULL;
3154
3155   if (data->consider_all_candidates)
3156     {
3157       ret = use->cost_map + cand->id;
3158       if (!ret->cand)
3159         return NULL;
3160
3161       return ret;
3162     }
3163
3164   /* n_map_members is a power of two, so this computes modulo.  */
3165   s = cand->id & (use->n_map_members - 1);
3166   for (i = s; i < use->n_map_members; i++)
3167     if (use->cost_map[i].cand == cand)
3168       return use->cost_map + i;
3169     else if (use->cost_map[i].cand == NULL)
3170       return NULL;
3171   for (i = 0; i < s; i++)
3172     if (use->cost_map[i].cand == cand)
3173       return use->cost_map + i;
3174     else if (use->cost_map[i].cand == NULL)
3175       return NULL;
3176
3177   return NULL;
3178 }
3179
3180 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3181 static rtx
3182 produce_memory_decl_rtl (tree obj, int *regno)
3183 {
3184   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3185   machine_mode address_mode = targetm.addr_space.address_mode (as);
3186   rtx x;
3187
3188   gcc_assert (obj);
3189   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3190     {
3191       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3192       x = gen_rtx_SYMBOL_REF (address_mode, name);
3193       SET_SYMBOL_REF_DECL (x, obj);
3194       x = gen_rtx_MEM (DECL_MODE (obj), x);
3195       set_mem_addr_space (x, as);
3196       targetm.encode_section_info (obj, x, true);
3197     }
3198   else
3199     {
3200       x = gen_raw_REG (address_mode, (*regno)++);
3201       x = gen_rtx_MEM (DECL_MODE (obj), x);
3202       set_mem_addr_space (x, as);
3203     }
3204
3205   return x;
3206 }
3207
3208 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3209    walk_tree.  DATA contains the actual fake register number.  */
3210
3211 static tree
3212 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3213 {
3214   tree obj = NULL_TREE;
3215   rtx x = NULL_RTX;
3216   int *regno = (int *) data;
3217
3218   switch (TREE_CODE (*expr_p))
3219     {
3220     case ADDR_EXPR:
3221       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3222            handled_component_p (*expr_p);
3223            expr_p = &TREE_OPERAND (*expr_p, 0))
3224         continue;
3225       obj = *expr_p;
3226       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3227         x = produce_memory_decl_rtl (obj, regno);
3228       break;
3229
3230     case SSA_NAME:
3231       *ws = 0;
3232       obj = SSA_NAME_VAR (*expr_p);
3233       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3234       if (!obj)
3235         return NULL_TREE;
3236       if (!DECL_RTL_SET_P (obj))
3237         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3238       break;
3239
3240     case VAR_DECL:
3241     case PARM_DECL:
3242     case RESULT_DECL:
3243       *ws = 0;
3244       obj = *expr_p;
3245
3246       if (DECL_RTL_SET_P (obj))
3247         break;
3248
3249       if (DECL_MODE (obj) == BLKmode)
3250         x = produce_memory_decl_rtl (obj, regno);
3251       else
3252         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3253
3254       break;
3255
3256     default:
3257       break;
3258     }
3259
3260   if (x)
3261     {
3262       decl_rtl_to_reset.safe_push (obj);
3263       SET_DECL_RTL (obj, x);
3264     }
3265
3266   return NULL_TREE;
3267 }
3268
3269 /* Determines cost of the computation of EXPR.  */
3270
3271 static unsigned
3272 computation_cost (tree expr, bool speed)
3273 {
3274   rtx_insn *seq;
3275   rtx rslt;
3276   tree type = TREE_TYPE (expr);
3277   unsigned cost;
3278   /* Avoid using hard regs in ways which may be unsupported.  */
3279   int regno = LAST_VIRTUAL_REGISTER + 1;
3280   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3281   enum node_frequency real_frequency = node->frequency;
3282
3283   node->frequency = NODE_FREQUENCY_NORMAL;
3284   crtl->maybe_hot_insn_p = speed;
3285   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3286   start_sequence ();
3287   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3288   seq = get_insns ();
3289   end_sequence ();
3290   default_rtl_profile ();
3291   node->frequency = real_frequency;
3292
3293   cost = seq_cost (seq, speed);
3294   if (MEM_P (rslt))
3295     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3296                           TYPE_ADDR_SPACE (type), speed);
3297   else if (!REG_P (rslt))
3298     cost += set_src_cost (rslt, speed);
3299
3300   return cost;
3301 }
3302
3303 /* Returns variable containing the value of candidate CAND at statement AT.  */
3304
3305 static tree
3306 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3307 {
3308   if (stmt_after_increment (loop, cand, stmt))
3309     return cand->var_after;
3310   else
3311     return cand->var_before;
3312 }
3313
3314 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3315    same precision that is at least as wide as the precision of TYPE, stores
3316    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3317    type of A and B.  */
3318
3319 static tree
3320 determine_common_wider_type (tree *a, tree *b)
3321 {
3322   tree wider_type = NULL;
3323   tree suba, subb;
3324   tree atype = TREE_TYPE (*a);
3325
3326   if (CONVERT_EXPR_P (*a))
3327     {
3328       suba = TREE_OPERAND (*a, 0);
3329       wider_type = TREE_TYPE (suba);
3330       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3331         return atype;
3332     }
3333   else
3334     return atype;
3335
3336   if (CONVERT_EXPR_P (*b))
3337     {
3338       subb = TREE_OPERAND (*b, 0);
3339       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3340         return atype;
3341     }
3342   else
3343     return atype;
3344
3345   *a = suba;
3346   *b = subb;
3347   return wider_type;
3348 }
3349
3350 /* Determines the expression by that USE is expressed from induction variable
3351    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3352    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3353
3354 static bool
3355 get_computation_aff (struct loop *loop,
3356                      struct iv_use *use, struct iv_cand *cand, gimple at,
3357                      struct aff_tree *aff)
3358 {
3359   tree ubase = use->iv->base;
3360   tree ustep = use->iv->step;
3361   tree cbase = cand->iv->base;
3362   tree cstep = cand->iv->step, cstep_common;
3363   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3364   tree common_type, var;
3365   tree uutype;
3366   aff_tree cbase_aff, var_aff;
3367   widest_int rat;
3368
3369   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3370     {
3371       /* We do not have a precision to express the values of use.  */
3372       return false;
3373     }
3374
3375   var = var_at_stmt (loop, cand, at);
3376   uutype = unsigned_type_for (utype);
3377
3378   /* If the conversion is not noop, perform it.  */
3379   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3380     {
3381       cstep = fold_convert (uutype, cstep);
3382       cbase = fold_convert (uutype, cbase);
3383       var = fold_convert (uutype, var);
3384     }
3385
3386   if (!constant_multiple_of (ustep, cstep, &rat))
3387     return false;
3388
3389   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3390      type, we achieve better folding by computing their difference in this
3391      wider type, and cast the result to UUTYPE.  We do not need to worry about
3392      overflows, as all the arithmetics will in the end be performed in UUTYPE
3393      anyway.  */
3394   common_type = determine_common_wider_type (&ubase, &cbase);
3395
3396   /* use = ubase - ratio * cbase + ratio * var.  */
3397   tree_to_aff_combination (ubase, common_type, aff);
3398   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3399   tree_to_aff_combination (var, uutype, &var_aff);
3400
3401   /* We need to shift the value if we are after the increment.  */
3402   if (stmt_after_increment (loop, cand, at))
3403     {
3404       aff_tree cstep_aff;
3405
3406       if (common_type != uutype)
3407         cstep_common = fold_convert (common_type, cstep);
3408       else
3409         cstep_common = cstep;
3410
3411       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3412       aff_combination_add (&cbase_aff, &cstep_aff);
3413     }
3414
3415   aff_combination_scale (&cbase_aff, -rat);
3416   aff_combination_add (aff, &cbase_aff);
3417   if (common_type != uutype)
3418     aff_combination_convert (aff, uutype);
3419
3420   aff_combination_scale (&var_aff, rat);
3421   aff_combination_add (aff, &var_aff);
3422
3423   return true;
3424 }
3425
3426 /* Return the type of USE.  */
3427
3428 static tree
3429 get_use_type (struct iv_use *use)
3430 {
3431   tree base_type = TREE_TYPE (use->iv->base);
3432   tree type;
3433
3434   if (use->type == USE_ADDRESS)
3435     {
3436       /* The base_type may be a void pointer.  Create a pointer type based on
3437          the mem_ref instead.  */
3438       type = build_pointer_type (TREE_TYPE (*use->op_p));
3439       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3440                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3441     }
3442   else
3443     type = base_type;
3444
3445   return type;
3446 }
3447
3448 /* Determines the expression by that USE is expressed from induction variable
3449    CAND at statement AT in LOOP.  The computation is unshared.  */
3450
3451 static tree
3452 get_computation_at (struct loop *loop,
3453                     struct iv_use *use, struct iv_cand *cand, gimple at)
3454 {
3455   aff_tree aff;
3456   tree type = get_use_type (use);
3457
3458   if (!get_computation_aff (loop, use, cand, at, &aff))
3459     return NULL_TREE;
3460   unshare_aff_combination (&aff);
3461   return fold_convert (type, aff_combination_to_tree (&aff));
3462 }
3463
3464 /* Determines the expression by that USE is expressed from induction variable
3465    CAND in LOOP.  The computation is unshared.  */
3466
3467 static tree
3468 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3469 {
3470   return get_computation_at (loop, use, cand, use->stmt);
3471 }
3472
3473 /* Adjust the cost COST for being in loop setup rather than loop body.
3474    If we're optimizing for space, the loop setup overhead is constant;
3475    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3476 static unsigned
3477 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3478 {
3479   if (cost == INFTY)
3480     return cost;
3481   else if (optimize_loop_for_speed_p (data->current_loop))
3482     return cost / avg_loop_niter (data->current_loop);
3483   else
3484     return cost;
3485 }
3486
3487 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3488    validity for a memory reference accessing memory of mode MODE in
3489    address space AS.  */
3490
3491
3492 bool
3493 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3494                                  addr_space_t as)
3495 {
3496 #define MAX_RATIO 128
3497   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3498   static vec<sbitmap> valid_mult_list;
3499   sbitmap valid_mult;
3500
3501   if (data_index >= valid_mult_list.length ())
3502     valid_mult_list.safe_grow_cleared (data_index + 1);
3503
3504   valid_mult = valid_mult_list[data_index];
3505   if (!valid_mult)
3506     {
3507       machine_mode address_mode = targetm.addr_space.address_mode (as);
3508       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3509       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3510       rtx addr, scaled;
3511       HOST_WIDE_INT i;
3512
3513       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3514       bitmap_clear (valid_mult);
3515       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3516       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3517       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3518         {
3519           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3520           if (memory_address_addr_space_p (mode, addr, as)
3521               || memory_address_addr_space_p (mode, scaled, as))
3522             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3523         }
3524
3525       if (dump_file && (dump_flags & TDF_DETAILS))
3526         {
3527           fprintf (dump_file, "  allowed multipliers:");
3528           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3529             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3530               fprintf (dump_file, " %d", (int) i);
3531           fprintf (dump_file, "\n");
3532           fprintf (dump_file, "\n");
3533         }
3534
3535       valid_mult_list[data_index] = valid_mult;
3536     }
3537
3538   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3539     return false;
3540
3541   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3542 }
3543
3544 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3545    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3546    variable is omitted.  Compute the cost for a memory reference that accesses
3547    a memory location of mode MEM_MODE in address space AS.
3548
3549    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3550    size of MEM_MODE / RATIO) is available.  To make this determination, we
3551    look at the size of the increment to be made, which is given in CSTEP.
3552    CSTEP may be zero if the step is unknown.
3553    STMT_AFTER_INC is true iff the statement we're looking at is after the
3554    increment of the original biv.
3555
3556    TODO -- there must be some better way.  This all is quite crude.  */
3557
3558 enum ainc_type
3559 {
3560   AINC_PRE_INC,         /* Pre increment.  */
3561   AINC_PRE_DEC,         /* Pre decrement.  */
3562   AINC_POST_INC,        /* Post increment.  */
3563   AINC_POST_DEC,        /* Post decrement.  */
3564   AINC_NONE             /* Also the number of auto increment types.  */
3565 };
3566
3567 typedef struct address_cost_data_s
3568 {
3569   HOST_WIDE_INT min_offset, max_offset;
3570   unsigned costs[2][2][2][2];
3571   unsigned ainc_costs[AINC_NONE];
3572 } *address_cost_data;
3573
3574
3575 static comp_cost
3576 get_address_cost (bool symbol_present, bool var_present,
3577                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3578                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3579                   addr_space_t as, bool speed,
3580                   bool stmt_after_inc, bool *may_autoinc)
3581 {
3582   machine_mode address_mode = targetm.addr_space.address_mode (as);
3583   static vec<address_cost_data> address_cost_data_list;
3584   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3585   address_cost_data data;
3586   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3587   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3588   unsigned cost, acost, complexity;
3589   enum ainc_type autoinc_type;
3590   bool offset_p, ratio_p, autoinc;
3591   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3592   unsigned HOST_WIDE_INT mask;
3593   unsigned bits;
3594
3595   if (data_index >= address_cost_data_list.length ())
3596     address_cost_data_list.safe_grow_cleared (data_index + 1);
3597
3598   data = address_cost_data_list[data_index];
3599   if (!data)
3600     {
3601       HOST_WIDE_INT i;
3602       HOST_WIDE_INT rat, off = 0;
3603       int old_cse_not_expected, width;
3604       unsigned sym_p, var_p, off_p, rat_p, add_c;
3605       rtx_insn *seq;
3606       rtx addr, base;
3607       rtx reg0, reg1;
3608
3609       data = (address_cost_data) xcalloc (1, sizeof (*data));
3610
3611       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3612
3613       width = GET_MODE_BITSIZE (address_mode) - 1;
3614       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3615         width = HOST_BITS_PER_WIDE_INT - 1;
3616       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3617
3618       for (i = width; i >= 0; i--)
3619         {
3620           off = -((unsigned HOST_WIDE_INT) 1 << i);
3621           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3622           if (memory_address_addr_space_p (mem_mode, addr, as))
3623             break;
3624         }
3625       data->min_offset = (i == -1? 0 : off);
3626
3627       for (i = width; i >= 0; i--)
3628         {
3629           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3630           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3631           if (memory_address_addr_space_p (mem_mode, addr, as))
3632             break;
3633           /* For some strict-alignment targets, the offset must be naturally
3634              aligned.  Try an aligned offset if mem_mode is not QImode.  */
3635           off = mem_mode != QImode
3636                 ? ((unsigned HOST_WIDE_INT) 1 << i)
3637                     - GET_MODE_SIZE (mem_mode)
3638                 : 0;
3639           if (off > 0)
3640             {
3641               XEXP (addr, 1) = gen_int_mode (off, address_mode);
3642               if (memory_address_addr_space_p (mem_mode, addr, as))
3643                 break;
3644             }
3645         }
3646       if (i == -1)
3647         off = 0;
3648       data->max_offset = off;
3649
3650       if (dump_file && (dump_flags & TDF_DETAILS))
3651         {
3652           fprintf (dump_file, "get_address_cost:\n");
3653           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3654                    GET_MODE_NAME (mem_mode),
3655                    data->min_offset);
3656           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3657                    GET_MODE_NAME (mem_mode),
3658                    data->max_offset);
3659         }
3660
3661       rat = 1;
3662       for (i = 2; i <= MAX_RATIO; i++)
3663         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3664           {
3665             rat = i;
3666             break;
3667           }
3668
3669       /* Compute the cost of various addressing modes.  */
3670       acost = 0;
3671       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3672       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3673
3674       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3675           || USE_STORE_PRE_DECREMENT (mem_mode))
3676         {
3677           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3678           has_predec[mem_mode]
3679             = memory_address_addr_space_p (mem_mode, addr, as);
3680
3681           if (has_predec[mem_mode])
3682             data->ainc_costs[AINC_PRE_DEC]
3683               = address_cost (addr, mem_mode, as, speed);
3684         }
3685       if (USE_LOAD_POST_DECREMENT (mem_mode)
3686           || USE_STORE_POST_DECREMENT (mem_mode))
3687         {
3688           addr = gen_rtx_POST_DEC (address_mode, reg0);
3689           has_postdec[mem_mode]
3690             = memory_address_addr_space_p (mem_mode, addr, as);
3691
3692           if (has_postdec[mem_mode])
3693             data->ainc_costs[AINC_POST_DEC]
3694               = address_cost (addr, mem_mode, as, speed);
3695         }
3696       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3697           || USE_STORE_PRE_DECREMENT (mem_mode))
3698         {
3699           addr = gen_rtx_PRE_INC (address_mode, reg0);
3700           has_preinc[mem_mode]
3701             = memory_address_addr_space_p (mem_mode, addr, as);
3702
3703           if (has_preinc[mem_mode])
3704             data->ainc_costs[AINC_PRE_INC]
3705               = address_cost (addr, mem_mode, as, speed);
3706         }
3707       if (USE_LOAD_POST_INCREMENT (mem_mode)
3708           || USE_STORE_POST_INCREMENT (mem_mode))
3709         {
3710           addr = gen_rtx_POST_INC (address_mode, reg0);
3711           has_postinc[mem_mode]
3712             = memory_address_addr_space_p (mem_mode, addr, as);
3713
3714           if (has_postinc[mem_mode])
3715             data->ainc_costs[AINC_POST_INC]
3716               = address_cost (addr, mem_mode, as, speed);
3717         }
3718       for (i = 0; i < 16; i++)
3719         {
3720           sym_p = i & 1;
3721           var_p = (i >> 1) & 1;
3722           off_p = (i >> 2) & 1;
3723           rat_p = (i >> 3) & 1;
3724
3725           addr = reg0;
3726           if (rat_p)
3727             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3728                                    gen_int_mode (rat, address_mode));
3729
3730           if (var_p)
3731             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3732
3733           if (sym_p)
3734             {
3735               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3736               /* ??? We can run into trouble with some backends by presenting
3737                  it with symbols which haven't been properly passed through
3738                  targetm.encode_section_info.  By setting the local bit, we
3739                  enhance the probability of things working.  */
3740               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3741
3742               if (off_p)
3743                 base = gen_rtx_fmt_e (CONST, address_mode,
3744                                       gen_rtx_fmt_ee
3745                                         (PLUS, address_mode, base,
3746                                          gen_int_mode (off, address_mode)));
3747             }
3748           else if (off_p)
3749             base = gen_int_mode (off, address_mode);
3750           else
3751             base = NULL_RTX;
3752
3753           if (base)
3754             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3755
3756           start_sequence ();
3757           /* To avoid splitting addressing modes, pretend that no cse will
3758              follow.  */
3759           old_cse_not_expected = cse_not_expected;
3760           cse_not_expected = true;
3761           addr = memory_address_addr_space (mem_mode, addr, as);
3762           cse_not_expected = old_cse_not_expected;
3763           seq = get_insns ();
3764           end_sequence ();
3765
3766           acost = seq_cost (seq, speed);
3767           acost += address_cost (addr, mem_mode, as, speed);
3768
3769           if (!acost)
3770             acost = 1;
3771           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3772         }
3773
3774       /* On some targets, it is quite expensive to load symbol to a register,
3775          which makes addresses that contain symbols look much more expensive.
3776          However, the symbol will have to be loaded in any case before the
3777          loop (and quite likely we have it in register already), so it does not
3778          make much sense to penalize them too heavily.  So make some final
3779          tweaks for the SYMBOL_PRESENT modes:
3780
3781          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3782          var is cheaper, use this mode with small penalty.
3783          If VAR_PRESENT is true, try whether the mode with
3784          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3785          if this is the case, use it.  */
3786       add_c = add_cost (speed, address_mode);
3787       for (i = 0; i < 8; i++)
3788         {
3789           var_p = i & 1;
3790           off_p = (i >> 1) & 1;
3791           rat_p = (i >> 2) & 1;
3792
3793           acost = data->costs[0][1][off_p][rat_p] + 1;
3794           if (var_p)
3795             acost += add_c;
3796
3797           if (acost < data->costs[1][var_p][off_p][rat_p])
3798             data->costs[1][var_p][off_p][rat_p] = acost;
3799         }
3800
3801       if (dump_file && (dump_flags & TDF_DETAILS))
3802         {
3803           fprintf (dump_file, "Address costs:\n");
3804
3805           for (i = 0; i < 16; i++)
3806             {
3807               sym_p = i & 1;
3808               var_p = (i >> 1) & 1;
3809               off_p = (i >> 2) & 1;
3810               rat_p = (i >> 3) & 1;
3811
3812               fprintf (dump_file, "  ");
3813               if (sym_p)
3814                 fprintf (dump_file, "sym + ");
3815               if (var_p)
3816                 fprintf (dump_file, "var + ");
3817               if (off_p)
3818                 fprintf (dump_file, "cst + ");
3819               if (rat_p)
3820                 fprintf (dump_file, "rat * ");
3821
3822               acost = data->costs[sym_p][var_p][off_p][rat_p];
3823               fprintf (dump_file, "index costs %d\n", acost);
3824             }
3825           if (has_predec[mem_mode] || has_postdec[mem_mode]
3826               || has_preinc[mem_mode] || has_postinc[mem_mode])
3827             fprintf (dump_file, "  May include autoinc/dec\n");
3828           fprintf (dump_file, "\n");
3829         }
3830
3831       address_cost_data_list[data_index] = data;
3832     }
3833
3834   bits = GET_MODE_BITSIZE (address_mode);
3835   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3836   offset &= mask;
3837   if ((offset >> (bits - 1) & 1))
3838     offset |= ~mask;
3839   s_offset = offset;
3840
3841   autoinc = false;
3842   autoinc_type = AINC_NONE;
3843   msize = GET_MODE_SIZE (mem_mode);
3844   autoinc_offset = offset;
3845   if (stmt_after_inc)
3846     autoinc_offset += ratio * cstep;
3847   if (symbol_present || var_present || ratio != 1)
3848     autoinc = false;
3849   else
3850     {
3851       if (has_postinc[mem_mode] && autoinc_offset == 0
3852           && msize == cstep)
3853         autoinc_type = AINC_POST_INC;
3854       else if (has_postdec[mem_mode] && autoinc_offset == 0
3855                && msize == -cstep)
3856         autoinc_type = AINC_POST_DEC;
3857       else if (has_preinc[mem_mode] && autoinc_offset == msize
3858                && msize == cstep)
3859         autoinc_type = AINC_PRE_INC;
3860       else if (has_predec[mem_mode] && autoinc_offset == -msize
3861                && msize == -cstep)
3862         autoinc_type = AINC_PRE_DEC;
3863
3864       if (autoinc_type != AINC_NONE)
3865         autoinc = true;
3866     }
3867
3868   cost = 0;
3869   offset_p = (s_offset != 0
3870               && data->min_offset <= s_offset
3871               && s_offset <= data->max_offset);
3872   ratio_p = (ratio != 1
3873              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3874
3875   if (ratio != 1 && !ratio_p)
3876     cost += mult_by_coeff_cost (ratio, address_mode, speed);
3877
3878   if (s_offset && !offset_p && !symbol_present)
3879     cost += add_cost (speed, address_mode);
3880
3881   if (may_autoinc)
3882     *may_autoinc = autoinc;
3883   if (autoinc)
3884     acost = data->ainc_costs[autoinc_type];
3885   else
3886     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3887   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3888   return new_cost (cost + acost, complexity);
3889 }
3890
3891  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3892     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3893     calculating the operands of EXPR.  Returns true if successful, and returns
3894     the cost in COST.  */
3895
3896 static bool
3897 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3898                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3899 {
3900   comp_cost res;
3901   tree op1 = TREE_OPERAND (expr, 1);
3902   tree cst = TREE_OPERAND (mult, 1);
3903   tree multop = TREE_OPERAND (mult, 0);
3904   int m = exact_log2 (int_cst_value (cst));
3905   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3906   int as_cost, sa_cost;
3907   bool mult_in_op1;
3908
3909   if (!(m >= 0 && m < maxm))
3910     return false;
3911
3912   mult_in_op1 = operand_equal_p (op1, mult, 0);
3913
3914   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3915
3916   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3917      use that in preference to a shift insn followed by an add insn.  */
3918   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3919              ? shiftadd_cost (speed, mode, m)
3920              : (mult_in_op1
3921                 ? shiftsub1_cost (speed, mode, m)
3922                 : shiftsub0_cost (speed, mode, m)));
3923
3924   res = new_cost (MIN (as_cost, sa_cost), 0);
3925   res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3926
3927   STRIP_NOPS (multop);
3928   if (!is_gimple_val (multop))
3929     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3930
3931   *cost = res;
3932   return true;
3933 }
3934
3935 /* Estimates cost of forcing expression EXPR into a variable.  */
3936
3937 static comp_cost
3938 force_expr_to_var_cost (tree expr, bool speed)
3939 {
3940   static bool costs_initialized = false;
3941   static unsigned integer_cost [2];
3942   static unsigned symbol_cost [2];
3943   static unsigned address_cost [2];
3944   tree op0, op1;
3945   comp_cost cost0, cost1, cost;
3946   machine_mode mode;
3947
3948   if (!costs_initialized)
3949     {
3950       tree type = build_pointer_type (integer_type_node);
3951       tree var, addr;
3952       rtx x;
3953       int i;
3954
3955       var = create_tmp_var_raw (integer_type_node, "test_var");
3956       TREE_STATIC (var) = 1;
3957       x = produce_memory_decl_rtl (var, NULL);
3958       SET_DECL_RTL (var, x);
3959
3960       addr = build1 (ADDR_EXPR, type, var);
3961
3962
3963       for (i = 0; i < 2; i++)
3964         {
3965           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3966                                                              2000), i);
3967
3968           symbol_cost[i] = computation_cost (addr, i) + 1;
3969
3970           address_cost[i]
3971             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3972           if (dump_file && (dump_flags & TDF_DETAILS))
3973             {
3974               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3975               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3976               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3977               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3978               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3979               fprintf (dump_file, "\n");
3980             }
3981         }
3982
3983       costs_initialized = true;
3984     }
3985
3986   STRIP_NOPS (expr);
3987
3988   if (SSA_VAR_P (expr))
3989     return no_cost;
3990
3991   if (is_gimple_min_invariant (expr))
3992     {
3993       if (TREE_CODE (expr) == INTEGER_CST)
3994         return new_cost (integer_cost [speed], 0);
3995
3996       if (TREE_CODE (expr) == ADDR_EXPR)
3997         {
3998           tree obj = TREE_OPERAND (expr, 0);
3999
4000           if (TREE_CODE (obj) == VAR_DECL
4001               || TREE_CODE (obj) == PARM_DECL
4002               || TREE_CODE (obj) == RESULT_DECL)
4003             return new_cost (symbol_cost [speed], 0);
4004         }
4005
4006       return new_cost (address_cost [speed], 0);
4007     }
4008
4009   switch (TREE_CODE (expr))
4010     {
4011     case POINTER_PLUS_EXPR:
4012     case PLUS_EXPR:
4013     case MINUS_EXPR:
4014     case MULT_EXPR:
4015       op0 = TREE_OPERAND (expr, 0);
4016       op1 = TREE_OPERAND (expr, 1);
4017       STRIP_NOPS (op0);
4018       STRIP_NOPS (op1);
4019       break;
4020
4021     CASE_CONVERT:
4022     case NEGATE_EXPR:
4023       op0 = TREE_OPERAND (expr, 0);
4024       STRIP_NOPS (op0);
4025       op1 = NULL_TREE;
4026       break;
4027
4028     default:
4029       /* Just an arbitrary value, FIXME.  */
4030       return new_cost (target_spill_cost[speed], 0);
4031     }
4032
4033   if (op0 == NULL_TREE
4034       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4035     cost0 = no_cost;
4036   else
4037     cost0 = force_expr_to_var_cost (op0, speed);
4038
4039   if (op1 == NULL_TREE
4040       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4041     cost1 = no_cost;
4042   else
4043     cost1 = force_expr_to_var_cost (op1, speed);
4044
4045   mode = TYPE_MODE (TREE_TYPE (expr));
4046   switch (TREE_CODE (expr))
4047     {
4048     case POINTER_PLUS_EXPR:
4049     case PLUS_EXPR:
4050     case MINUS_EXPR:
4051     case NEGATE_EXPR:
4052       cost = new_cost (add_cost (speed, mode), 0);
4053       if (TREE_CODE (expr) != NEGATE_EXPR)
4054         {
4055           tree mult = NULL_TREE;
4056           comp_cost sa_cost;
4057           if (TREE_CODE (op1) == MULT_EXPR)
4058             mult = op1;
4059           else if (TREE_CODE (op0) == MULT_EXPR)
4060             mult = op0;
4061
4062           if (mult != NULL_TREE
4063               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4064               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4065                                     speed, &sa_cost))
4066             return sa_cost;
4067         }
4068       break;
4069
4070     CASE_CONVERT:
4071       {
4072         tree inner_mode, outer_mode;
4073         outer_mode = TREE_TYPE (expr);
4074         inner_mode = TREE_TYPE (op0);
4075         cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4076                                        TYPE_MODE (inner_mode), speed), 0);
4077       }
4078       break;
4079
4080     case MULT_EXPR:
4081       if (cst_and_fits_in_hwi (op0))
4082         cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4083                                              mode, speed), 0);
4084       else if (cst_and_fits_in_hwi (op1))
4085         cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4086                                              mode, speed), 0);
4087       else
4088         return new_cost (target_spill_cost [speed], 0);
4089       break;
4090
4091     default:
4092       gcc_unreachable ();
4093     }
4094
4095   cost = add_costs (cost, cost0);
4096   cost = add_costs (cost, cost1);
4097
4098   /* Bound the cost by target_spill_cost.  The parts of complicated
4099      computations often are either loop invariant or at least can
4100      be shared between several iv uses, so letting this grow without
4101      limits would not give reasonable results.  */
4102   if (cost.cost > (int) target_spill_cost [speed])
4103     cost.cost = target_spill_cost [speed];
4104
4105   return cost;
4106 }
4107
4108 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4109    invariants the computation depends on.  */
4110
4111 static comp_cost
4112 force_var_cost (struct ivopts_data *data,
4113                 tree expr, bitmap *depends_on)
4114 {
4115   if (depends_on)
4116     {
4117       fd_ivopts_data = data;
4118       walk_tree (&expr, find_depends, depends_on, NULL);
4119     }
4120
4121   return force_expr_to_var_cost (expr, data->speed);
4122 }
4123
4124 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4125    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4126    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4127    invariants the computation depends on.  */
4128
4129 static comp_cost
4130 split_address_cost (struct ivopts_data *data,
4131                     tree addr, bool *symbol_present, bool *var_present,
4132                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4133 {
4134   tree core;
4135   HOST_WIDE_INT bitsize;
4136   HOST_WIDE_INT bitpos;
4137   tree toffset;
4138   machine_mode mode;
4139   int unsignedp, volatilep;
4140
4141   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4142                               &unsignedp, &volatilep, false);
4143
4144   if (toffset != 0
4145       || bitpos % BITS_PER_UNIT != 0
4146       || TREE_CODE (core) != VAR_DECL)
4147     {
4148       *symbol_present = false;
4149       *var_present = true;
4150       fd_ivopts_data = data;
4151       walk_tree (&addr, find_depends, depends_on, NULL);
4152       return new_cost (target_spill_cost[data->speed], 0);
4153     }
4154
4155   *offset += bitpos / BITS_PER_UNIT;
4156   if (TREE_STATIC (core)
4157       || DECL_EXTERNAL (core))
4158     {
4159       *symbol_present = true;
4160       *var_present = false;
4161       return no_cost;
4162     }
4163
4164   *symbol_present = false;
4165   *var_present = true;
4166   return no_cost;
4167 }
4168
4169 /* Estimates cost of expressing difference of addresses E1 - E2 as
4170    var + symbol + offset.  The value of offset is added to OFFSET,
4171    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4172    part is missing.  DEPENDS_ON is a set of the invariants the computation
4173    depends on.  */
4174
4175 static comp_cost
4176 ptr_difference_cost (struct ivopts_data *data,
4177                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4178                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4179 {
4180   HOST_WIDE_INT diff = 0;
4181   aff_tree aff_e1, aff_e2;
4182   tree type;
4183
4184   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4185
4186   if (ptr_difference_const (e1, e2, &diff))
4187     {
4188       *offset += diff;
4189       *symbol_present = false;
4190       *var_present = false;
4191       return no_cost;
4192     }
4193
4194   if (integer_zerop (e2))
4195     return split_address_cost (data, TREE_OPERAND (e1, 0),
4196                                symbol_present, var_present, offset, depends_on);
4197
4198   *symbol_present = false;
4199   *var_present = true;
4200
4201   type = signed_type_for (TREE_TYPE (e1));
4202   tree_to_aff_combination (e1, type, &aff_e1);
4203   tree_to_aff_combination (e2, type, &aff_e2);
4204   aff_combination_scale (&aff_e2, -1);
4205   aff_combination_add (&aff_e1, &aff_e2);
4206
4207   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4208 }
4209
4210 /* Estimates cost of expressing difference E1 - E2 as
4211    var + symbol + offset.  The value of offset is added to OFFSET,
4212    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4213    part is missing.  DEPENDS_ON is a set of the invariants the computation
4214    depends on.  */
4215
4216 static comp_cost
4217 difference_cost (struct ivopts_data *data,
4218                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4219                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4220 {
4221   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4222   unsigned HOST_WIDE_INT off1, off2;
4223   aff_tree aff_e1, aff_e2;
4224   tree type;
4225
4226   e1 = strip_offset (e1, &off1);
4227   e2 = strip_offset (e2, &off2);
4228   *offset += off1 - off2;
4229
4230   STRIP_NOPS (e1);
4231   STRIP_NOPS (e2);
4232
4233   if (TREE_CODE (e1) == ADDR_EXPR)
4234     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4235                                 offset, depends_on);
4236   *symbol_present = false;
4237
4238   if (operand_equal_p (e1, e2, 0))
4239     {
4240       *var_present = false;
4241       return no_cost;
4242     }
4243
4244   *var_present = true;
4245
4246   if (integer_zerop (e2))
4247     return force_var_cost (data, e1, depends_on);
4248
4249   if (integer_zerop (e1))
4250     {
4251       comp_cost cost = force_var_cost (data, e2, depends_on);
4252       cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4253       return cost;
4254     }
4255
4256   type = signed_type_for (TREE_TYPE (e1));
4257   tree_to_aff_combination (e1, type, &aff_e1);
4258   tree_to_aff_combination (e2, type, &aff_e2);
4259   aff_combination_scale (&aff_e2, -1);
4260   aff_combination_add (&aff_e1, &aff_e2);
4261
4262   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4263 }
4264
4265 /* Returns true if AFF1 and AFF2 are identical.  */
4266
4267 static bool
4268 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4269 {
4270   unsigned i;
4271
4272   if (aff1->n != aff2->n)
4273     return false;
4274
4275   for (i = 0; i < aff1->n; i++)
4276     {
4277       if (aff1->elts[i].coef != aff2->elts[i].coef)
4278         return false;
4279
4280       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4281         return false;
4282     }
4283   return true;
4284 }
4285
4286 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
4287
4288 static int
4289 get_expr_id (struct ivopts_data *data, tree expr)
4290 {
4291   struct iv_inv_expr_ent ent;
4292   struct iv_inv_expr_ent **slot;
4293
4294   ent.expr = expr;
4295   ent.hash = iterative_hash_expr (expr, 0);
4296   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4297   if (*slot)
4298     return (*slot)->id;
4299
4300   *slot = XNEW (struct iv_inv_expr_ent);
4301   (*slot)->expr = expr;
4302   (*slot)->hash = ent.hash;
4303   (*slot)->id = data->inv_expr_id++;
4304   return (*slot)->id;
4305 }
4306
4307 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4308    requires a new compiler generated temporary.  Returns -1 otherwise.
4309    ADDRESS_P is a flag indicating if the expression is for address
4310    computation.  */
4311
4312 static int
4313 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4314                             tree cbase, HOST_WIDE_INT ratio,
4315                             bool address_p)
4316 {
4317   aff_tree ubase_aff, cbase_aff;
4318   tree expr, ub, cb;
4319
4320   STRIP_NOPS (ubase);
4321   STRIP_NOPS (cbase);
4322   ub = ubase;
4323   cb = cbase;
4324
4325   if ((TREE_CODE (ubase) == INTEGER_CST)
4326       && (TREE_CODE (cbase) == INTEGER_CST))
4327     return -1;
4328
4329   /* Strips the constant part. */
4330   if (TREE_CODE (ubase) == PLUS_EXPR
4331       || TREE_CODE (ubase) == MINUS_EXPR
4332       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4333     {
4334       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4335         ubase = TREE_OPERAND (ubase, 0);
4336     }
4337
4338   /* Strips the constant part. */
4339   if (TREE_CODE (cbase) == PLUS_EXPR
4340       || TREE_CODE (cbase) == MINUS_EXPR
4341       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4342     {
4343       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4344         cbase = TREE_OPERAND (cbase, 0);
4345     }
4346
4347   if (address_p)
4348     {
4349       if (((TREE_CODE (ubase) == SSA_NAME)
4350            || (TREE_CODE (ubase) == ADDR_EXPR
4351                && is_gimple_min_invariant (ubase)))
4352           && (TREE_CODE (cbase) == INTEGER_CST))
4353         return -1;
4354
4355       if (((TREE_CODE (cbase) == SSA_NAME)
4356            || (TREE_CODE (cbase) == ADDR_EXPR
4357                && is_gimple_min_invariant (cbase)))
4358           && (TREE_CODE (ubase) == INTEGER_CST))
4359         return -1;
4360     }
4361
4362   if (ratio == 1)
4363     {
4364       if (operand_equal_p (ubase, cbase, 0))
4365         return -1;
4366
4367       if (TREE_CODE (ubase) == ADDR_EXPR
4368           && TREE_CODE (cbase) == ADDR_EXPR)
4369         {
4370           tree usym, csym;
4371
4372           usym = TREE_OPERAND (ubase, 0);
4373           csym = TREE_OPERAND (cbase, 0);
4374           if (TREE_CODE (usym) == ARRAY_REF)
4375             {
4376               tree ind = TREE_OPERAND (usym, 1);
4377               if (TREE_CODE (ind) == INTEGER_CST
4378                   && tree_fits_shwi_p (ind)
4379                   && tree_to_shwi (ind) == 0)
4380                 usym = TREE_OPERAND (usym, 0);
4381             }
4382           if (TREE_CODE (csym) == ARRAY_REF)
4383             {
4384               tree ind = TREE_OPERAND (csym, 1);
4385               if (TREE_CODE (ind) == INTEGER_CST
4386                   && tree_fits_shwi_p (ind)
4387                   && tree_to_shwi (ind) == 0)
4388                 csym = TREE_OPERAND (csym, 0);
4389             }
4390           if (operand_equal_p (usym, csym, 0))
4391             return -1;
4392         }
4393       /* Now do more complex comparison  */
4394       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4395       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4396       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4397         return -1;
4398     }
4399
4400   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4401   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4402
4403   aff_combination_scale (&cbase_aff, -1 * ratio);
4404   aff_combination_add (&ubase_aff, &cbase_aff);
4405   expr = aff_combination_to_tree (&ubase_aff);
4406   return get_expr_id (data, expr);
4407 }
4408
4409
4410
4411 /* Determines the cost of the computation by that USE is expressed
4412    from induction variable CAND.  If ADDRESS_P is true, we just need
4413    to create an address from it, otherwise we want to get it into
4414    register.  A set of invariants we depend on is stored in
4415    DEPENDS_ON.  AT is the statement at that the value is computed.
4416    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4417    addressing is likely.  */
4418
4419 static comp_cost
4420 get_computation_cost_at (struct ivopts_data *data,
4421                          struct iv_use *use, struct iv_cand *cand,
4422                          bool address_p, bitmap *depends_on, gimple at,
4423                          bool *can_autoinc,
4424                          int *inv_expr_id)
4425 {
4426   tree ubase = use->iv->base, ustep = use->iv->step;
4427   tree cbase, cstep;
4428   tree utype = TREE_TYPE (ubase), ctype;
4429   unsigned HOST_WIDE_INT cstepi, offset = 0;
4430   HOST_WIDE_INT ratio, aratio;
4431   bool var_present, symbol_present, stmt_is_after_inc;
4432   comp_cost cost;
4433   widest_int rat;
4434   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4435   machine_mode mem_mode = (address_p
4436                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4437                                 : VOIDmode);
4438
4439   *depends_on = NULL;
4440
4441   /* Only consider real candidates.  */
4442   if (!cand->iv)
4443     return infinite_cost;
4444
4445   cbase = cand->iv->base;
4446   cstep = cand->iv->step;
4447   ctype = TREE_TYPE (cbase);
4448
4449   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4450     {
4451       /* We do not have a precision to express the values of use.  */
4452       return infinite_cost;
4453     }
4454
4455   if (address_p
4456       || (use->iv->base_object
4457           && cand->iv->base_object
4458           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4459           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4460     {
4461       /* Do not try to express address of an object with computation based
4462          on address of a different object.  This may cause problems in rtl
4463          level alias analysis (that does not expect this to be happening,
4464          as this is illegal in C), and would be unlikely to be useful
4465          anyway.  */
4466       if (use->iv->base_object
4467           && cand->iv->base_object
4468           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4469         return infinite_cost;
4470     }
4471
4472   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4473     {
4474       /* TODO -- add direct handling of this case.  */
4475       goto fallback;
4476     }
4477
4478   /* CSTEPI is removed from the offset in case statement is after the
4479      increment.  If the step is not constant, we use zero instead.
4480      This is a bit imprecise (there is the extra addition), but
4481      redundancy elimination is likely to transform the code so that
4482      it uses value of the variable before increment anyway,
4483      so it is not that much unrealistic.  */
4484   if (cst_and_fits_in_hwi (cstep))
4485     cstepi = int_cst_value (cstep);
4486   else
4487     cstepi = 0;
4488
4489   if (!constant_multiple_of (ustep, cstep, &rat))
4490     return infinite_cost;
4491
4492   if (wi::fits_shwi_p (rat))
4493     ratio = rat.to_shwi ();
4494   else
4495     return infinite_cost;
4496
4497   STRIP_NOPS (cbase);
4498   ctype = TREE_TYPE (cbase);
4499
4500   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4501
4502   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4503      or ratio == 1, it is better to handle this like
4504
4505      ubase - ratio * cbase + ratio * var
4506
4507      (also holds in the case ratio == -1, TODO.  */
4508
4509   if (cst_and_fits_in_hwi (cbase))
4510     {
4511       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4512       cost = difference_cost (data,
4513                               ubase, build_int_cst (utype, 0),
4514                               &symbol_present, &var_present, &offset,
4515                               depends_on);
4516       cost.cost /= avg_loop_niter (data->current_loop);
4517     }
4518   else if (ratio == 1)
4519     {
4520       tree real_cbase = cbase;
4521
4522       /* Check to see if any adjustment is needed.  */
4523       if (cstepi == 0 && stmt_is_after_inc)
4524         {
4525           aff_tree real_cbase_aff;
4526           aff_tree cstep_aff;
4527
4528           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4529                                    &real_cbase_aff);
4530           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4531
4532           aff_combination_add (&real_cbase_aff, &cstep_aff);
4533           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4534         }
4535
4536       cost = difference_cost (data,
4537                               ubase, real_cbase,
4538                               &symbol_present, &var_present, &offset,
4539                               depends_on);
4540       cost.cost /= avg_loop_niter (data->current_loop);
4541     }
4542   else if (address_p
4543            && !POINTER_TYPE_P (ctype)
4544            && multiplier_allowed_in_address_p
4545                 (ratio, mem_mode,
4546                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4547     {
4548       cbase
4549         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4550       cost = difference_cost (data,
4551                               ubase, cbase,
4552                               &symbol_present, &var_present, &offset,
4553                               depends_on);
4554       cost.cost /= avg_loop_niter (data->current_loop);
4555     }
4556   else
4557     {
4558       cost = force_var_cost (data, cbase, depends_on);
4559       cost = add_costs (cost,
4560                         difference_cost (data,
4561                                          ubase, build_int_cst (utype, 0),
4562                                          &symbol_present, &var_present,
4563                                          &offset, depends_on));
4564       cost.cost /= avg_loop_niter (data->current_loop);
4565       cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4566     }
4567
4568   /* Set of invariants depended on by sub use has already been computed
4569      for the first use in the group.  */
4570   if (use->sub_id)
4571     {
4572       cost.cost = 0;
4573       if (depends_on && *depends_on)
4574         bitmap_clear (*depends_on);
4575     }
4576   else if (inv_expr_id)
4577     {
4578       *inv_expr_id =
4579           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4580       /* Clear depends on.  */
4581       if (*inv_expr_id != -1 && depends_on && *depends_on)
4582         bitmap_clear (*depends_on);
4583     }
4584
4585   /* If we are after the increment, the value of the candidate is higher by
4586      one iteration.  */
4587   if (stmt_is_after_inc)
4588     offset -= ratio * cstepi;
4589
4590   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4591      (symbol/var1/const parts may be omitted).  If we are looking for an
4592      address, find the cost of addressing this.  */
4593   if (address_p)
4594     return add_costs (cost,
4595                       get_address_cost (symbol_present, var_present,
4596                                         offset, ratio, cstepi,
4597                                         mem_mode,
4598                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4599                                         speed, stmt_is_after_inc,
4600                                         can_autoinc));
4601
4602   /* Otherwise estimate the costs for computing the expression.  */
4603   if (!symbol_present && !var_present && !offset)
4604     {
4605       if (ratio != 1)
4606         cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4607       return cost;
4608     }
4609
4610   /* Symbol + offset should be compile-time computable so consider that they
4611       are added once to the variable, if present.  */
4612   if (var_present && (symbol_present || offset))
4613     cost.cost += adjust_setup_cost (data,
4614                                     add_cost (speed, TYPE_MODE (ctype)));
4615
4616   /* Having offset does not affect runtime cost in case it is added to
4617      symbol, but it increases complexity.  */
4618   if (offset)
4619     cost.complexity++;
4620
4621   cost.cost += add_cost (speed, TYPE_MODE (ctype));
4622
4623   aratio = ratio > 0 ? ratio : -ratio;
4624   if (aratio != 1)
4625     cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4626   return cost;
4627
4628 fallback:
4629   if (can_autoinc)
4630     *can_autoinc = false;
4631
4632   {
4633     /* Just get the expression, expand it and measure the cost.  */
4634     tree comp = get_computation_at (data->current_loop, use, cand, at);
4635
4636     if (!comp)
4637       return infinite_cost;
4638
4639     if (address_p)
4640       comp = build_simple_mem_ref (comp);
4641
4642     return new_cost (computation_cost (comp, speed), 0);
4643   }
4644 }
4645
4646 /* Determines the cost of the computation by that USE is expressed
4647    from induction variable CAND.  If ADDRESS_P is true, we just need
4648    to create an address from it, otherwise we want to get it into
4649    register.  A set of invariants we depend on is stored in
4650    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4651    autoinc addressing is likely.  */
4652
4653 static comp_cost
4654 get_computation_cost (struct ivopts_data *data,
4655                       struct iv_use *use, struct iv_cand *cand,
4656                       bool address_p, bitmap *depends_on,
4657                       bool *can_autoinc, int *inv_expr_id)
4658 {
4659   return get_computation_cost_at (data,
4660                                   use, cand, address_p, depends_on, use->stmt,
4661                                   can_autoinc, inv_expr_id);
4662 }
4663
4664 /* Determines cost of basing replacement of USE on CAND in a generic
4665    expression.  */
4666
4667 static bool
4668 determine_use_iv_cost_generic (struct ivopts_data *data,
4669                                struct iv_use *use, struct iv_cand *cand)
4670 {
4671   bitmap depends_on;
4672   comp_cost cost;
4673   int inv_expr_id = -1;
4674
4675   /* The simple case first -- if we need to express value of the preserved
4676      original biv, the cost is 0.  This also prevents us from counting the
4677      cost of increment twice -- once at this use and once in the cost of
4678      the candidate.  */
4679   if (cand->pos == IP_ORIGINAL
4680       && cand->incremented_at == use->stmt)
4681     {
4682       set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4683                        ERROR_MARK, -1);
4684       return true;
4685     }
4686
4687   cost = get_computation_cost (data, use, cand, false, &depends_on,
4688                                NULL, &inv_expr_id);
4689
4690   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4691                    inv_expr_id);
4692
4693   return !infinite_cost_p (cost);
4694 }
4695
4696 /* Determines cost of basing replacement of USE on CAND in an address.  */
4697
4698 static bool
4699 determine_use_iv_cost_address (struct ivopts_data *data,
4700                                struct iv_use *use, struct iv_cand *cand)
4701 {
4702   bitmap depends_on;
4703   bool can_autoinc;
4704   int inv_expr_id = -1;
4705   struct iv_use *sub_use;
4706   comp_cost sub_cost;
4707   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4708                                          &can_autoinc, &inv_expr_id);
4709
4710   if (cand->ainc_use == use)
4711     {
4712       if (can_autoinc)
4713         cost.cost -= cand->cost_step;
4714       /* If we generated the candidate solely for exploiting autoincrement
4715          opportunities, and it turns out it can't be used, set the cost to
4716          infinity to make sure we ignore it.  */
4717       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4718         cost = infinite_cost;
4719     }
4720   for (sub_use = use->next;
4721        sub_use && !infinite_cost_p (cost);
4722        sub_use = sub_use->next)
4723     {
4724        sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4725                                         &can_autoinc, &inv_expr_id);
4726        cost = add_costs (cost, sub_cost);
4727     }
4728
4729   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4730                    inv_expr_id);
4731
4732   return !infinite_cost_p (cost);
4733 }
4734
4735 /* Computes value of candidate CAND at position AT in iteration NITER, and
4736    stores it to VAL.  */
4737
4738 static void
4739 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4740                aff_tree *val)
4741 {
4742   aff_tree step, delta, nit;
4743   struct iv *iv = cand->iv;
4744   tree type = TREE_TYPE (iv->base);
4745   tree steptype = type;
4746   if (POINTER_TYPE_P (type))
4747     steptype = sizetype;
4748   steptype = unsigned_type_for (type);
4749
4750   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4751   aff_combination_convert (&step, steptype);
4752   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4753   aff_combination_convert (&nit, steptype);
4754   aff_combination_mult (&nit, &step, &delta);
4755   if (stmt_after_increment (loop, cand, at))
4756     aff_combination_add (&delta, &step);
4757
4758   tree_to_aff_combination (iv->base, type, val);
4759   if (!POINTER_TYPE_P (type))
4760     aff_combination_convert (val, steptype);
4761   aff_combination_add (val, &delta);
4762 }
4763
4764 /* Returns period of induction variable iv.  */
4765
4766 static tree
4767 iv_period (struct iv *iv)
4768 {
4769   tree step = iv->step, period, type;
4770   tree pow2div;
4771
4772   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4773
4774   type = unsigned_type_for (TREE_TYPE (step));
4775   /* Period of the iv is lcm (step, type_range)/step -1,
4776      i.e., N*type_range/step - 1. Since type range is power
4777      of two, N == (step >> num_of_ending_zeros_binary (step),
4778      so the final result is
4779
4780        (type_range >> num_of_ending_zeros_binary (step)) - 1
4781
4782   */
4783   pow2div = num_ending_zeros (step);
4784
4785   period = build_low_bits_mask (type,
4786                                 (TYPE_PRECISION (type)
4787                                  - tree_to_uhwi (pow2div)));
4788
4789   return period;
4790 }
4791
4792 /* Returns the comparison operator used when eliminating the iv USE.  */
4793
4794 static enum tree_code
4795 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4796 {
4797   struct loop *loop = data->current_loop;
4798   basic_block ex_bb;
4799   edge exit;
4800
4801   ex_bb = gimple_bb (use->stmt);
4802   exit = EDGE_SUCC (ex_bb, 0);
4803   if (flow_bb_inside_loop_p (loop, exit->dest))
4804     exit = EDGE_SUCC (ex_bb, 1);
4805
4806   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4807 }
4808
4809 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4810    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4811    calculation is performed in non-wrapping type.
4812
4813    TODO: More generally, we could test for the situation that
4814          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4815          This would require knowing the sign of OFFSET.  */
4816
4817 static bool
4818 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4819 {
4820   enum tree_code code;
4821   tree e1, e2;
4822   aff_tree aff_e1, aff_e2, aff_offset;
4823
4824   if (!nowrap_type_p (TREE_TYPE (base)))
4825     return false;
4826
4827   base = expand_simple_operations (base);
4828
4829   if (TREE_CODE (base) == SSA_NAME)
4830     {
4831       gimple stmt = SSA_NAME_DEF_STMT (base);
4832
4833       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4834         return false;
4835
4836       code = gimple_assign_rhs_code (stmt);
4837       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4838         return false;
4839
4840       e1 = gimple_assign_rhs1 (stmt);
4841       e2 = gimple_assign_rhs2 (stmt);
4842     }
4843   else
4844     {
4845       code = TREE_CODE (base);
4846       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4847         return false;
4848       e1 = TREE_OPERAND (base, 0);
4849       e2 = TREE_OPERAND (base, 1);
4850     }
4851
4852   /* Use affine expansion as deeper inspection to prove the equality.  */
4853   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4854                                   &aff_e2, &data->name_expansion_cache);
4855   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4856                                   &aff_offset, &data->name_expansion_cache);
4857   aff_combination_scale (&aff_offset, -1);
4858   switch (code)
4859     {
4860     case PLUS_EXPR:
4861       aff_combination_add (&aff_e2, &aff_offset);
4862       if (aff_combination_zero_p (&aff_e2))
4863         return true;
4864
4865       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4866                                       &aff_e1, &data->name_expansion_cache);
4867       aff_combination_add (&aff_e1, &aff_offset);
4868       return aff_combination_zero_p (&aff_e1);
4869
4870     case POINTER_PLUS_EXPR:
4871       aff_combination_add (&aff_e2, &aff_offset);
4872       return aff_combination_zero_p (&aff_e2);
4873
4874     default:
4875       return false;
4876     }
4877 }
4878
4879 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4880    comparison with CAND.  NITER describes the number of iterations of
4881    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4882
4883    We aim to handle the following situation:
4884
4885    sometype *base, *p;
4886    int a, b, i;
4887
4888    i = a;
4889    p = p_0 = base + a;
4890
4891    do
4892      {
4893        bla (*p);
4894        p++;
4895        i++;
4896      }
4897    while (i < b);
4898
4899    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4900    We aim to optimize this to
4901
4902    p = p_0 = base + a;
4903    do
4904      {
4905        bla (*p);
4906        p++;
4907      }
4908    while (p < p_0 - a + b);
4909
4910    This preserves the correctness, since the pointer arithmetics does not
4911    overflow.  More precisely:
4912
4913    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4914       overflow in computing it or the values of p.
4915    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4916       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4917
4918 static bool
4919 iv_elimination_compare_lt (struct ivopts_data *data,
4920                            struct iv_cand *cand, enum tree_code *comp_p,
4921                            struct tree_niter_desc *niter)
4922 {
4923   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4924   struct aff_tree nit, tmpa, tmpb;
4925   enum tree_code comp;
4926   HOST_WIDE_INT step;
4927
4928   /* We need to know that the candidate induction variable does not overflow.
4929      While more complex analysis may be used to prove this, for now just
4930      check that the variable appears in the original program and that it
4931      is computed in a type that guarantees no overflows.  */
4932   cand_type = TREE_TYPE (cand->iv->base);
4933   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4934     return false;
4935
4936   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4937      the calculation of the BOUND could overflow, making the comparison
4938      invalid.  */
4939   if (!data->loop_single_exit_p)
4940     return false;
4941
4942   /* We need to be able to decide whether candidate is increasing or decreasing
4943      in order to choose the right comparison operator.  */
4944   if (!cst_and_fits_in_hwi (cand->iv->step))
4945     return false;
4946   step = int_cst_value (cand->iv->step);
4947
4948   /* Check that the number of iterations matches the expected pattern:
4949      a + 1 > b ? 0 : b - a - 1.  */
4950   mbz = niter->may_be_zero;
4951   if (TREE_CODE (mbz) == GT_EXPR)
4952     {
4953       /* Handle a + 1 > b.  */
4954       tree op0 = TREE_OPERAND (mbz, 0);
4955       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4956         {
4957           a = TREE_OPERAND (op0, 0);
4958           b = TREE_OPERAND (mbz, 1);
4959         }
4960       else
4961         return false;
4962     }
4963   else if (TREE_CODE (mbz) == LT_EXPR)
4964     {
4965       tree op1 = TREE_OPERAND (mbz, 1);
4966
4967       /* Handle b < a + 1.  */
4968       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4969         {
4970           a = TREE_OPERAND (op1, 0);
4971           b = TREE_OPERAND (mbz, 0);
4972         }
4973       else
4974         return false;
4975     }
4976   else
4977     return false;
4978
4979   /* Expected number of iterations is B - A - 1.  Check that it matches
4980      the actual number, i.e., that B - A - NITER = 1.  */
4981   tree_to_aff_combination (niter->niter, nit_type, &nit);
4982   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4983   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4984   aff_combination_scale (&nit, -1);
4985   aff_combination_scale (&tmpa, -1);
4986   aff_combination_add (&tmpb, &tmpa);
4987   aff_combination_add (&tmpb, &nit);
4988   if (tmpb.n != 0 || tmpb.offset != 1)
4989     return false;
4990
4991   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4992      overflow.  */
4993   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4994                         cand->iv->step,
4995                         fold_convert (TREE_TYPE (cand->iv->step), a));
4996   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4997     return false;
4998
4999   /* Determine the new comparison operator.  */
5000   comp = step < 0 ? GT_EXPR : LT_EXPR;
5001   if (*comp_p == NE_EXPR)
5002     *comp_p = comp;
5003   else if (*comp_p == EQ_EXPR)
5004     *comp_p = invert_tree_comparison (comp, false);
5005   else
5006     gcc_unreachable ();
5007
5008   return true;
5009 }
5010
5011 /* Check whether it is possible to express the condition in USE by comparison
5012    of candidate CAND.  If so, store the value compared with to BOUND, and the
5013    comparison operator to COMP.  */
5014
5015 static bool
5016 may_eliminate_iv (struct ivopts_data *data,
5017                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5018                   enum tree_code *comp)
5019 {
5020   basic_block ex_bb;
5021   edge exit;
5022   tree period;
5023   struct loop *loop = data->current_loop;
5024   aff_tree bnd;
5025   struct tree_niter_desc *desc = NULL;
5026
5027   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5028     return false;
5029
5030   /* For now works only for exits that dominate the loop latch.
5031      TODO: extend to other conditions inside loop body.  */
5032   ex_bb = gimple_bb (use->stmt);
5033   if (use->stmt != last_stmt (ex_bb)
5034       || gimple_code (use->stmt) != GIMPLE_COND
5035       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5036     return false;
5037
5038   exit = EDGE_SUCC (ex_bb, 0);
5039   if (flow_bb_inside_loop_p (loop, exit->dest))
5040     exit = EDGE_SUCC (ex_bb, 1);
5041   if (flow_bb_inside_loop_p (loop, exit->dest))
5042     return false;
5043
5044   desc = niter_for_exit (data, exit);
5045   if (!desc)
5046     return false;
5047
5048   /* Determine whether we can use the variable to test the exit condition.
5049      This is the case iff the period of the induction variable is greater
5050      than the number of iterations for which the exit condition is true.  */
5051   period = iv_period (cand->iv);
5052
5053   /* If the number of iterations is constant, compare against it directly.  */
5054   if (TREE_CODE (desc->niter) == INTEGER_CST)
5055     {
5056       /* See cand_value_at.  */
5057       if (stmt_after_increment (loop, cand, use->stmt))
5058         {
5059           if (!tree_int_cst_lt (desc->niter, period))
5060             return false;
5061         }
5062       else
5063         {
5064           if (tree_int_cst_lt (period, desc->niter))
5065             return false;
5066         }
5067     }
5068
5069   /* If not, and if this is the only possible exit of the loop, see whether
5070      we can get a conservative estimate on the number of iterations of the
5071      entire loop and compare against that instead.  */
5072   else
5073     {
5074       widest_int period_value, max_niter;
5075
5076       max_niter = desc->max;
5077       if (stmt_after_increment (loop, cand, use->stmt))
5078         max_niter += 1;
5079       period_value = wi::to_widest (period);
5080       if (wi::gtu_p (max_niter, period_value))
5081         {
5082           /* See if we can take advantage of inferred loop bound information.  */
5083           if (data->loop_single_exit_p)
5084             {
5085               if (!max_loop_iterations (loop, &max_niter))
5086                 return false;
5087               /* The loop bound is already adjusted by adding 1.  */
5088               if (wi::gtu_p (max_niter, period_value))
5089                 return false;
5090             }
5091           else
5092             return false;
5093         }
5094     }
5095
5096   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5097
5098   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5099                          aff_combination_to_tree (&bnd));
5100   *comp = iv_elimination_compare (data, use);
5101
5102   /* It is unlikely that computing the number of iterations using division
5103      would be more profitable than keeping the original induction variable.  */
5104   if (expression_expensive_p (*bound))
5105     return false;
5106
5107   /* Sometimes, it is possible to handle the situation that the number of
5108      iterations may be zero unless additional assumtions by using <
5109      instead of != in the exit condition.
5110
5111      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5112            base the exit condition on it.  However, that is often too
5113            expensive.  */
5114   if (!integer_zerop (desc->may_be_zero))
5115     return iv_elimination_compare_lt (data, cand, comp, desc);
5116
5117   return true;
5118 }
5119
5120  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5121     be copied, if is is used in the loop body and DATA->body_includes_call.  */
5122
5123 static int
5124 parm_decl_cost (struct ivopts_data *data, tree bound)
5125 {
5126   tree sbound = bound;
5127   STRIP_NOPS (sbound);
5128
5129   if (TREE_CODE (sbound) == SSA_NAME
5130       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5131       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5132       && data->body_includes_call)
5133     return COSTS_N_INSNS (1);
5134
5135   return 0;
5136 }
5137
5138 /* Determines cost of basing replacement of USE on CAND in a condition.  */
5139
5140 static bool
5141 determine_use_iv_cost_condition (struct ivopts_data *data,
5142                                  struct iv_use *use, struct iv_cand *cand)
5143 {
5144   tree bound = NULL_TREE;
5145   struct iv *cmp_iv;
5146   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5147   comp_cost elim_cost, express_cost, cost, bound_cost;
5148   bool ok;
5149   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5150   tree *control_var, *bound_cst;
5151   enum tree_code comp = ERROR_MARK;
5152
5153   /* Only consider real candidates.  */
5154   if (!cand->iv)
5155     {
5156       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5157                        ERROR_MARK, -1);
5158       return false;
5159     }
5160
5161   /* Try iv elimination.  */
5162   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5163     {
5164       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5165       if (elim_cost.cost == 0)
5166         elim_cost.cost = parm_decl_cost (data, bound);
5167       else if (TREE_CODE (bound) == INTEGER_CST)
5168         elim_cost.cost = 0;
5169       /* If we replace a loop condition 'i < n' with 'p < base + n',
5170          depends_on_elim will have 'base' and 'n' set, which implies
5171          that both 'base' and 'n' will be live during the loop.  More likely,
5172          'base + n' will be loop invariant, resulting in only one live value
5173          during the loop.  So in that case we clear depends_on_elim and set
5174         elim_inv_expr_id instead.  */
5175       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5176         {
5177           elim_inv_expr_id = get_expr_id (data, bound);
5178           bitmap_clear (depends_on_elim);
5179         }
5180       /* The bound is a loop invariant, so it will be only computed
5181          once.  */
5182       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5183     }
5184   else
5185     elim_cost = infinite_cost;
5186
5187   /* Try expressing the original giv.  If it is compared with an invariant,
5188      note that we cannot get rid of it.  */
5189   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5190                               NULL, &cmp_iv);
5191   gcc_assert (ok);
5192
5193   /* When the condition is a comparison of the candidate IV against
5194      zero, prefer this IV.
5195
5196      TODO: The constant that we're subtracting from the cost should
5197      be target-dependent.  This information should be added to the
5198      target costs for each backend.  */
5199   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5200       && integer_zerop (*bound_cst)
5201       && (operand_equal_p (*control_var, cand->var_after, 0)
5202           || operand_equal_p (*control_var, cand->var_before, 0)))
5203     elim_cost.cost -= 1;
5204
5205   express_cost = get_computation_cost (data, use, cand, false,
5206                                        &depends_on_express, NULL,
5207                                        &express_inv_expr_id);
5208   fd_ivopts_data = data;
5209   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5210
5211   /* Count the cost of the original bound as well.  */
5212   bound_cost = force_var_cost (data, *bound_cst, NULL);
5213   if (bound_cost.cost == 0)
5214     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5215   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5216     bound_cost.cost = 0;
5217   express_cost.cost += bound_cost.cost;
5218
5219   /* Choose the better approach, preferring the eliminated IV. */
5220   if (compare_costs (elim_cost, express_cost) <= 0)
5221     {
5222       cost = elim_cost;
5223       depends_on = depends_on_elim;
5224       depends_on_elim = NULL;
5225       inv_expr_id = elim_inv_expr_id;
5226     }
5227   else
5228     {
5229       cost = express_cost;
5230       depends_on = depends_on_express;
5231       depends_on_express = NULL;
5232       bound = NULL_TREE;
5233       comp = ERROR_MARK;
5234       inv_expr_id = express_inv_expr_id;
5235     }
5236
5237   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5238
5239   if (depends_on_elim)
5240     BITMAP_FREE (depends_on_elim);
5241   if (depends_on_express)
5242     BITMAP_FREE (depends_on_express);
5243
5244   return !infinite_cost_p (cost);
5245 }
5246
5247 /* Determines cost of basing replacement of USE on CAND.  Returns false
5248    if USE cannot be based on CAND.  */
5249
5250 static bool
5251 determine_use_iv_cost (struct ivopts_data *data,
5252                        struct iv_use *use, struct iv_cand *cand)
5253 {
5254   switch (use->type)
5255     {
5256     case USE_NONLINEAR_EXPR:
5257       return determine_use_iv_cost_generic (data, use, cand);
5258
5259     case USE_ADDRESS:
5260       return determine_use_iv_cost_address (data, use, cand);
5261
5262     case USE_COMPARE:
5263       return determine_use_iv_cost_condition (data, use, cand);
5264
5265     default:
5266       gcc_unreachable ();
5267     }
5268 }
5269
5270 /* Return true if get_computation_cost indicates that autoincrement is
5271    a possibility for the pair of USE and CAND, false otherwise.  */
5272
5273 static bool
5274 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5275                            struct iv_cand *cand)
5276 {
5277   bitmap depends_on;
5278   bool can_autoinc;
5279   comp_cost cost;
5280
5281   if (use->type != USE_ADDRESS)
5282     return false;
5283
5284   cost = get_computation_cost (data, use, cand, true, &depends_on,
5285                                &can_autoinc, NULL);
5286
5287   BITMAP_FREE (depends_on);
5288
5289   return !infinite_cost_p (cost) && can_autoinc;
5290 }
5291
5292 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5293    use that allows autoincrement, and set their AINC_USE if possible.  */
5294
5295 static void
5296 set_autoinc_for_original_candidates (struct ivopts_data *data)
5297 {
5298   unsigned i, j;
5299
5300   for (i = 0; i < n_iv_cands (data); i++)
5301     {
5302       struct iv_cand *cand = iv_cand (data, i);
5303       struct iv_use *closest_before = NULL;
5304       struct iv_use *closest_after = NULL;
5305       if (cand->pos != IP_ORIGINAL)
5306         continue;
5307
5308       for (j = 0; j < n_iv_uses (data); j++)
5309         {
5310           struct iv_use *use = iv_use (data, j);
5311           unsigned uid = gimple_uid (use->stmt);
5312
5313           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5314             continue;
5315
5316           if (uid < gimple_uid (cand->incremented_at)
5317               && (closest_before == NULL
5318                   || uid > gimple_uid (closest_before->stmt)))
5319             closest_before = use;
5320
5321           if (uid > gimple_uid (cand->incremented_at)
5322               && (closest_after == NULL
5323                   || uid < gimple_uid (closest_after->stmt)))
5324             closest_after = use;
5325         }
5326
5327       if (closest_before != NULL
5328           && autoinc_possible_for_pair (data, closest_before, cand))
5329         cand->ainc_use = closest_before;
5330       else if (closest_after != NULL
5331                && autoinc_possible_for_pair (data, closest_after, cand))
5332         cand->ainc_use = closest_after;
5333     }
5334 }
5335
5336 /* Finds the candidates for the induction variables.  */
5337
5338 static void
5339 find_iv_candidates (struct ivopts_data *data)
5340 {
5341   /* Add commonly used ivs.  */
5342   add_standard_iv_candidates (data);
5343
5344   /* Add old induction variables.  */
5345   add_old_ivs_candidates (data);
5346
5347   /* Add induction variables derived from uses.  */
5348   add_derived_ivs_candidates (data);
5349
5350   set_autoinc_for_original_candidates (data);
5351
5352   /* Record the important candidates.  */
5353   record_important_candidates (data);
5354 }
5355
5356 /* Determines costs of basing the use of the iv on an iv candidate.  */
5357
5358 static void
5359 determine_use_iv_costs (struct ivopts_data *data)
5360 {
5361   unsigned i, j;
5362   struct iv_use *use;
5363   struct iv_cand *cand;
5364   bitmap to_clear = BITMAP_ALLOC (NULL);
5365
5366   alloc_use_cost_map (data);
5367
5368   for (i = 0; i < n_iv_uses (data); i++)
5369     {
5370       use = iv_use (data, i);
5371
5372       if (data->consider_all_candidates)
5373         {
5374           for (j = 0; j < n_iv_cands (data); j++)
5375             {
5376               cand = iv_cand (data, j);
5377               determine_use_iv_cost (data, use, cand);
5378             }
5379         }
5380       else
5381         {
5382           bitmap_iterator bi;
5383
5384           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5385             {
5386               cand = iv_cand (data, j);
5387               if (!determine_use_iv_cost (data, use, cand))
5388                 bitmap_set_bit (to_clear, j);
5389             }
5390
5391           /* Remove the candidates for that the cost is infinite from
5392              the list of related candidates.  */
5393           bitmap_and_compl_into (use->related_cands, to_clear);
5394           bitmap_clear (to_clear);
5395         }
5396     }
5397
5398   BITMAP_FREE (to_clear);
5399
5400   if (dump_file && (dump_flags & TDF_DETAILS))
5401     {
5402       fprintf (dump_file, "Use-candidate costs:\n");
5403
5404       for (i = 0; i < n_iv_uses (data); i++)
5405         {
5406           use = iv_use (data, i);
5407
5408           fprintf (dump_file, "Use %d:\n", i);
5409           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5410           for (j = 0; j < use->n_map_members; j++)
5411             {
5412               if (!use->cost_map[j].cand
5413                   || infinite_cost_p (use->cost_map[j].cost))
5414                 continue;
5415
5416               fprintf (dump_file, "  %d\t%d\t%d\t",
5417                        use->cost_map[j].cand->id,
5418                        use->cost_map[j].cost.cost,
5419                        use->cost_map[j].cost.complexity);
5420               if (use->cost_map[j].depends_on)
5421                 bitmap_print (dump_file,
5422                               use->cost_map[j].depends_on, "","");
5423               if (use->cost_map[j].inv_expr_id != -1)
5424                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5425               fprintf (dump_file, "\n");
5426             }
5427
5428           fprintf (dump_file, "\n");
5429         }
5430       fprintf (dump_file, "\n");
5431     }
5432 }
5433
5434 /* Determines cost of the candidate CAND.  */
5435
5436 static void
5437 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5438 {
5439   comp_cost cost_base;
5440   unsigned cost, cost_step;
5441   tree base;
5442
5443   if (!cand->iv)
5444     {
5445       cand->cost = 0;
5446       return;
5447     }
5448
5449   /* There are two costs associated with the candidate -- its increment
5450      and its initialization.  The second is almost negligible for any loop
5451      that rolls enough, so we take it just very little into account.  */
5452
5453   base = cand->iv->base;
5454   cost_base = force_var_cost (data, base, NULL);
5455   /* It will be exceptional that the iv register happens to be initialized with
5456      the proper value at no cost.  In general, there will at least be a regcopy
5457      or a const set.  */
5458   if (cost_base.cost == 0)
5459     cost_base.cost = COSTS_N_INSNS (1);
5460   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5461
5462   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5463
5464   /* Prefer the original ivs unless we may gain something by replacing it.
5465      The reason is to make debugging simpler; so this is not relevant for
5466      artificial ivs created by other optimization passes.  */
5467   if (cand->pos != IP_ORIGINAL
5468       || !SSA_NAME_VAR (cand->var_before)
5469       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5470     cost++;
5471
5472   /* Prefer not to insert statements into latch unless there are some
5473      already (so that we do not create unnecessary jumps).  */
5474   if (cand->pos == IP_END
5475       && empty_block_p (ip_end_pos (data->current_loop)))
5476     cost++;
5477
5478   cand->cost = cost;
5479   cand->cost_step = cost_step;
5480 }
5481
5482 /* Determines costs of computation of the candidates.  */
5483
5484 static void
5485 determine_iv_costs (struct ivopts_data *data)
5486 {
5487   unsigned i;
5488
5489   if (dump_file && (dump_flags & TDF_DETAILS))
5490     {
5491       fprintf (dump_file, "Candidate costs:\n");
5492       fprintf (dump_file, "  cand\tcost\n");
5493     }
5494
5495   for (i = 0; i < n_iv_cands (data); i++)
5496     {
5497       struct iv_cand *cand = iv_cand (data, i);
5498
5499       determine_iv_cost (data, cand);
5500
5501       if (dump_file && (dump_flags & TDF_DETAILS))
5502         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5503     }
5504
5505   if (dump_file && (dump_flags & TDF_DETAILS))
5506     fprintf (dump_file, "\n");
5507 }
5508
5509 /* Calculates cost for having SIZE induction variables.  */
5510
5511 static unsigned
5512 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5513 {
5514   /* We add size to the cost, so that we prefer eliminating ivs
5515      if possible.  */
5516   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5517                                             data->body_includes_call);
5518 }
5519
5520 /* For each size of the induction variable set determine the penalty.  */
5521
5522 static void
5523 determine_set_costs (struct ivopts_data *data)
5524 {
5525   unsigned j, n;
5526   gphi *phi;
5527   gphi_iterator psi;
5528   tree op;
5529   struct loop *loop = data->current_loop;
5530   bitmap_iterator bi;
5531
5532   if (dump_file && (dump_flags & TDF_DETAILS))
5533     {
5534       fprintf (dump_file, "Global costs:\n");
5535       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5536       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5537       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5538       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5539     }
5540
5541   n = 0;
5542   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5543     {
5544       phi = psi.phi ();
5545       op = PHI_RESULT (phi);
5546
5547       if (virtual_operand_p (op))
5548         continue;
5549
5550       if (get_iv (data, op))
5551         continue;
5552
5553       n++;
5554     }
5555
5556   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5557     {
5558       struct version_info *info = ver_info (data, j);
5559
5560       if (info->inv_id && info->has_nonlin_use)
5561         n++;
5562     }
5563
5564   data->regs_used = n;
5565   if (dump_file && (dump_flags & TDF_DETAILS))
5566     fprintf (dump_file, "  regs_used %d\n", n);
5567
5568   if (dump_file && (dump_flags & TDF_DETAILS))
5569     {
5570       fprintf (dump_file, "  cost for size:\n");
5571       fprintf (dump_file, "  ivs\tcost\n");
5572       for (j = 0; j <= 2 * target_avail_regs; j++)
5573         fprintf (dump_file, "  %d\t%d\n", j,
5574                  ivopts_global_cost_for_size (data, j));
5575       fprintf (dump_file, "\n");
5576     }
5577 }
5578
5579 /* Returns true if A is a cheaper cost pair than B.  */
5580
5581 static bool
5582 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5583 {
5584   int cmp;
5585
5586   if (!a)
5587     return false;
5588
5589   if (!b)
5590     return true;
5591
5592   cmp = compare_costs (a->cost, b->cost);
5593   if (cmp < 0)
5594     return true;
5595
5596   if (cmp > 0)
5597     return false;
5598
5599   /* In case the costs are the same, prefer the cheaper candidate.  */
5600   if (a->cand->cost < b->cand->cost)
5601     return true;
5602
5603   return false;
5604 }
5605
5606
5607 /* Returns candidate by that USE is expressed in IVS.  */
5608
5609 static struct cost_pair *
5610 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5611 {
5612   return ivs->cand_for_use[use->id];
5613 }
5614
5615 /* Computes the cost field of IVS structure.  */
5616
5617 static void
5618 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5619 {
5620   comp_cost cost = ivs->cand_use_cost;
5621
5622   cost.cost += ivs->cand_cost;
5623
5624   cost.cost += ivopts_global_cost_for_size (data,
5625                                             ivs->n_regs + ivs->num_used_inv_expr);
5626
5627   ivs->cost = cost;
5628 }
5629
5630 /* Remove invariants in set INVS to set IVS.  */
5631
5632 static void
5633 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5634 {
5635   bitmap_iterator bi;
5636   unsigned iid;
5637
5638   if (!invs)
5639     return;
5640
5641   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5642     {
5643       ivs->n_invariant_uses[iid]--;
5644       if (ivs->n_invariant_uses[iid] == 0)
5645         ivs->n_regs--;
5646     }
5647 }
5648
5649 /* Set USE not to be expressed by any candidate in IVS.  */
5650
5651 static void
5652 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5653                  struct iv_use *use)
5654 {
5655   unsigned uid = use->id, cid;
5656   struct cost_pair *cp;
5657
5658   cp = ivs->cand_for_use[uid];
5659   if (!cp)
5660     return;
5661   cid = cp->cand->id;
5662
5663   ivs->bad_uses++;
5664   ivs->cand_for_use[uid] = NULL;
5665   ivs->n_cand_uses[cid]--;
5666
5667   if (ivs->n_cand_uses[cid] == 0)
5668     {
5669       bitmap_clear_bit (ivs->cands, cid);
5670       /* Do not count the pseudocandidates.  */
5671       if (cp->cand->iv)
5672         ivs->n_regs--;
5673       ivs->n_cands--;
5674       ivs->cand_cost -= cp->cand->cost;
5675
5676       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5677     }
5678
5679   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5680
5681   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5682
5683   if (cp->inv_expr_id != -1)
5684     {
5685       ivs->used_inv_expr[cp->inv_expr_id]--;
5686       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5687         ivs->num_used_inv_expr--;
5688     }
5689   iv_ca_recount_cost (data, ivs);
5690 }
5691
5692 /* Add invariants in set INVS to set IVS.  */
5693
5694 static void
5695 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5696 {
5697   bitmap_iterator bi;
5698   unsigned iid;
5699
5700   if (!invs)
5701     return;
5702
5703   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5704     {
5705       ivs->n_invariant_uses[iid]++;
5706       if (ivs->n_invariant_uses[iid] == 1)
5707         ivs->n_regs++;
5708     }
5709 }
5710
5711 /* Set cost pair for USE in set IVS to CP.  */
5712
5713 static void
5714 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5715               struct iv_use *use, struct cost_pair *cp)
5716 {
5717   unsigned uid = use->id, cid;
5718
5719   if (ivs->cand_for_use[uid] == cp)
5720     return;
5721
5722   if (ivs->cand_for_use[uid])
5723     iv_ca_set_no_cp (data, ivs, use);
5724
5725   if (cp)
5726     {
5727       cid = cp->cand->id;
5728
5729       ivs->bad_uses--;
5730       ivs->cand_for_use[uid] = cp;
5731       ivs->n_cand_uses[cid]++;
5732       if (ivs->n_cand_uses[cid] == 1)
5733         {
5734           bitmap_set_bit (ivs->cands, cid);
5735           /* Do not count the pseudocandidates.  */
5736           if (cp->cand->iv)
5737             ivs->n_regs++;
5738           ivs->n_cands++;
5739           ivs->cand_cost += cp->cand->cost;
5740
5741           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5742         }
5743
5744       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5745       iv_ca_set_add_invariants (ivs, cp->depends_on);
5746
5747       if (cp->inv_expr_id != -1)
5748         {
5749           ivs->used_inv_expr[cp->inv_expr_id]++;
5750           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5751             ivs->num_used_inv_expr++;
5752         }
5753       iv_ca_recount_cost (data, ivs);
5754     }
5755 }
5756
5757 /* Extend set IVS by expressing USE by some of the candidates in it
5758    if possible.  Consider all important candidates if candidates in
5759    set IVS don't give any result.  */
5760
5761 static void
5762 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5763                struct iv_use *use)
5764 {
5765   struct cost_pair *best_cp = NULL, *cp;
5766   bitmap_iterator bi;
5767   unsigned i;
5768   struct iv_cand *cand;
5769
5770   gcc_assert (ivs->upto >= use->id);
5771   ivs->upto++;
5772   ivs->bad_uses++;
5773
5774   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5775     {
5776       cand = iv_cand (data, i);
5777       cp = get_use_iv_cost (data, use, cand);
5778       if (cheaper_cost_pair (cp, best_cp))
5779         best_cp = cp;
5780     }
5781
5782   if (best_cp == NULL)
5783     {
5784       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5785         {
5786           cand = iv_cand (data, i);
5787           cp = get_use_iv_cost (data, use, cand);
5788           if (cheaper_cost_pair (cp, best_cp))
5789             best_cp = cp;
5790         }
5791     }
5792
5793   iv_ca_set_cp (data, ivs, use, best_cp);
5794 }
5795
5796 /* Get cost for assignment IVS.  */
5797
5798 static comp_cost
5799 iv_ca_cost (struct iv_ca *ivs)
5800 {
5801   /* This was a conditional expression but it triggered a bug in
5802      Sun C 5.5.  */
5803   if (ivs->bad_uses)
5804     return infinite_cost;
5805   else
5806     return ivs->cost;
5807 }
5808
5809 /* Returns true if all dependences of CP are among invariants in IVS.  */
5810
5811 static bool
5812 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5813 {
5814   unsigned i;
5815   bitmap_iterator bi;
5816
5817   if (!cp->depends_on)
5818     return true;
5819
5820   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5821     {
5822       if (ivs->n_invariant_uses[i] == 0)
5823         return false;
5824     }
5825
5826   return true;
5827 }
5828
5829 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5830    it before NEXT_CHANGE.  */
5831
5832 static struct iv_ca_delta *
5833 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5834                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5835 {
5836   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5837
5838   change->use = use;
5839   change->old_cp = old_cp;
5840   change->new_cp = new_cp;
5841   change->next_change = next_change;
5842
5843   return change;
5844 }
5845
5846 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5847    are rewritten.  */
5848
5849 static struct iv_ca_delta *
5850 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5851 {
5852   struct iv_ca_delta *last;
5853
5854   if (!l2)
5855     return l1;
5856
5857   if (!l1)
5858     return l2;
5859
5860   for (last = l1; last->next_change; last = last->next_change)
5861     continue;
5862   last->next_change = l2;
5863
5864   return l1;
5865 }
5866
5867 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5868
5869 static struct iv_ca_delta *
5870 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5871 {
5872   struct iv_ca_delta *act, *next, *prev = NULL;
5873
5874   for (act = delta; act; act = next)
5875     {
5876       next = act->next_change;
5877       act->next_change = prev;
5878       prev = act;
5879
5880       std::swap (act->old_cp, act->new_cp);
5881     }
5882
5883   return prev;
5884 }
5885
5886 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5887    reverted instead.  */
5888
5889 static void
5890 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5891                     struct iv_ca_delta *delta, bool forward)
5892 {
5893   struct cost_pair *from, *to;
5894   struct iv_ca_delta *act;
5895
5896   if (!forward)
5897     delta = iv_ca_delta_reverse (delta);
5898
5899   for (act = delta; act; act = act->next_change)
5900     {
5901       from = act->old_cp;
5902       to = act->new_cp;
5903       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5904       iv_ca_set_cp (data, ivs, act->use, to);
5905     }
5906
5907   if (!forward)
5908     iv_ca_delta_reverse (delta);
5909 }
5910
5911 /* Returns true if CAND is used in IVS.  */
5912
5913 static bool
5914 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5915 {
5916   return ivs->n_cand_uses[cand->id] > 0;
5917 }
5918
5919 /* Returns number of induction variable candidates in the set IVS.  */
5920
5921 static unsigned
5922 iv_ca_n_cands (struct iv_ca *ivs)
5923 {
5924   return ivs->n_cands;
5925 }
5926
5927 /* Free the list of changes DELTA.  */
5928
5929 static void
5930 iv_ca_delta_free (struct iv_ca_delta **delta)
5931 {
5932   struct iv_ca_delta *act, *next;
5933
5934   for (act = *delta; act; act = next)
5935     {
5936       next = act->next_change;
5937       free (act);
5938     }
5939
5940   *delta = NULL;
5941 }
5942
5943 /* Allocates new iv candidates assignment.  */
5944
5945 static struct iv_ca *
5946 iv_ca_new (struct ivopts_data *data)
5947 {
5948   struct iv_ca *nw = XNEW (struct iv_ca);
5949
5950   nw->upto = 0;
5951   nw->bad_uses = 0;
5952   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5953   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5954   nw->cands = BITMAP_ALLOC (NULL);
5955   nw->n_cands = 0;
5956   nw->n_regs = 0;
5957   nw->cand_use_cost = no_cost;
5958   nw->cand_cost = 0;
5959   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5960   nw->cost = no_cost;
5961   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5962   nw->num_used_inv_expr = 0;
5963
5964   return nw;
5965 }
5966
5967 /* Free memory occupied by the set IVS.  */
5968
5969 static void
5970 iv_ca_free (struct iv_ca **ivs)
5971 {
5972   free ((*ivs)->cand_for_use);
5973   free ((*ivs)->n_cand_uses);
5974   BITMAP_FREE ((*ivs)->cands);
5975   free ((*ivs)->n_invariant_uses);
5976   free ((*ivs)->used_inv_expr);
5977   free (*ivs);
5978   *ivs = NULL;
5979 }
5980
5981 /* Dumps IVS to FILE.  */
5982
5983 static void
5984 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5985 {
5986   const char *pref = "  invariants ";
5987   unsigned i;
5988   comp_cost cost = iv_ca_cost (ivs);
5989
5990   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5991   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5992            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5993   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5994
5995    for (i = 0; i < ivs->upto; i++)
5996     {
5997       struct iv_use *use = iv_use (data, i);
5998       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5999       if (cp)
6000         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6001                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6002       else
6003         fprintf (file, "   use:%d --> ??\n", use->id);
6004     }
6005
6006   for (i = 1; i <= data->max_inv_id; i++)
6007     if (ivs->n_invariant_uses[i])
6008       {
6009         fprintf (file, "%s%d", pref, i);
6010         pref = ", ";
6011       }
6012   fprintf (file, "\n\n");
6013 }
6014
6015 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6016    new set, and store differences in DELTA.  Number of induction variables
6017    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6018    the function will try to find a solution with mimimal iv candidates.  */
6019
6020 static comp_cost
6021 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6022               struct iv_cand *cand, struct iv_ca_delta **delta,
6023               unsigned *n_ivs, bool min_ncand)
6024 {
6025   unsigned i;
6026   comp_cost cost;
6027   struct iv_use *use;
6028   struct cost_pair *old_cp, *new_cp;
6029
6030   *delta = NULL;
6031   for (i = 0; i < ivs->upto; i++)
6032     {
6033       use = iv_use (data, i);
6034       old_cp = iv_ca_cand_for_use (ivs, use);
6035
6036       if (old_cp
6037           && old_cp->cand == cand)
6038         continue;
6039
6040       new_cp = get_use_iv_cost (data, use, cand);
6041       if (!new_cp)
6042         continue;
6043
6044       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6045         continue;
6046
6047       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6048         continue;
6049
6050       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6051     }
6052
6053   iv_ca_delta_commit (data, ivs, *delta, true);
6054   cost = iv_ca_cost (ivs);
6055   if (n_ivs)
6056     *n_ivs = iv_ca_n_cands (ivs);
6057   iv_ca_delta_commit (data, ivs, *delta, false);
6058
6059   return cost;
6060 }
6061
6062 /* Try narrowing set IVS by removing CAND.  Return the cost of
6063    the new set and store the differences in DELTA.  START is
6064    the candidate with which we start narrowing.  */
6065
6066 static comp_cost
6067 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6068               struct iv_cand *cand, struct iv_cand *start,
6069               struct iv_ca_delta **delta)
6070 {
6071   unsigned i, ci;
6072   struct iv_use *use;
6073   struct cost_pair *old_cp, *new_cp, *cp;
6074   bitmap_iterator bi;
6075   struct iv_cand *cnd;
6076   comp_cost cost, best_cost, acost;
6077
6078   *delta = NULL;
6079   for (i = 0; i < n_iv_uses (data); i++)
6080     {
6081       use = iv_use (data, i);
6082
6083       old_cp = iv_ca_cand_for_use (ivs, use);
6084       if (old_cp->cand != cand)
6085         continue;
6086
6087       best_cost = iv_ca_cost (ivs);
6088       /* Start narrowing with START.  */
6089       new_cp = get_use_iv_cost (data, use, start);
6090
6091       if (data->consider_all_candidates)
6092         {
6093           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6094             {
6095               if (ci == cand->id || (start && ci == start->id))
6096                 continue;
6097
6098               cnd = iv_cand (data, ci);
6099
6100               cp = get_use_iv_cost (data, use, cnd);
6101               if (!cp)
6102                 continue;
6103
6104               iv_ca_set_cp (data, ivs, use, cp);
6105               acost = iv_ca_cost (ivs);
6106
6107               if (compare_costs (acost, best_cost) < 0)
6108                 {
6109                   best_cost = acost;
6110                   new_cp = cp;
6111                 }
6112             }
6113         }
6114       else
6115         {
6116           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6117             {
6118               if (ci == cand->id || (start && ci == start->id))
6119                 continue;
6120
6121               cnd = iv_cand (data, ci);
6122
6123               cp = get_use_iv_cost (data, use, cnd);
6124               if (!cp)
6125                 continue;
6126
6127               iv_ca_set_cp (data, ivs, use, cp);
6128               acost = iv_ca_cost (ivs);
6129
6130               if (compare_costs (acost, best_cost) < 0)
6131                 {
6132                   best_cost = acost;
6133                   new_cp = cp;
6134                 }
6135             }
6136         }
6137       /* Restore to old cp for use.  */
6138       iv_ca_set_cp (data, ivs, use, old_cp);
6139
6140       if (!new_cp)
6141         {
6142           iv_ca_delta_free (delta);
6143           return infinite_cost;
6144         }
6145
6146       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6147     }
6148
6149   iv_ca_delta_commit (data, ivs, *delta, true);
6150   cost = iv_ca_cost (ivs);
6151   iv_ca_delta_commit (data, ivs, *delta, false);
6152
6153   return cost;
6154 }
6155
6156 /* Try optimizing the set of candidates IVS by removing candidates different
6157    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6158    differences in DELTA.  */
6159
6160 static comp_cost
6161 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6162              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6163 {
6164   bitmap_iterator bi;
6165   struct iv_ca_delta *act_delta, *best_delta;
6166   unsigned i;
6167   comp_cost best_cost, acost;
6168   struct iv_cand *cand;
6169
6170   best_delta = NULL;
6171   best_cost = iv_ca_cost (ivs);
6172
6173   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6174     {
6175       cand = iv_cand (data, i);
6176
6177       if (cand == except_cand)
6178         continue;
6179
6180       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6181
6182       if (compare_costs (acost, best_cost) < 0)
6183         {
6184           best_cost = acost;
6185           iv_ca_delta_free (&best_delta);
6186           best_delta = act_delta;
6187         }
6188       else
6189         iv_ca_delta_free (&act_delta);
6190     }
6191
6192   if (!best_delta)
6193     {
6194       *delta = NULL;
6195       return best_cost;
6196     }
6197
6198   /* Recurse to possibly remove other unnecessary ivs.  */
6199   iv_ca_delta_commit (data, ivs, best_delta, true);
6200   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6201   iv_ca_delta_commit (data, ivs, best_delta, false);
6202   *delta = iv_ca_delta_join (best_delta, *delta);
6203   return best_cost;
6204 }
6205
6206 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6207    cheaper local cost for USE than BEST_CP.  Return pointer to
6208    the corresponding cost_pair, otherwise just return BEST_CP.  */
6209
6210 static struct cost_pair*
6211 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6212                         unsigned int cand_idx, struct iv_cand *old_cand,
6213                         struct cost_pair *best_cp)
6214 {
6215   struct iv_cand *cand;
6216   struct cost_pair *cp;
6217
6218   gcc_assert (old_cand != NULL && best_cp != NULL);
6219   if (cand_idx == old_cand->id)
6220     return best_cp;
6221
6222   cand = iv_cand (data, cand_idx);
6223   cp = get_use_iv_cost (data, use, cand);
6224   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6225     return cp;
6226
6227   return best_cp;
6228 }
6229
6230 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6231    which are used by more than one iv uses.  For each of those candidates,
6232    this function tries to represent iv uses under that candidate using
6233    other ones with lower local cost, then tries to prune the new set.
6234    If the new set has lower cost, It returns the new cost after recording
6235    candidate replacement in list DELTA.  */
6236
6237 static comp_cost
6238 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6239                struct iv_ca_delta **delta)
6240 {
6241   bitmap_iterator bi, bj;
6242   unsigned int i, j, k;
6243   struct iv_use *use;
6244   struct iv_cand *cand;
6245   comp_cost orig_cost, acost;
6246   struct iv_ca_delta *act_delta, *tmp_delta;
6247   struct cost_pair *old_cp, *best_cp = NULL;
6248
6249   *delta = NULL;
6250   orig_cost = iv_ca_cost (ivs);
6251
6252   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6253     {
6254       if (ivs->n_cand_uses[i] == 1
6255           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6256         continue;
6257
6258       cand = iv_cand (data, i);
6259
6260       act_delta = NULL;
6261       /*  Represent uses under current candidate using other ones with
6262           lower local cost.  */
6263       for (j = 0; j < ivs->upto; j++)
6264         {
6265           use = iv_use (data, j);
6266           old_cp = iv_ca_cand_for_use (ivs, use);
6267
6268           if (old_cp->cand != cand)
6269             continue;
6270
6271           best_cp = old_cp;
6272           if (data->consider_all_candidates)
6273             for (k = 0; k < n_iv_cands (data); k++)
6274               best_cp = cheaper_cost_with_cand (data, use, k,
6275                                                 old_cp->cand, best_cp);
6276           else
6277             EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6278               best_cp = cheaper_cost_with_cand (data, use, k,
6279                                                 old_cp->cand, best_cp);
6280
6281           if (best_cp == old_cp)
6282             continue;
6283
6284           act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6285         }
6286       /* No need for further prune.  */
6287       if (!act_delta)
6288         continue;
6289
6290       /* Prune the new candidate set.  */
6291       iv_ca_delta_commit (data, ivs, act_delta, true);
6292       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6293       iv_ca_delta_commit (data, ivs, act_delta, false);
6294       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6295
6296       if (compare_costs (acost, orig_cost) < 0)
6297         {
6298           *delta = act_delta;
6299           return acost;
6300         }
6301       else
6302         iv_ca_delta_free (&act_delta);
6303     }
6304
6305   return orig_cost;
6306 }
6307
6308 /* Tries to extend the sets IVS in the best possible way in order
6309    to express the USE.  If ORIGINALP is true, prefer candidates from
6310    the original set of IVs, otherwise favor important candidates not
6311    based on any memory object.  */
6312
6313 static bool
6314 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6315                   struct iv_use *use, bool originalp)
6316 {
6317   comp_cost best_cost, act_cost;
6318   unsigned i;
6319   bitmap_iterator bi;
6320   struct iv_cand *cand;
6321   struct iv_ca_delta *best_delta = NULL, *act_delta;
6322   struct cost_pair *cp;
6323
6324   iv_ca_add_use (data, ivs, use);
6325   best_cost = iv_ca_cost (ivs);
6326   cp = iv_ca_cand_for_use (ivs, use);
6327   if (cp)
6328     {
6329       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6330       iv_ca_set_no_cp (data, ivs, use);
6331     }
6332
6333   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6334      first try important candidates not based on any memory object.  Only if
6335      this fails, try the specific ones.  Rationale -- in loops with many
6336      variables the best choice often is to use just one generic biv.  If we
6337      added here many ivs specific to the uses, the optimization algorithm later
6338      would be likely to get stuck in a local minimum, thus causing us to create
6339      too many ivs.  The approach from few ivs to more seems more likely to be
6340      successful -- starting from few ivs, replacing an expensive use by a
6341      specific iv should always be a win.  */
6342   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6343     {
6344       cand = iv_cand (data, i);
6345
6346       if (originalp && cand->pos !=IP_ORIGINAL)
6347         continue;
6348
6349       if (!originalp && cand->iv->base_object != NULL_TREE)
6350         continue;
6351
6352       if (iv_ca_cand_used_p (ivs, cand))
6353         continue;
6354
6355       cp = get_use_iv_cost (data, use, cand);
6356       if (!cp)
6357         continue;
6358
6359       iv_ca_set_cp (data, ivs, use, cp);
6360       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6361                                true);
6362       iv_ca_set_no_cp (data, ivs, use);
6363       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6364
6365       if (compare_costs (act_cost, best_cost) < 0)
6366         {
6367           best_cost = act_cost;
6368
6369           iv_ca_delta_free (&best_delta);
6370           best_delta = act_delta;
6371         }
6372       else
6373         iv_ca_delta_free (&act_delta);
6374     }
6375
6376   if (infinite_cost_p (best_cost))
6377     {
6378       for (i = 0; i < use->n_map_members; i++)
6379         {
6380           cp = use->cost_map + i;
6381           cand = cp->cand;
6382           if (!cand)
6383             continue;
6384
6385           /* Already tried this.  */
6386           if (cand->important)
6387             {
6388               if (originalp && cand->pos == IP_ORIGINAL)
6389                 continue;
6390               if (!originalp && cand->iv->base_object == NULL_TREE)
6391                 continue;
6392             }
6393
6394           if (iv_ca_cand_used_p (ivs, cand))
6395             continue;
6396
6397           act_delta = NULL;
6398           iv_ca_set_cp (data, ivs, use, cp);
6399           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6400           iv_ca_set_no_cp (data, ivs, use);
6401           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6402                                        cp, act_delta);
6403
6404           if (compare_costs (act_cost, best_cost) < 0)
6405             {
6406               best_cost = act_cost;
6407
6408               if (best_delta)
6409                 iv_ca_delta_free (&best_delta);
6410               best_delta = act_delta;
6411             }
6412           else
6413             iv_ca_delta_free (&act_delta);
6414         }
6415     }
6416
6417   iv_ca_delta_commit (data, ivs, best_delta, true);
6418   iv_ca_delta_free (&best_delta);
6419
6420   return !infinite_cost_p (best_cost);
6421 }
6422
6423 /* Finds an initial assignment of candidates to uses.  */
6424
6425 static struct iv_ca *
6426 get_initial_solution (struct ivopts_data *data, bool originalp)
6427 {
6428   struct iv_ca *ivs = iv_ca_new (data);
6429   unsigned i;
6430
6431   for (i = 0; i < n_iv_uses (data); i++)
6432     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6433       {
6434         iv_ca_free (&ivs);
6435         return NULL;
6436       }
6437
6438   return ivs;
6439 }
6440
6441 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6442    points to a bool variable, this function tries to break local
6443    optimal fixed-point by replacing candidates in IVS if it's true.  */
6444
6445 static bool
6446 try_improve_iv_set (struct ivopts_data *data,
6447                     struct iv_ca *ivs, bool *try_replace_p)
6448 {
6449   unsigned i, n_ivs;
6450   comp_cost acost, best_cost = iv_ca_cost (ivs);
6451   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6452   struct iv_cand *cand;
6453
6454   /* Try extending the set of induction variables by one.  */
6455   for (i = 0; i < n_iv_cands (data); i++)
6456     {
6457       cand = iv_cand (data, i);
6458
6459       if (iv_ca_cand_used_p (ivs, cand))
6460         continue;
6461
6462       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6463       if (!act_delta)
6464         continue;
6465
6466       /* If we successfully added the candidate and the set is small enough,
6467          try optimizing it by removing other candidates.  */
6468       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6469         {
6470           iv_ca_delta_commit (data, ivs, act_delta, true);
6471           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6472           iv_ca_delta_commit (data, ivs, act_delta, false);
6473           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6474         }
6475
6476       if (compare_costs (acost, best_cost) < 0)
6477         {
6478           best_cost = acost;
6479           iv_ca_delta_free (&best_delta);
6480           best_delta = act_delta;
6481         }
6482       else
6483         iv_ca_delta_free (&act_delta);
6484     }
6485
6486   if (!best_delta)
6487     {
6488       /* Try removing the candidates from the set instead.  */
6489       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6490
6491       if (!best_delta && *try_replace_p)
6492         {
6493           *try_replace_p = false;
6494           /* So far candidate selecting algorithm tends to choose fewer IVs
6495              so that it can handle cases in which loops have many variables
6496              but the best choice is often to use only one general biv.  One
6497              weakness is it can't handle opposite cases, in which different
6498              candidates should be chosen with respect to each use.  To solve
6499              the problem, we replace candidates in a manner described by the
6500              comments of iv_ca_replace, thus give general algorithm a chance
6501              to break local optimal fixed-point in these cases.  */
6502           best_cost = iv_ca_replace (data, ivs, &best_delta);
6503         }
6504
6505       if (!best_delta)
6506         return false;
6507     }
6508
6509   iv_ca_delta_commit (data, ivs, best_delta, true);
6510   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6511   iv_ca_delta_free (&best_delta);
6512   return true;
6513 }
6514
6515 /* Attempts to find the optimal set of induction variables.  We do simple
6516    greedy heuristic -- we try to replace at most one candidate in the selected
6517    solution and remove the unused ivs while this improves the cost.  */
6518
6519 static struct iv_ca *
6520 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6521 {
6522   struct iv_ca *set;
6523   bool try_replace_p = true;
6524
6525   /* Get the initial solution.  */
6526   set = get_initial_solution (data, originalp);
6527   if (!set)
6528     {
6529       if (dump_file && (dump_flags & TDF_DETAILS))
6530         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6531       return NULL;
6532     }
6533
6534   if (dump_file && (dump_flags & TDF_DETAILS))
6535     {
6536       fprintf (dump_file, "Initial set of candidates:\n");
6537       iv_ca_dump (data, dump_file, set);
6538     }
6539
6540   while (try_improve_iv_set (data, set, &try_replace_p))
6541     {
6542       if (dump_file && (dump_flags & TDF_DETAILS))
6543         {
6544           fprintf (dump_file, "Improved to:\n");
6545           iv_ca_dump (data, dump_file, set);
6546         }
6547     }
6548
6549   return set;
6550 }
6551
6552 static struct iv_ca *
6553 find_optimal_iv_set (struct ivopts_data *data)
6554 {
6555   unsigned i;
6556   struct iv_ca *set, *origset;
6557   struct iv_use *use;
6558   comp_cost cost, origcost;
6559
6560   /* Determine the cost based on a strategy that starts with original IVs,
6561      and try again using a strategy that prefers candidates not based
6562      on any IVs.  */
6563   origset = find_optimal_iv_set_1 (data, true);
6564   set = find_optimal_iv_set_1 (data, false);
6565
6566   if (!origset && !set)
6567     return NULL;
6568
6569   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6570   cost = set ? iv_ca_cost (set) : infinite_cost;
6571
6572   if (dump_file && (dump_flags & TDF_DETAILS))
6573     {
6574       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6575                origcost.cost, origcost.complexity);
6576       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6577                cost.cost, cost.complexity);
6578     }
6579
6580   /* Choose the one with the best cost.  */
6581   if (compare_costs (origcost, cost) <= 0)
6582     {
6583       if (set)
6584         iv_ca_free (&set);
6585       set = origset;
6586     }
6587   else if (origset)
6588     iv_ca_free (&origset);
6589
6590   for (i = 0; i < n_iv_uses (data); i++)
6591     {
6592       use = iv_use (data, i);
6593       use->selected = iv_ca_cand_for_use (set, use)->cand;
6594     }
6595
6596   return set;
6597 }
6598
6599 /* Creates a new induction variable corresponding to CAND.  */
6600
6601 static void
6602 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6603 {
6604   gimple_stmt_iterator incr_pos;
6605   tree base;
6606   bool after = false;
6607
6608   if (!cand->iv)
6609     return;
6610
6611   switch (cand->pos)
6612     {
6613     case IP_NORMAL:
6614       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6615       break;
6616
6617     case IP_END:
6618       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6619       after = true;
6620       break;
6621
6622     case IP_AFTER_USE:
6623       after = true;
6624       /* fall through */
6625     case IP_BEFORE_USE:
6626       incr_pos = gsi_for_stmt (cand->incremented_at);
6627       break;
6628
6629     case IP_ORIGINAL:
6630       /* Mark that the iv is preserved.  */
6631       name_info (data, cand->var_before)->preserve_biv = true;
6632       name_info (data, cand->var_after)->preserve_biv = true;
6633
6634       /* Rewrite the increment so that it uses var_before directly.  */
6635       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6636       return;
6637     }
6638
6639   gimple_add_tmp_var (cand->var_before);
6640
6641   base = unshare_expr (cand->iv->base);
6642
6643   create_iv (base, unshare_expr (cand->iv->step),
6644              cand->var_before, data->current_loop,
6645              &incr_pos, after, &cand->var_before, &cand->var_after);
6646 }
6647
6648 /* Creates new induction variables described in SET.  */
6649
6650 static void
6651 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6652 {
6653   unsigned i;
6654   struct iv_cand *cand;
6655   bitmap_iterator bi;
6656
6657   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6658     {
6659       cand = iv_cand (data, i);
6660       create_new_iv (data, cand);
6661     }
6662
6663   if (dump_file && (dump_flags & TDF_DETAILS))
6664     {
6665       fprintf (dump_file, "Selected IV set for loop %d",
6666                data->current_loop->num);
6667       if (data->loop_loc != UNKNOWN_LOCATION)
6668         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6669                  LOCATION_LINE (data->loop_loc));
6670       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6671       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6672         {
6673           cand = iv_cand (data, i);
6674           dump_cand (dump_file, cand);
6675         }
6676       fprintf (dump_file, "\n");
6677     }
6678 }
6679
6680 /* Rewrites USE (definition of iv used in a nonlinear expression)
6681    using candidate CAND.  */
6682
6683 static void
6684 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6685                             struct iv_use *use, struct iv_cand *cand)
6686 {
6687   tree comp;
6688   tree op, tgt;
6689   gassign *ass;
6690   gimple_stmt_iterator bsi;
6691
6692   /* An important special case -- if we are asked to express value of
6693      the original iv by itself, just exit; there is no need to
6694      introduce a new computation (that might also need casting the
6695      variable to unsigned and back).  */
6696   if (cand->pos == IP_ORIGINAL
6697       && cand->incremented_at == use->stmt)
6698     {
6699       enum tree_code stmt_code;
6700
6701       gcc_assert (is_gimple_assign (use->stmt));
6702       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6703
6704       /* Check whether we may leave the computation unchanged.
6705          This is the case only if it does not rely on other
6706          computations in the loop -- otherwise, the computation
6707          we rely upon may be removed in remove_unused_ivs,
6708          thus leading to ICE.  */
6709       stmt_code = gimple_assign_rhs_code (use->stmt);
6710       if (stmt_code == PLUS_EXPR
6711           || stmt_code == MINUS_EXPR
6712           || stmt_code == POINTER_PLUS_EXPR)
6713         {
6714           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6715             op = gimple_assign_rhs2 (use->stmt);
6716           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6717             op = gimple_assign_rhs1 (use->stmt);
6718           else
6719             op = NULL_TREE;
6720         }
6721       else
6722         op = NULL_TREE;
6723
6724       if (op && expr_invariant_in_loop_p (data->current_loop, op))
6725         return;
6726     }
6727
6728   comp = get_computation (data->current_loop, use, cand);
6729   gcc_assert (comp != NULL_TREE);
6730
6731   switch (gimple_code (use->stmt))
6732     {
6733     case GIMPLE_PHI:
6734       tgt = PHI_RESULT (use->stmt);
6735
6736       /* If we should keep the biv, do not replace it.  */
6737       if (name_info (data, tgt)->preserve_biv)
6738         return;
6739
6740       bsi = gsi_after_labels (gimple_bb (use->stmt));
6741       break;
6742
6743     case GIMPLE_ASSIGN:
6744       tgt = gimple_assign_lhs (use->stmt);
6745       bsi = gsi_for_stmt (use->stmt);
6746       break;
6747
6748     default:
6749       gcc_unreachable ();
6750     }
6751
6752   if (!valid_gimple_rhs_p (comp)
6753       || (gimple_code (use->stmt) != GIMPLE_PHI
6754           /* We can't allow re-allocating the stmt as it might be pointed
6755              to still.  */
6756           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6757               >= gimple_num_ops (gsi_stmt (bsi)))))
6758     {
6759       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6760                                        true, GSI_SAME_STMT);
6761       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6762         {
6763           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6764           /* As this isn't a plain copy we have to reset alignment
6765              information.  */
6766           if (SSA_NAME_PTR_INFO (comp))
6767             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6768         }
6769     }
6770
6771   if (gimple_code (use->stmt) == GIMPLE_PHI)
6772     {
6773       ass = gimple_build_assign (tgt, comp);
6774       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6775
6776       bsi = gsi_for_stmt (use->stmt);
6777       remove_phi_node (&bsi, false);
6778     }
6779   else
6780     {
6781       gimple_assign_set_rhs_from_tree (&bsi, comp);
6782       use->stmt = gsi_stmt (bsi);
6783     }
6784 }
6785
6786 /* Performs a peephole optimization to reorder the iv update statement with
6787    a mem ref to enable instruction combining in later phases. The mem ref uses
6788    the iv value before the update, so the reordering transformation requires
6789    adjustment of the offset. CAND is the selected IV_CAND.
6790
6791    Example:
6792
6793    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6794    iv2 = iv1 + 1;
6795
6796    if (t < val)      (1)
6797      goto L;
6798    goto Head;
6799
6800
6801    directly propagating t over to (1) will introduce overlapping live range
6802    thus increase register pressure. This peephole transform it into:
6803
6804
6805    iv2 = iv1 + 1;
6806    t = MEM_REF (base, iv2, 8, 8);
6807    if (t < val)
6808      goto L;
6809    goto Head;
6810 */
6811
6812 static void
6813 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6814 {
6815   tree var_after;
6816   gimple iv_update, stmt;
6817   basic_block bb;
6818   gimple_stmt_iterator gsi, gsi_iv;
6819
6820   if (cand->pos != IP_NORMAL)
6821     return;
6822
6823   var_after = cand->var_after;
6824   iv_update = SSA_NAME_DEF_STMT (var_after);
6825
6826   bb = gimple_bb (iv_update);
6827   gsi = gsi_last_nondebug_bb (bb);
6828   stmt = gsi_stmt (gsi);
6829
6830   /* Only handle conditional statement for now.  */
6831   if (gimple_code (stmt) != GIMPLE_COND)
6832     return;
6833
6834   gsi_prev_nondebug (&gsi);
6835   stmt = gsi_stmt (gsi);
6836   if (stmt != iv_update)
6837     return;
6838
6839   gsi_prev_nondebug (&gsi);
6840   if (gsi_end_p (gsi))
6841     return;
6842
6843   stmt = gsi_stmt (gsi);
6844   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6845     return;
6846
6847   if (stmt != use->stmt)
6848     return;
6849
6850   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6851     return;
6852
6853   if (dump_file && (dump_flags & TDF_DETAILS))
6854     {
6855       fprintf (dump_file, "Reordering \n");
6856       print_gimple_stmt (dump_file, iv_update, 0, 0);
6857       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6858       fprintf (dump_file, "\n");
6859     }
6860
6861   gsi = gsi_for_stmt (use->stmt);
6862   gsi_iv = gsi_for_stmt (iv_update);
6863   gsi_move_before (&gsi_iv, &gsi);
6864
6865   cand->pos = IP_BEFORE_USE;
6866   cand->incremented_at = use->stmt;
6867 }
6868
6869 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6870
6871 static void
6872 rewrite_use_address_1 (struct ivopts_data *data,
6873                        struct iv_use *use, struct iv_cand *cand)
6874 {
6875   aff_tree aff;
6876   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6877   tree base_hint = NULL_TREE;
6878   tree ref, iv;
6879   bool ok;
6880
6881   adjust_iv_update_pos (cand, use);
6882   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6883   gcc_assert (ok);
6884   unshare_aff_combination (&aff);
6885
6886   /* To avoid undefined overflow problems, all IV candidates use unsigned
6887      integer types.  The drawback is that this makes it impossible for
6888      create_mem_ref to distinguish an IV that is based on a memory object
6889      from one that represents simply an offset.
6890
6891      To work around this problem, we pass a hint to create_mem_ref that
6892      indicates which variable (if any) in aff is an IV based on a memory
6893      object.  Note that we only consider the candidate.  If this is not
6894      based on an object, the base of the reference is in some subexpression
6895      of the use -- but these will use pointer types, so they are recognized
6896      by the create_mem_ref heuristics anyway.  */
6897   if (cand->iv->base_object)
6898     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6899
6900   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6901   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6902                         reference_alias_ptr_type (*use->op_p),
6903                         iv, base_hint, data->speed);
6904   copy_ref_info (ref, *use->op_p);
6905   *use->op_p = ref;
6906 }
6907
6908 /* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
6909    first use of a group, rewrites sub uses in the group too.  */
6910
6911 static void
6912 rewrite_use_address (struct ivopts_data *data,
6913                       struct iv_use *use, struct iv_cand *cand)
6914 {
6915   struct iv_use *next;
6916
6917   gcc_assert (use->sub_id == 0);
6918   rewrite_use_address_1 (data, use, cand);
6919   update_stmt (use->stmt);
6920
6921   for (next = use->next; next != NULL; next = next->next)
6922     {
6923       rewrite_use_address_1 (data, next, cand);
6924       update_stmt (next->stmt);
6925     }
6926
6927   return;
6928 }
6929
6930 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6931    candidate CAND.  */
6932
6933 static void
6934 rewrite_use_compare (struct ivopts_data *data,
6935                      struct iv_use *use, struct iv_cand *cand)
6936 {
6937   tree comp, *var_p, op, bound;
6938   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6939   enum tree_code compare;
6940   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6941   bool ok;
6942
6943   bound = cp->value;
6944   if (bound)
6945     {
6946       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6947       tree var_type = TREE_TYPE (var);
6948       gimple_seq stmts;
6949
6950       if (dump_file && (dump_flags & TDF_DETAILS))
6951         {
6952           fprintf (dump_file, "Replacing exit test: ");
6953           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6954         }
6955       compare = cp->comp;
6956       bound = unshare_expr (fold_convert (var_type, bound));
6957       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6958       if (stmts)
6959         gsi_insert_seq_on_edge_immediate (
6960                 loop_preheader_edge (data->current_loop),
6961                 stmts);
6962
6963       gcond *cond_stmt = as_a <gcond *> (use->stmt);
6964       gimple_cond_set_lhs (cond_stmt, var);
6965       gimple_cond_set_code (cond_stmt, compare);
6966       gimple_cond_set_rhs (cond_stmt, op);
6967       return;
6968     }
6969
6970   /* The induction variable elimination failed; just express the original
6971      giv.  */
6972   comp = get_computation (data->current_loop, use, cand);
6973   gcc_assert (comp != NULL_TREE);
6974
6975   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6976   gcc_assert (ok);
6977
6978   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6979                                      true, GSI_SAME_STMT);
6980 }
6981
6982 /* Rewrites USE using candidate CAND.  */
6983
6984 static void
6985 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6986 {
6987   switch (use->type)
6988     {
6989       case USE_NONLINEAR_EXPR:
6990         rewrite_use_nonlinear_expr (data, use, cand);
6991         break;
6992
6993       case USE_ADDRESS:
6994         rewrite_use_address (data, use, cand);
6995         break;
6996
6997       case USE_COMPARE:
6998         rewrite_use_compare (data, use, cand);
6999         break;
7000
7001       default:
7002         gcc_unreachable ();
7003     }
7004
7005   update_stmt (use->stmt);
7006 }
7007
7008 /* Rewrite the uses using the selected induction variables.  */
7009
7010 static void
7011 rewrite_uses (struct ivopts_data *data)
7012 {
7013   unsigned i;
7014   struct iv_cand *cand;
7015   struct iv_use *use;
7016
7017   for (i = 0; i < n_iv_uses (data); i++)
7018     {
7019       use = iv_use (data, i);
7020       cand = use->selected;
7021       gcc_assert (cand);
7022
7023       rewrite_use (data, use, cand);
7024     }
7025 }
7026
7027 /* Removes the ivs that are not used after rewriting.  */
7028
7029 static void
7030 remove_unused_ivs (struct ivopts_data *data)
7031 {
7032   unsigned j;
7033   bitmap_iterator bi;
7034   bitmap toremove = BITMAP_ALLOC (NULL);
7035
7036   /* Figure out an order in which to release SSA DEFs so that we don't
7037      release something that we'd have to propagate into a debug stmt
7038      afterwards.  */
7039   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7040     {
7041       struct version_info *info;
7042
7043       info = ver_info (data, j);
7044       if (info->iv
7045           && !integer_zerop (info->iv->step)
7046           && !info->inv_id
7047           && !info->iv->have_use_for
7048           && !info->preserve_biv)
7049         {
7050           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7051
7052           tree def = info->iv->ssa_name;
7053
7054           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7055             {
7056               imm_use_iterator imm_iter;
7057               use_operand_p use_p;
7058               gimple stmt;
7059               int count = 0;
7060
7061               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7062                 {
7063                   if (!gimple_debug_bind_p (stmt))
7064                     continue;
7065
7066                   /* We just want to determine whether to do nothing
7067                      (count == 0), to substitute the computed
7068                      expression into a single use of the SSA DEF by
7069                      itself (count == 1), or to use a debug temp
7070                      because the SSA DEF is used multiple times or as
7071                      part of a larger expression (count > 1). */
7072                   count++;
7073                   if (gimple_debug_bind_get_value (stmt) != def)
7074                     count++;
7075
7076                   if (count > 1)
7077                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7078                 }
7079
7080               if (!count)
7081                 continue;
7082
7083               struct iv_use dummy_use;
7084               struct iv_cand *best_cand = NULL, *cand;
7085               unsigned i, best_pref = 0, cand_pref;
7086
7087               memset (&dummy_use, 0, sizeof (dummy_use));
7088               dummy_use.iv = info->iv;
7089               for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7090                 {
7091                   cand = iv_use (data, i)->selected;
7092                   if (cand == best_cand)
7093                     continue;
7094                   cand_pref = operand_equal_p (cand->iv->step,
7095                                                info->iv->step, 0)
7096                     ? 4 : 0;
7097                   cand_pref
7098                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7099                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7100                     ? 2 : 0;
7101                   cand_pref
7102                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7103                     ? 1 : 0;
7104                   if (best_cand == NULL || best_pref < cand_pref)
7105                     {
7106                       best_cand = cand;
7107                       best_pref = cand_pref;
7108                     }
7109                 }
7110
7111               if (!best_cand)
7112                 continue;
7113
7114               tree comp = get_computation_at (data->current_loop,
7115                                               &dummy_use, best_cand,
7116                                               SSA_NAME_DEF_STMT (def));
7117               if (!comp)
7118                 continue;
7119
7120               if (count > 1)
7121                 {
7122                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7123                   DECL_ARTIFICIAL (vexpr) = 1;
7124                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7125                   if (SSA_NAME_VAR (def))
7126                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7127                   else
7128                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7129                   gdebug *def_temp
7130                     = gimple_build_debug_bind (vexpr, comp, NULL);
7131                   gimple_stmt_iterator gsi;
7132
7133                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7134                     gsi = gsi_after_labels (gimple_bb
7135                                             (SSA_NAME_DEF_STMT (def)));
7136                   else
7137                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7138
7139                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7140                   comp = vexpr;
7141                 }
7142
7143               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7144                 {
7145                   if (!gimple_debug_bind_p (stmt))
7146                     continue;
7147
7148                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7149                     SET_USE (use_p, comp);
7150
7151                   update_stmt (stmt);
7152                 }
7153             }
7154         }
7155     }
7156
7157   release_defs_bitset (toremove);
7158
7159   BITMAP_FREE (toremove);
7160 }
7161
7162 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7163    for hash_map::traverse.  */
7164
7165 bool
7166 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7167 {
7168   free (value);
7169   return true;
7170 }
7171
7172 /* Frees data allocated by the optimization of a single loop.  */
7173
7174 static void
7175 free_loop_data (struct ivopts_data *data)
7176 {
7177   unsigned i, j;
7178   bitmap_iterator bi;
7179   tree obj;
7180
7181   if (data->niters)
7182     {
7183       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7184       delete data->niters;
7185       data->niters = NULL;
7186     }
7187
7188   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7189     {
7190       struct version_info *info;
7191
7192       info = ver_info (data, i);
7193       free (info->iv);
7194       info->iv = NULL;
7195       info->has_nonlin_use = false;
7196       info->preserve_biv = false;
7197       info->inv_id = 0;
7198     }
7199   bitmap_clear (data->relevant);
7200   bitmap_clear (data->important_candidates);
7201
7202   for (i = 0; i < n_iv_uses (data); i++)
7203     {
7204       struct iv_use *use = iv_use (data, i);
7205       struct iv_use *pre = use, *sub = use->next;
7206
7207       while (sub)
7208         {
7209           gcc_assert (sub->related_cands == NULL);
7210           gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7211
7212           free (sub->iv);
7213           pre = sub;
7214           sub = sub->next;
7215           free (pre);
7216         }
7217
7218       free (use->iv);
7219       BITMAP_FREE (use->related_cands);
7220       for (j = 0; j < use->n_map_members; j++)
7221         if (use->cost_map[j].depends_on)
7222           BITMAP_FREE (use->cost_map[j].depends_on);
7223       free (use->cost_map);
7224       free (use);
7225     }
7226   data->iv_uses.truncate (0);
7227
7228   for (i = 0; i < n_iv_cands (data); i++)
7229     {
7230       struct iv_cand *cand = iv_cand (data, i);
7231
7232       free (cand->iv);
7233       if (cand->depends_on)
7234         BITMAP_FREE (cand->depends_on);
7235       free (cand);
7236     }
7237   data->iv_candidates.truncate (0);
7238
7239   if (data->version_info_size < num_ssa_names)
7240     {
7241       data->version_info_size = 2 * num_ssa_names;
7242       free (data->version_info);
7243       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7244     }
7245
7246   data->max_inv_id = 0;
7247
7248   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7249     SET_DECL_RTL (obj, NULL_RTX);
7250
7251   decl_rtl_to_reset.truncate (0);
7252
7253   data->inv_expr_tab->empty ();
7254   data->inv_expr_id = 0;
7255 }
7256
7257 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7258    loop tree.  */
7259
7260 static void
7261 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7262 {
7263   free_loop_data (data);
7264   free (data->version_info);
7265   BITMAP_FREE (data->relevant);
7266   BITMAP_FREE (data->important_candidates);
7267
7268   decl_rtl_to_reset.release ();
7269   data->iv_uses.release ();
7270   data->iv_candidates.release ();
7271   delete data->inv_expr_tab;
7272   data->inv_expr_tab = NULL;
7273   free_affine_expand_cache (&data->name_expansion_cache);
7274 }
7275
7276 /* Returns true if the loop body BODY includes any function calls.  */
7277
7278 static bool
7279 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7280 {
7281   gimple_stmt_iterator gsi;
7282   unsigned i;
7283
7284   for (i = 0; i < num_nodes; i++)
7285     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7286       {
7287         gimple stmt = gsi_stmt (gsi);
7288         if (is_gimple_call (stmt)
7289             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7290           return true;
7291       }
7292   return false;
7293 }
7294
7295 /* Optimizes the LOOP.  Returns true if anything changed.  */
7296
7297 static bool
7298 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7299 {
7300   bool changed = false;
7301   struct iv_ca *iv_ca;
7302   edge exit = single_dom_exit (loop);
7303   basic_block *body;
7304
7305   gcc_assert (!data->niters);
7306   data->current_loop = loop;
7307   data->loop_loc = find_loop_location (loop);
7308   data->speed = optimize_loop_for_speed_p (loop);
7309
7310   if (dump_file && (dump_flags & TDF_DETAILS))
7311     {
7312       fprintf (dump_file, "Processing loop %d", loop->num);
7313       if (data->loop_loc != UNKNOWN_LOCATION)
7314         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7315                  LOCATION_LINE (data->loop_loc));
7316       fprintf (dump_file, "\n");
7317
7318       if (exit)
7319         {
7320           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7321                    exit->src->index, exit->dest->index);
7322           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7323           fprintf (dump_file, "\n");
7324         }
7325
7326       fprintf (dump_file, "\n");
7327     }
7328
7329   body = get_loop_body (loop);
7330   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7331   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7332   free (body);
7333
7334   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7335
7336   /* For each ssa name determines whether it behaves as an induction variable
7337      in some loop.  */
7338   if (!find_induction_variables (data))
7339     goto finish;
7340
7341   /* Finds interesting uses (item 1).  */
7342   find_interesting_uses (data);
7343   group_address_uses (data);
7344   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7345     goto finish;
7346
7347   /* Finds candidates for the induction variables (item 2).  */
7348   find_iv_candidates (data);
7349
7350   /* Calculates the costs (item 3, part 1).  */
7351   determine_iv_costs (data);
7352   determine_use_iv_costs (data);
7353   determine_set_costs (data);
7354
7355   /* Find the optimal set of induction variables (item 3, part 2).  */
7356   iv_ca = find_optimal_iv_set (data);
7357   if (!iv_ca)
7358     goto finish;
7359   changed = true;
7360
7361   /* Create the new induction variables (item 4, part 1).  */
7362   create_new_ivs (data, iv_ca);
7363   iv_ca_free (&iv_ca);
7364
7365   /* Rewrite the uses (item 4, part 2).  */
7366   rewrite_uses (data);
7367
7368   /* Remove the ivs that are unused after rewriting.  */
7369   remove_unused_ivs (data);
7370
7371   /* We have changed the structure of induction variables; it might happen
7372      that definitions in the scev database refer to some of them that were
7373      eliminated.  */
7374   scev_reset ();
7375
7376 finish:
7377   free_loop_data (data);
7378
7379   return changed;
7380 }
7381
7382 /* Main entry point.  Optimizes induction variables in loops.  */
7383
7384 void
7385 tree_ssa_iv_optimize (void)
7386 {
7387   struct loop *loop;
7388   struct ivopts_data data;
7389
7390   tree_ssa_iv_optimize_init (&data);
7391
7392   /* Optimize the loops starting with the innermost ones.  */
7393   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7394     {
7395       if (dump_file && (dump_flags & TDF_DETAILS))
7396         flow_loop_dump (loop, dump_file, NULL, 1);
7397
7398       tree_ssa_iv_optimize_loop (&data, loop);
7399     }
7400
7401   tree_ssa_iv_optimize_finalize (&data);
7402 }