gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "tm_p.h"
  79 #include "ssa.h"
  80 #include "expmed.h"
  81 #include "insn-config.h"
  82 #include "emit-rtl.h"
  83 #include "recog.h"
  84 #include "cgraph.h"
  85 #include "gimple-pretty-print.h"
  86 #include "alias.h"
  87 #include "fold-const.h"
  88 #include "stor-layout.h"
  89 #include "tree-eh.h"
  90 #include "gimplify.h"
  91 #include "gimple-iterator.h"
  92 #include "gimplify-me.h"
  93 #include "tree-cfg.h"
  94 #include "tree-ssa-loop-ivopts.h"
  95 #include "tree-ssa-loop-manip.h"
  96 #include "tree-ssa-loop-niter.h"
  97 #include "tree-ssa-loop.h"
  98 #include "explow.h"
  99 #include "expr.h"
 100 #include "tree-dfa.h"
 101 #include "tree-ssa.h"
 102 #include "cfgloop.h"
 103 #include "tree-scalar-evolution.h"
 104 #include "params.h"
 105 #include "tree-affine.h"
 106 #include "tree-ssa-propagate.h"
 107 #include "tree-ssa-address.h"
 108 #include "builtins.h"
 109 #include "tree-vectorizer.h"
 110
 111 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 112    cost of different addressing modes.  This should be moved to a TBD
 113    interface between the GIMPLE and RTL worlds.  */
 114
 115 /* The infinite cost.  */
 116 #define INFTY 10000000
 117
 118 #define AVG_LOOP_NITER(LOOP) 5
 119
 120 /* Returns the expected number of loop iterations for LOOP.
 121    The average trip count is computed from profile data if it
 122    exists. */
 123
 124 static inline HOST_WIDE_INT
 125 avg_loop_niter (struct loop *loop)
 126 {
 127   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 128   if (niter == -1)
 129     {
 130       niter = likely_max_stmt_executions_int (loop);
 131       if (niter == -1 || niter > AVG_LOOP_NITER (loop))
 132         return AVG_LOOP_NITER (loop);
 133     }
 134
 135   return niter;
 136 }
 137
 138 struct iv_use;
 139
 140 /* Representation of the induction variable.  */
 141 struct iv
 142 {
 143   tree base;            /* Initial value of the iv.  */
 144   tree base_object;     /* A memory object to that the induction variable points.  */
 145   tree step;            /* Step of the iv (constant only).  */
 146   tree ssa_name;        /* The ssa name with the value.  */
 147   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 148   bool biv_p;           /* Is it a biv?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_ADDRESS,          /* Use in an address.  */
 170   USE_COMPARE           /* Use is a compare.  */
 171 };
 172
 173 /* Cost of a computation.  */
 174 struct comp_cost
 175 {
 176   comp_cost (): cost (0), complexity (0), scratch (0)
 177   {}
 178
 179   comp_cost (int cost, unsigned complexity, int scratch = 0)
 180     : cost (cost), complexity (complexity), scratch (scratch)
 181   {}
 182
 183   /* Returns true if COST is infinite.  */
 184   bool infinite_cost_p ();
 185
 186   /* Adds costs COST1 and COST2.  */
 187   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 188
 189   /* Adds COST to the comp_cost.  */
 190   comp_cost operator+= (comp_cost cost);
 191
 192   /* Adds constant C to this comp_cost.  */
 193   comp_cost operator+= (HOST_WIDE_INT c);
 194
 195   /* Subtracts constant C to this comp_cost.  */
 196   comp_cost operator-= (HOST_WIDE_INT c);
 197
 198   /* Divide the comp_cost by constant C.  */
 199   comp_cost operator/= (HOST_WIDE_INT c);
 200
 201   /* Multiply the comp_cost by constant C.  */
 202   comp_cost operator*= (HOST_WIDE_INT c);
 203
 204   /* Subtracts costs COST1 and COST2.  */
 205   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 206
 207   /* Subtracts COST from this comp_cost.  */
 208   comp_cost operator-= (comp_cost cost);
 209
 210   /* Returns true if COST1 is smaller than COST2.  */
 211   friend bool operator< (comp_cost cost1, comp_cost cost2);
 212
 213   /* Returns true if COST1 and COST2 are equal.  */
 214   friend bool operator== (comp_cost cost1, comp_cost cost2);
 215
 216   /* Returns true if COST1 is smaller or equal than COST2.  */
 217   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 218
 219   int cost;             /* The runtime cost.  */
 220   unsigned complexity;  /* The estimate of the complexity of the code for
 221                            the computation (in no concrete units --
 222                            complexity field should be larger for more
 223                            complex expressions and addressing modes).  */
 224   int scratch;          /* Scratch used during cost computation.  */
 225 };
 226
 227 static const comp_cost no_cost;
 228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 229
 230 bool
 231 comp_cost::infinite_cost_p ()
 232 {
 233   return cost == INFTY;
 234 }
 235
 236 comp_cost
 237 operator+ (comp_cost cost1, comp_cost cost2)
 238 {
 239   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 240     return infinite_cost;
 241
 242   cost1.cost += cost2.cost;
 243   cost1.complexity += cost2.complexity;
 244
 245   return cost1;
 246 }
 247
 248 comp_cost
 249 operator- (comp_cost cost1, comp_cost cost2)
 250 {
 251   if (cost1.infinite_cost_p ())
 252     return infinite_cost;
 253
 254   gcc_assert (!cost2.infinite_cost_p ());
 255
 256   cost1.cost -= cost2.cost;
 257   cost1.complexity -= cost2.complexity;
 258
 259   return cost1;
 260 }
 261
 262 comp_cost
 263 comp_cost::operator+= (comp_cost cost)
 264 {
 265   *this = *this + cost;
 266   return *this;
 267 }
 268
 269 comp_cost
 270 comp_cost::operator+= (HOST_WIDE_INT c)
 271 {
 272   if (infinite_cost_p ())
 273     return *this;
 274
 275   this->cost += c;
 276
 277   return *this;
 278 }
 279
 280 comp_cost
 281 comp_cost::operator-= (HOST_WIDE_INT c)
 282 {
 283   if (infinite_cost_p ())
 284     return *this;
 285
 286   this->cost -= c;
 287
 288   return *this;
 289 }
 290
 291 comp_cost
 292 comp_cost::operator/= (HOST_WIDE_INT c)
 293 {
 294   if (infinite_cost_p ())
 295     return *this;
 296
 297   this->cost /= c;
 298
 299   return *this;
 300 }
 301
 302 comp_cost
 303 comp_cost::operator*= (HOST_WIDE_INT c)
 304 {
 305   if (infinite_cost_p ())
 306     return *this;
 307
 308   this->cost *= c;
 309
 310   return *this;
 311 }
 312
 313 comp_cost
 314 comp_cost::operator-= (comp_cost cost)
 315 {
 316   *this = *this - cost;
 317   return *this;
 318 }
 319
 320 bool
 321 operator< (comp_cost cost1, comp_cost cost2)
 322 {
 323   if (cost1.cost == cost2.cost)
 324     return cost1.complexity < cost2.complexity;
 325
 326   return cost1.cost < cost2.cost;
 327 }
 328
 329 bool
 330 operator== (comp_cost cost1, comp_cost cost2)
 331 {
 332   return cost1.cost == cost2.cost
 333     && cost1.complexity == cost2.complexity;
 334 }
 335
 336 bool
 337 operator<= (comp_cost cost1, comp_cost cost2)
 338 {
 339   return cost1 < cost2 || cost1 == cost2;
 340 }
 341
 342 struct iv_inv_expr_ent;
 343
 344 /* The candidate - cost pair.  */
 345 struct cost_pair
 346 {
 347   struct iv_cand *cand; /* The candidate.  */
 348   comp_cost cost;       /* The cost.  */
 349   bitmap depends_on;    /* The list of invariants that have to be
 350                            preserved.  */
 351   tree value;           /* For final value elimination, the expression for
 352                            the final value of the iv.  For iv elimination,
 353                            the new bound to compare with.  */
 354   enum tree_code comp;  /* For iv elimination, the comparison.  */
 355   iv_inv_expr_ent *inv_expr; /* Loop invariant expression.  */
 356 };
 357
 358 /* Use.  */
 359 struct iv_use
 360 {
 361   unsigned id;          /* The id of the use.  */
 362   unsigned group_id;    /* The group id the use belongs to.  */
 363   enum use_type type;   /* Type of the use.  */
 364   struct iv *iv;        /* The induction variable it is based on.  */
 365   gimple *stmt;         /* Statement in that it occurs.  */
 366   tree *op_p;           /* The place where it occurs.  */
 367
 368   tree addr_base;       /* Base address with const offset stripped.  */
 369   unsigned HOST_WIDE_INT addr_offset;
 370                         /* Const offset stripped from base address.  */
 371 };
 372
 373 /* Group of uses.  */
 374 struct iv_group
 375 {
 376   /* The id of the group.  */
 377   unsigned id;
 378   /* Uses of the group are of the same type.  */
 379   enum use_type type;
 380   /* The set of "related" IV candidates, plus the important ones.  */
 381   bitmap related_cands;
 382   /* Number of IV candidates in the cost_map.  */
 383   unsigned n_map_members;
 384   /* The costs wrto the iv candidates.  */
 385   struct cost_pair *cost_map;
 386   /* The selected candidate for the group.  */
 387   struct iv_cand *selected;
 388   /* Uses in the group.  */
 389   vec<struct iv_use *> vuses;
 390 };
 391
 392 /* The position where the iv is computed.  */
 393 enum iv_position
 394 {
 395   IP_NORMAL,            /* At the end, just before the exit condition.  */
 396   IP_END,               /* At the end of the latch block.  */
 397   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 398   IP_AFTER_USE,         /* Immediately after a specific use.  */
 399   IP_ORIGINAL           /* The original biv.  */
 400 };
 401
 402 /* The induction variable candidate.  */
 403 struct iv_cand
 404 {
 405   unsigned id;          /* The number of the candidate.  */
 406   bool important;       /* Whether this is an "important" candidate, i.e. such
 407                            that it should be considered by all uses.  */
 408   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 409   gimple *incremented_at;/* For original biv, the statement where it is
 410                            incremented.  */
 411   tree var_before;      /* The variable used for it before increment.  */
 412   tree var_after;       /* The variable used for it after increment.  */
 413   struct iv *iv;        /* The value of the candidate.  NULL for
 414                            "pseudocandidate" used to indicate the possibility
 415                            to replace the final value of an iv by direct
 416                            computation of the value.  */
 417   unsigned cost;        /* Cost of the candidate.  */
 418   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 419   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 420                               where it is incremented.  */
 421   bitmap depends_on;    /* The list of invariants that are used in step of the
 422                            biv.  */
 423   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 424                            smaller type.  */
 425 };
 426
 427 /* Hashtable entry for common candidate derived from iv uses.  */
 428 struct iv_common_cand
 429 {
 430   tree base;
 431   tree step;
 432   /* IV uses from which this common candidate is derived.  */
 433   auto_vec<struct iv_use *> uses;
 434   hashval_t hash;
 435 };
 436
 437 /* Hashtable helpers.  */
 438
 439 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 440 {
 441   static inline hashval_t hash (const iv_common_cand *);
 442   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 443 };
 444
 445 /* Hash function for possible common candidates.  */
 446
 447 inline hashval_t
 448 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 449 {
 450   return ccand->hash;
 451 }
 452
 453 /* Hash table equality function for common candidates.  */
 454
 455 inline bool
 456 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 457                               const iv_common_cand *ccand2)
 458 {
 459   return (ccand1->hash == ccand2->hash
 460           && operand_equal_p (ccand1->base, ccand2->base, 0)
 461           && operand_equal_p (ccand1->step, ccand2->step, 0)
 462           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 463               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 464 }
 465
 466 /* Loop invariant expression hashtable entry.  */
 467
 468 struct iv_inv_expr_ent
 469 {
 470   /* Tree expression of the entry.  */
 471   tree expr;
 472   /* Unique indentifier.  */
 473   int id;
 474   /* Hash value.  */
 475   hashval_t hash;
 476 };
 477
 478 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 479
 480 static int
 481 sort_iv_inv_expr_ent (const void *a, const void *b)
 482 {
 483   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 484   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 485
 486   unsigned id1 = (*e1)->id;
 487   unsigned id2 = (*e2)->id;
 488
 489   if (id1 < id2)
 490     return -1;
 491   else if (id1 > id2)
 492     return 1;
 493   else
 494     return 0;
 495 }
 496
 497 /* Hashtable helpers.  */
 498
 499 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 500 {
 501   static inline hashval_t hash (const iv_inv_expr_ent *);
 502   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 503 };
 504
 505 /* Hash function for loop invariant expressions.  */
 506
 507 inline hashval_t
 508 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 509 {
 510   return expr->hash;
 511 }
 512
 513 /* Hash table equality function for expressions.  */
 514
 515 inline bool
 516 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 517                            const iv_inv_expr_ent *expr2)
 518 {
 519   return expr1->hash == expr2->hash
 520          && operand_equal_p (expr1->expr, expr2->expr, 0);
 521 }
 522
 523 struct ivopts_data
 524 {
 525   /* The currently optimized loop.  */
 526   struct loop *current_loop;
 527   source_location loop_loc;
 528
 529   /* Numbers of iterations for all exits of the current loop.  */
 530   hash_map<edge, tree_niter_desc *> *niters;
 531
 532   /* Number of registers used in it.  */
 533   unsigned regs_used;
 534
 535   /* The size of version_info array allocated.  */
 536   unsigned version_info_size;
 537
 538   /* The array of information for the ssa names.  */
 539   struct version_info *version_info;
 540
 541   /* The hashtable of loop invariant expressions created
 542      by ivopt.  */
 543   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 544
 545   /* Loop invariant expression id.  */
 546   int max_inv_expr_id;
 547
 548   /* The bitmap of indices in version_info whose value was changed.  */
 549   bitmap relevant;
 550
 551   /* The uses of induction variables.  */
 552   vec<iv_group *> vgroups;
 553
 554   /* The candidates.  */
 555   vec<iv_cand *> vcands;
 556
 557   /* A bitmap of important candidates.  */
 558   bitmap important_candidates;
 559
 560   /* Cache used by tree_to_aff_combination_expand.  */
 561   hash_map<tree, name_expansion *> *name_expansion_cache;
 562
 563   /* The hashtable of common candidates derived from iv uses.  */
 564   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 565
 566   /* The common candidates.  */
 567   vec<iv_common_cand *> iv_common_cands;
 568
 569   /* The maximum invariant id.  */
 570   unsigned max_inv_id;
 571
 572   /* Number of no_overflow BIVs which are not used in memory address.  */
 573   unsigned bivs_not_used_in_addr;
 574
 575   /* Obstack for iv structure.  */
 576   struct obstack iv_obstack;
 577
 578   /* Whether to consider just related and important candidates when replacing a
 579      use.  */
 580   bool consider_all_candidates;
 581
 582   /* Are we optimizing for speed?  */
 583   bool speed;
 584
 585   /* Whether the loop body includes any function calls.  */
 586   bool body_includes_call;
 587
 588   /* Whether the loop body can only be exited via single exit.  */
 589   bool loop_single_exit_p;
 590 };
 591
 592 /* An assignment of iv candidates to uses.  */
 593
 594 struct iv_ca
 595 {
 596   /* The number of uses covered by the assignment.  */
 597   unsigned upto;
 598
 599   /* Number of uses that cannot be expressed by the candidates in the set.  */
 600   unsigned bad_groups;
 601
 602   /* Candidate assigned to a use, together with the related costs.  */
 603   struct cost_pair **cand_for_group;
 604
 605   /* Number of times each candidate is used.  */
 606   unsigned *n_cand_uses;
 607
 608   /* The candidates used.  */
 609   bitmap cands;
 610
 611   /* The number of candidates in the set.  */
 612   unsigned n_cands;
 613
 614   /* Total number of registers needed.  */
 615   unsigned n_regs;
 616
 617   /* Total cost of expressing uses.  */
 618   comp_cost cand_use_cost;
 619
 620   /* Total cost of candidates.  */
 621   unsigned cand_cost;
 622
 623   /* Number of times each invariant is used.  */
 624   unsigned *n_invariant_uses;
 625
 626   /* Hash set with used invariant expression.  */
 627   hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
 628
 629   /* Total cost of the assignment.  */
 630   comp_cost cost;
 631 };
 632
 633 /* Difference of two iv candidate assignments.  */
 634
 635 struct iv_ca_delta
 636 {
 637   /* Changed group.  */
 638   struct iv_group *group;
 639
 640   /* An old assignment (for rollback purposes).  */
 641   struct cost_pair *old_cp;
 642
 643   /* A new assignment.  */
 644   struct cost_pair *new_cp;
 645
 646   /* Next change in the list.  */
 647   struct iv_ca_delta *next;
 648 };
 649
 650 /* Bound on number of candidates below that all candidates are considered.  */
 651
 652 #define CONSIDER_ALL_CANDIDATES_BOUND \
 653   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 654
 655 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 656    optimizing such a loop would help, and it would take ages).  */
 657
 658 #define MAX_CONSIDERED_GROUPS \
 659   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 660
 661 /* If there are at most this number of ivs in the set, try removing unnecessary
 662    ivs from the set always.  */
 663
 664 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 665   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 666
 667 /* The list of trees for that the decl_rtl field must be reset is stored
 668    here.  */
 669
 670 static vec<tree> decl_rtl_to_reset;
 671
 672 static comp_cost force_expr_to_var_cost (tree, bool);
 673
 674 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 675
 676 edge
 677 single_dom_exit (struct loop *loop)
 678 {
 679   edge exit = single_exit (loop);
 680
 681   if (!exit)
 682     return NULL;
 683
 684   if (!just_once_each_iteration_p (loop, exit->src))
 685     return NULL;
 686
 687   return exit;
 688 }
 689
 690 /* Dumps information about the induction variable IV to FILE.  Don't dump
 691    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 692    preceding spaces indicated by INDENT_LEVEL.  */
 693
 694 void
 695 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 696 {
 697   const char *p;
 698   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 699
 700   if (indent_level > 4)
 701     indent_level = 4;
 702   p = spaces + 8 - (indent_level << 1);
 703
 704   fprintf (file, "%sIV struct:\n", p);
 705   if (iv->ssa_name && dump_name)
 706     {
 707       fprintf (file, "%s  SSA_NAME:\t", p);
 708       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 709       fprintf (file, "\n");
 710     }
 711
 712   fprintf (file, "%s  Type:\t", p);
 713   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 714   fprintf (file, "\n");
 715
 716   fprintf (file, "%s  Base:\t", p);
 717   print_generic_expr (file, iv->base, TDF_SLIM);
 718   fprintf (file, "\n");
 719
 720   fprintf (file, "%s  Step:\t", p);
 721   print_generic_expr (file, iv->step, TDF_SLIM);
 722   fprintf (file, "\n");
 723
 724   if (iv->base_object)
 725     {
 726       fprintf (file, "%s  Object:\t", p);
 727       print_generic_expr (file, iv->base_object, TDF_SLIM);
 728       fprintf (file, "\n");
 729     }
 730
 731   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 732
 733   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 734            p, iv->no_overflow ? "No-overflow" : "Overflow");
 735 }
 736
 737 /* Dumps information about the USE to FILE.  */
 738
 739 void
 740 dump_use (FILE *file, struct iv_use *use)
 741 {
 742   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 743   fprintf (file, "    At stmt:\t");
 744   print_gimple_stmt (file, use->stmt, 0, 0);
 745   fprintf (file, "    At pos:\t");
 746   if (use->op_p)
 747     print_generic_expr (file, *use->op_p, TDF_SLIM);
 748   fprintf (file, "\n");
 749   dump_iv (file, use->iv, false, 2);
 750 }
 751
 752 /* Dumps information about the uses to FILE.  */
 753
 754 void
 755 dump_groups (FILE *file, struct ivopts_data *data)
 756 {
 757   unsigned i, j;
 758   struct iv_group *group;
 759
 760   for (i = 0; i < data->vgroups.length (); i++)
 761     {
 762       group = data->vgroups[i];
 763       fprintf (file, "Group %d:\n", group->id);
 764       if (group->type == USE_NONLINEAR_EXPR)
 765         fprintf (file, "  Type:\tGENERIC\n");
 766       else if (group->type == USE_ADDRESS)
 767         fprintf (file, "  Type:\tADDRESS\n");
 768       else
 769         {
 770           gcc_assert (group->type == USE_COMPARE);
 771           fprintf (file, "  Type:\tCOMPARE\n");
 772         }
 773       for (j = 0; j < group->vuses.length (); j++)
 774         dump_use (file, group->vuses[j]);
 775     }
 776 }
 777
 778 /* Dumps information about induction variable candidate CAND to FILE.  */
 779
 780 void
 781 dump_cand (FILE *file, struct iv_cand *cand)
 782 {
 783   struct iv *iv = cand->iv;
 784
 785   fprintf (file, "Candidate %d:\n", cand->id);
 786   if (cand->depends_on)
 787     {
 788       fprintf (file, "  Depend on: ");
 789       dump_bitmap (file, cand->depends_on);
 790     }
 791
 792   if (cand->var_before)
 793     {
 794       fprintf (file, "  Var befor: ");
 795       print_generic_expr (file, cand->var_before, TDF_SLIM);
 796       fprintf (file, "\n");
 797     }
 798   if (cand->var_after)
 799     {
 800       fprintf (file, "  Var after: ");
 801       print_generic_expr (file, cand->var_after, TDF_SLIM);
 802       fprintf (file, "\n");
 803     }
 804
 805   switch (cand->pos)
 806     {
 807     case IP_NORMAL:
 808       fprintf (file, "  Incr POS: before exit test\n");
 809       break;
 810
 811     case IP_BEFORE_USE:
 812       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 813       break;
 814
 815     case IP_AFTER_USE:
 816       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 817       break;
 818
 819     case IP_END:
 820       fprintf (file, "  Incr POS: at end\n");
 821       break;
 822
 823     case IP_ORIGINAL:
 824       fprintf (file, "  Incr POS: orig biv\n");
 825       break;
 826     }
 827
 828   dump_iv (file, iv, false, 1);
 829 }
 830
 831 /* Returns the info for ssa version VER.  */
 832
 833 static inline struct version_info *
 834 ver_info (struct ivopts_data *data, unsigned ver)
 835 {
 836   return data->version_info + ver;
 837 }
 838
 839 /* Returns the info for ssa name NAME.  */
 840
 841 static inline struct version_info *
 842 name_info (struct ivopts_data *data, tree name)
 843 {
 844   return ver_info (data, SSA_NAME_VERSION (name));
 845 }
 846
 847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 848    emitted in LOOP.  */
 849
 850 static bool
 851 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 852 {
 853   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 854
 855   gcc_assert (bb);
 856
 857   if (sbb == loop->latch)
 858     return true;
 859
 860   if (sbb != bb)
 861     return false;
 862
 863   return stmt == last_stmt (bb);
 864 }
 865
 866 /* Returns true if STMT if after the place where the original induction
 867    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 868    if the positions are identical.  */
 869
 870 static bool
 871 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 872 {
 873   basic_block cand_bb = gimple_bb (cand->incremented_at);
 874   basic_block stmt_bb = gimple_bb (stmt);
 875
 876   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 877     return false;
 878
 879   if (stmt_bb != cand_bb)
 880     return true;
 881
 882   if (true_if_equal
 883       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 884     return true;
 885   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 886 }
 887
 888 /* Returns true if STMT if after the place where the induction variable
 889    CAND is incremented in LOOP.  */
 890
 891 static bool
 892 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 893 {
 894   switch (cand->pos)
 895     {
 896     case IP_END:
 897       return false;
 898
 899     case IP_NORMAL:
 900       return stmt_after_ip_normal_pos (loop, stmt);
 901
 902     case IP_ORIGINAL:
 903     case IP_AFTER_USE:
 904       return stmt_after_inc_pos (cand, stmt, false);
 905
 906     case IP_BEFORE_USE:
 907       return stmt_after_inc_pos (cand, stmt, true);
 908
 909     default:
 910       gcc_unreachable ();
 911     }
 912 }
 913
 914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 915
 916 static bool
 917 abnormal_ssa_name_p (tree exp)
 918 {
 919   if (!exp)
 920     return false;
 921
 922   if (TREE_CODE (exp) != SSA_NAME)
 923     return false;
 924
 925   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 926 }
 927
 928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 929    abnormal phi node.  Callback for for_each_index.  */
 930
 931 static bool
 932 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 933                                   void *data ATTRIBUTE_UNUSED)
 934 {
 935   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 936     {
 937       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 938         return false;
 939       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 940         return false;
 941     }
 942
 943   return !abnormal_ssa_name_p (*index);
 944 }
 945
 946 /* Returns true if EXPR contains a ssa name that occurs in an
 947    abnormal phi node.  */
 948
 949 bool
 950 contains_abnormal_ssa_name_p (tree expr)
 951 {
 952   enum tree_code code;
 953   enum tree_code_class codeclass;
 954
 955   if (!expr)
 956     return false;
 957
 958   code = TREE_CODE (expr);
 959   codeclass = TREE_CODE_CLASS (code);
 960
 961   if (code == SSA_NAME)
 962     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 963
 964   if (code == INTEGER_CST
 965       || is_gimple_min_invariant (expr))
 966     return false;
 967
 968   if (code == ADDR_EXPR)
 969     return !for_each_index (&TREE_OPERAND (expr, 0),
 970                             idx_contains_abnormal_ssa_name_p,
 971                             NULL);
 972
 973   if (code == COND_EXPR)
 974     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 975       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 976       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 977
 978   switch (codeclass)
 979     {
 980     case tcc_binary:
 981     case tcc_comparison:
 982       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 983         return true;
 984
 985       /* Fallthru.  */
 986     case tcc_unary:
 987       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 988         return true;
 989
 990       break;
 991
 992     default:
 993       gcc_unreachable ();
 994     }
 995
 996   return false;
 997 }
 998
 999 /*  Returns the structure describing number of iterations determined from
1000     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1001
1002 static struct tree_niter_desc *
1003 niter_for_exit (struct ivopts_data *data, edge exit)
1004 {
1005   struct tree_niter_desc *desc;
1006   tree_niter_desc **slot;
1007
1008   if (!data->niters)
1009     {
1010       data->niters = new hash_map<edge, tree_niter_desc *>;
1011       slot = NULL;
1012     }
1013   else
1014     slot = data->niters->get (exit);
1015
1016   if (!slot)
1017     {
1018       /* Try to determine number of iterations.  We cannot safely work with ssa
1019          names that appear in phi nodes on abnormal edges, so that we do not
1020          create overlapping life ranges for them (PR 27283).  */
1021       desc = XNEW (struct tree_niter_desc);
1022       if (!number_of_iterations_exit (data->current_loop,
1023                                       exit, desc, true)
1024           || contains_abnormal_ssa_name_p (desc->niter))
1025         {
1026           XDELETE (desc);
1027           desc = NULL;
1028         }
1029       data->niters->put (exit, desc);
1030     }
1031   else
1032     desc = *slot;
1033
1034   return desc;
1035 }
1036
1037 /* Returns the structure describing number of iterations determined from
1038    single dominating exit of DATA->current_loop, or NULL if something
1039    goes wrong.  */
1040
1041 static struct tree_niter_desc *
1042 niter_for_single_dom_exit (struct ivopts_data *data)
1043 {
1044   edge exit = single_dom_exit (data->current_loop);
1045
1046   if (!exit)
1047     return NULL;
1048
1049   return niter_for_exit (data, exit);
1050 }
1051
1052 /* Initializes data structures used by the iv optimization pass, stored
1053    in DATA.  */
1054
1055 static void
1056 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1057 {
1058   data->version_info_size = 2 * num_ssa_names;
1059   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1060   data->relevant = BITMAP_ALLOC (NULL);
1061   data->important_candidates = BITMAP_ALLOC (NULL);
1062   data->max_inv_id = 0;
1063   data->niters = NULL;
1064   data->vgroups.create (20);
1065   data->vcands.create (20);
1066   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1067   data->max_inv_expr_id = 0;
1068   data->name_expansion_cache = NULL;
1069   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1070   data->iv_common_cands.create (20);
1071   decl_rtl_to_reset.create (20);
1072   gcc_obstack_init (&data->iv_obstack);
1073 }
1074
1075 /* Returns a memory object to that EXPR points.  In case we are able to
1076    determine that it does not point to any such object, NULL is returned.  */
1077
1078 static tree
1079 determine_base_object (tree expr)
1080 {
1081   enum tree_code code = TREE_CODE (expr);
1082   tree base, obj;
1083
1084   /* If this is a pointer casted to any type, we need to determine
1085      the base object for the pointer; so handle conversions before
1086      throwing away non-pointer expressions.  */
1087   if (CONVERT_EXPR_P (expr))
1088     return determine_base_object (TREE_OPERAND (expr, 0));
1089
1090   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1091     return NULL_TREE;
1092
1093   switch (code)
1094     {
1095     case INTEGER_CST:
1096       return NULL_TREE;
1097
1098     case ADDR_EXPR:
1099       obj = TREE_OPERAND (expr, 0);
1100       base = get_base_address (obj);
1101
1102       if (!base)
1103         return expr;
1104
1105       if (TREE_CODE (base) == MEM_REF)
1106         return determine_base_object (TREE_OPERAND (base, 0));
1107
1108       return fold_convert (ptr_type_node,
1109                            build_fold_addr_expr (base));
1110
1111     case POINTER_PLUS_EXPR:
1112       return determine_base_object (TREE_OPERAND (expr, 0));
1113
1114     case PLUS_EXPR:
1115     case MINUS_EXPR:
1116       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1117       gcc_unreachable ();
1118
1119     default:
1120       return fold_convert (ptr_type_node, expr);
1121     }
1122 }
1123
1124 /* Return true if address expression with non-DECL_P operand appears
1125    in EXPR.  */
1126
1127 static bool
1128 contain_complex_addr_expr (tree expr)
1129 {
1130   bool res = false;
1131
1132   STRIP_NOPS (expr);
1133   switch (TREE_CODE (expr))
1134     {
1135     case POINTER_PLUS_EXPR:
1136     case PLUS_EXPR:
1137     case MINUS_EXPR:
1138       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1139       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1140       break;
1141
1142     case ADDR_EXPR:
1143       return (!DECL_P (TREE_OPERAND (expr, 0)));
1144
1145     default:
1146       return false;
1147     }
1148
1149   return res;
1150 }
1151
1152 /* Allocates an induction variable with given initial value BASE and step STEP
1153    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1154
1155 static struct iv *
1156 alloc_iv (struct ivopts_data *data, tree base, tree step,
1157           bool no_overflow = false)
1158 {
1159   tree expr = base;
1160   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1161                                               sizeof (struct iv));
1162   gcc_assert (step != NULL_TREE);
1163
1164   /* Lower address expression in base except ones with DECL_P as operand.
1165      By doing this:
1166        1) More accurate cost can be computed for address expressions;
1167        2) Duplicate candidates won't be created for bases in different
1168           forms, like &a[0] and &a.  */
1169   STRIP_NOPS (expr);
1170   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1171       || contain_complex_addr_expr (expr))
1172     {
1173       aff_tree comb;
1174       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1175       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1176     }
1177
1178   iv->base = base;
1179   iv->base_object = determine_base_object (base);
1180   iv->step = step;
1181   iv->biv_p = false;
1182   iv->nonlin_use = NULL;
1183   iv->ssa_name = NULL_TREE;
1184   iv->no_overflow = no_overflow;
1185   iv->have_address_use = false;
1186
1187   return iv;
1188 }
1189
1190 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1191    doesn't overflow.  */
1192
1193 static void
1194 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1195         bool no_overflow)
1196 {
1197   struct version_info *info = name_info (data, iv);
1198
1199   gcc_assert (!info->iv);
1200
1201   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1202   info->iv = alloc_iv (data, base, step, no_overflow);
1203   info->iv->ssa_name = iv;
1204 }
1205
1206 /* Finds induction variable declaration for VAR.  */
1207
1208 static struct iv *
1209 get_iv (struct ivopts_data *data, tree var)
1210 {
1211   basic_block bb;
1212   tree type = TREE_TYPE (var);
1213
1214   if (!POINTER_TYPE_P (type)
1215       && !INTEGRAL_TYPE_P (type))
1216     return NULL;
1217
1218   if (!name_info (data, var)->iv)
1219     {
1220       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1221
1222       if (!bb
1223           || !flow_bb_inside_loop_p (data->current_loop, bb))
1224         set_iv (data, var, var, build_int_cst (type, 0), true);
1225     }
1226
1227   return name_info (data, var)->iv;
1228 }
1229
1230 /* Return the first non-invariant ssa var found in EXPR.  */
1231
1232 static tree
1233 extract_single_var_from_expr (tree expr)
1234 {
1235   int i, n;
1236   tree tmp;
1237   enum tree_code code;
1238
1239   if (!expr || is_gimple_min_invariant (expr))
1240     return NULL;
1241
1242   code = TREE_CODE (expr);
1243   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1244     {
1245       n = TREE_OPERAND_LENGTH (expr);
1246       for (i = 0; i < n; i++)
1247         {
1248           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1249
1250           if (tmp)
1251             return tmp;
1252         }
1253     }
1254   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1255 }
1256
1257 /* Finds basic ivs.  */
1258
1259 static bool
1260 find_bivs (struct ivopts_data *data)
1261 {
1262   gphi *phi;
1263   affine_iv iv;
1264   tree step, type, base, stop;
1265   bool found = false;
1266   struct loop *loop = data->current_loop;
1267   gphi_iterator psi;
1268
1269   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1270     {
1271       phi = psi.phi ();
1272
1273       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1274         continue;
1275
1276       if (virtual_operand_p (PHI_RESULT (phi)))
1277         continue;
1278
1279       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1280         continue;
1281
1282       if (integer_zerop (iv.step))
1283         continue;
1284
1285       step = iv.step;
1286       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1287       /* Stop expanding iv base at the first ssa var referred by iv step.
1288          Ideally we should stop at any ssa var, because that's expensive
1289          and unusual to happen, we just do it on the first one.
1290
1291          See PR64705 for the rationale.  */
1292       stop = extract_single_var_from_expr (step);
1293       base = expand_simple_operations (base, stop);
1294       if (contains_abnormal_ssa_name_p (base)
1295           || contains_abnormal_ssa_name_p (step))
1296         continue;
1297
1298       type = TREE_TYPE (PHI_RESULT (phi));
1299       base = fold_convert (type, base);
1300       if (step)
1301         {
1302           if (POINTER_TYPE_P (type))
1303             step = convert_to_ptrofftype (step);
1304           else
1305             step = fold_convert (type, step);
1306         }
1307
1308       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1309       found = true;
1310     }
1311
1312   return found;
1313 }
1314
1315 /* Marks basic ivs.  */
1316
1317 static void
1318 mark_bivs (struct ivopts_data *data)
1319 {
1320   gphi *phi;
1321   gimple *def;
1322   tree var;
1323   struct iv *iv, *incr_iv;
1324   struct loop *loop = data->current_loop;
1325   basic_block incr_bb;
1326   gphi_iterator psi;
1327
1328   data->bivs_not_used_in_addr = 0;
1329   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1330     {
1331       phi = psi.phi ();
1332
1333       iv = get_iv (data, PHI_RESULT (phi));
1334       if (!iv)
1335         continue;
1336
1337       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1338       def = SSA_NAME_DEF_STMT (var);
1339       /* Don't mark iv peeled from other one as biv.  */
1340       if (def
1341           && gimple_code (def) == GIMPLE_PHI
1342           && gimple_bb (def) == loop->header)
1343         continue;
1344
1345       incr_iv = get_iv (data, var);
1346       if (!incr_iv)
1347         continue;
1348
1349       /* If the increment is in the subloop, ignore it.  */
1350       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1351       if (incr_bb->loop_father != data->current_loop
1352           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1353         continue;
1354
1355       iv->biv_p = true;
1356       incr_iv->biv_p = true;
1357       if (iv->no_overflow)
1358         data->bivs_not_used_in_addr++;
1359       if (incr_iv->no_overflow)
1360         data->bivs_not_used_in_addr++;
1361     }
1362 }
1363
1364 /* Checks whether STMT defines a linear induction variable and stores its
1365    parameters to IV.  */
1366
1367 static bool
1368 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1369 {
1370   tree lhs, stop;
1371   struct loop *loop = data->current_loop;
1372
1373   iv->base = NULL_TREE;
1374   iv->step = NULL_TREE;
1375
1376   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1377     return false;
1378
1379   lhs = gimple_assign_lhs (stmt);
1380   if (TREE_CODE (lhs) != SSA_NAME)
1381     return false;
1382
1383   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1384     return false;
1385
1386   /* Stop expanding iv base at the first ssa var referred by iv step.
1387      Ideally we should stop at any ssa var, because that's expensive
1388      and unusual to happen, we just do it on the first one.
1389
1390      See PR64705 for the rationale.  */
1391   stop = extract_single_var_from_expr (iv->step);
1392   iv->base = expand_simple_operations (iv->base, stop);
1393   if (contains_abnormal_ssa_name_p (iv->base)
1394       || contains_abnormal_ssa_name_p (iv->step))
1395     return false;
1396
1397   /* If STMT could throw, then do not consider STMT as defining a GIV.
1398      While this will suppress optimizations, we can not safely delete this
1399      GIV and associated statements, even if it appears it is not used.  */
1400   if (stmt_could_throw_p (stmt))
1401     return false;
1402
1403   return true;
1404 }
1405
1406 /* Finds general ivs in statement STMT.  */
1407
1408 static void
1409 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1410 {
1411   affine_iv iv;
1412
1413   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1414     return;
1415
1416   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1417 }
1418
1419 /* Finds general ivs in basic block BB.  */
1420
1421 static void
1422 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1423 {
1424   gimple_stmt_iterator bsi;
1425
1426   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1427     find_givs_in_stmt (data, gsi_stmt (bsi));
1428 }
1429
1430 /* Finds general ivs.  */
1431
1432 static void
1433 find_givs (struct ivopts_data *data)
1434 {
1435   struct loop *loop = data->current_loop;
1436   basic_block *body = get_loop_body_in_dom_order (loop);
1437   unsigned i;
1438
1439   for (i = 0; i < loop->num_nodes; i++)
1440     find_givs_in_bb (data, body[i]);
1441   free (body);
1442 }
1443
1444 /* For each ssa name defined in LOOP determines whether it is an induction
1445    variable and if so, its initial value and step.  */
1446
1447 static bool
1448 find_induction_variables (struct ivopts_data *data)
1449 {
1450   unsigned i;
1451   bitmap_iterator bi;
1452
1453   if (!find_bivs (data))
1454     return false;
1455
1456   find_givs (data);
1457   mark_bivs (data);
1458
1459   if (dump_file && (dump_flags & TDF_DETAILS))
1460     {
1461       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1462
1463       if (niter)
1464         {
1465           fprintf (dump_file, "  number of iterations ");
1466           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1467           if (!integer_zerop (niter->may_be_zero))
1468             {
1469               fprintf (dump_file, "; zero if ");
1470               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1471             }
1472           fprintf (dump_file, "\n");
1473         };
1474
1475       fprintf (dump_file, "\n<Induction Vars>:\n");
1476       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1477         {
1478           struct version_info *info = ver_info (data, i);
1479           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1480             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1481         }
1482     }
1483
1484   return true;
1485 }
1486
1487 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1488    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1489    is the const offset stripped from IV base; for other types use, both
1490    are zero by default.  */
1491
1492 static struct iv_use *
1493 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1494             gimple *stmt, enum use_type type, tree addr_base,
1495             unsigned HOST_WIDE_INT addr_offset)
1496 {
1497   struct iv_use *use = XCNEW (struct iv_use);
1498
1499   use->id = group->vuses.length ();
1500   use->group_id = group->id;
1501   use->type = type;
1502   use->iv = iv;
1503   use->stmt = stmt;
1504   use->op_p = use_p;
1505   use->addr_base = addr_base;
1506   use->addr_offset = addr_offset;
1507
1508   group->vuses.safe_push (use);
1509   return use;
1510 }
1511
1512 /* Checks whether OP is a loop-level invariant and if so, records it.
1513    NONLINEAR_USE is true if the invariant is used in a way we do not
1514    handle specially.  */
1515
1516 static void
1517 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1518 {
1519   basic_block bb;
1520   struct version_info *info;
1521
1522   if (TREE_CODE (op) != SSA_NAME
1523       || virtual_operand_p (op))
1524     return;
1525
1526   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1527   if (bb
1528       && flow_bb_inside_loop_p (data->current_loop, bb))
1529     return;
1530
1531   info = name_info (data, op);
1532   info->name = op;
1533   info->has_nonlin_use |= nonlinear_use;
1534   if (!info->inv_id)
1535     info->inv_id = ++data->max_inv_id;
1536   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1537 }
1538
1539 static tree
1540 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1541
1542 /* Record a group of TYPE.  */
1543
1544 static struct iv_group *
1545 record_group (struct ivopts_data *data, enum use_type type)
1546 {
1547   struct iv_group *group = XCNEW (struct iv_group);
1548
1549   group->id = data->vgroups.length ();
1550   group->type = type;
1551   group->related_cands = BITMAP_ALLOC (NULL);
1552   group->vuses.create (1);
1553
1554   data->vgroups.safe_push (group);
1555   return group;
1556 }
1557
1558 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1559    New group will be created if there is no existing group for the use.  */
1560
1561 static struct iv_use *
1562 record_group_use (struct ivopts_data *data, tree *use_p,
1563                   struct iv *iv, gimple *stmt, enum use_type type)
1564 {
1565   tree addr_base = NULL;
1566   struct iv_group *group = NULL;
1567   unsigned HOST_WIDE_INT addr_offset = 0;
1568
1569   /* Record non address type use in a new group.  */
1570   if (type == USE_ADDRESS && iv->base_object)
1571     {
1572       unsigned int i;
1573
1574       addr_base = strip_offset (iv->base, &addr_offset);
1575       for (i = 0; i < data->vgroups.length (); i++)
1576         {
1577           struct iv_use *use;
1578
1579           group = data->vgroups[i];
1580           use = group->vuses[0];
1581           if (use->type != USE_ADDRESS || !use->iv->base_object)
1582             continue;
1583
1584           /* Check if it has the same stripped base and step.  */
1585           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1586               && operand_equal_p (iv->step, use->iv->step, 0)
1587               && operand_equal_p (addr_base, use->addr_base, 0))
1588             break;
1589         }
1590       if (i == data->vgroups.length ())
1591         group = NULL;
1592     }
1593
1594   if (!group)
1595     group = record_group (data, type);
1596
1597   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1598 }
1599
1600 /* Checks whether the use OP is interesting and if so, records it.  */
1601
1602 static struct iv_use *
1603 find_interesting_uses_op (struct ivopts_data *data, tree op)
1604 {
1605   struct iv *iv;
1606   gimple *stmt;
1607   struct iv_use *use;
1608
1609   if (TREE_CODE (op) != SSA_NAME)
1610     return NULL;
1611
1612   iv = get_iv (data, op);
1613   if (!iv)
1614     return NULL;
1615
1616   if (iv->nonlin_use)
1617     {
1618       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1619       return iv->nonlin_use;
1620     }
1621
1622   if (integer_zerop (iv->step))
1623     {
1624       record_invariant (data, op, true);
1625       return NULL;
1626     }
1627
1628   stmt = SSA_NAME_DEF_STMT (op);
1629   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1630
1631   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1632   iv->nonlin_use = use;
1633   return use;
1634 }
1635
1636 /* Given a condition in statement STMT, checks whether it is a compare
1637    of an induction variable and an invariant.  If this is the case,
1638    CONTROL_VAR is set to location of the iv, BOUND to the location of
1639    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1640    induction variable descriptions, and true is returned.  If this is not
1641    the case, CONTROL_VAR and BOUND are set to the arguments of the
1642    condition and false is returned.  */
1643
1644 static bool
1645 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1646                        tree **control_var, tree **bound,
1647                        struct iv **iv_var, struct iv **iv_bound)
1648 {
1649   /* The objects returned when COND has constant operands.  */
1650   static struct iv const_iv;
1651   static tree zero;
1652   tree *op0 = &zero, *op1 = &zero;
1653   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1654   bool ret = false;
1655
1656   if (gimple_code (stmt) == GIMPLE_COND)
1657     {
1658       gcond *cond_stmt = as_a <gcond *> (stmt);
1659       op0 = gimple_cond_lhs_ptr (cond_stmt);
1660       op1 = gimple_cond_rhs_ptr (cond_stmt);
1661     }
1662   else
1663     {
1664       op0 = gimple_assign_rhs1_ptr (stmt);
1665       op1 = gimple_assign_rhs2_ptr (stmt);
1666     }
1667
1668   zero = integer_zero_node;
1669   const_iv.step = integer_zero_node;
1670
1671   if (TREE_CODE (*op0) == SSA_NAME)
1672     iv0 = get_iv (data, *op0);
1673   if (TREE_CODE (*op1) == SSA_NAME)
1674     iv1 = get_iv (data, *op1);
1675
1676   /* Exactly one of the compared values must be an iv, and the other one must
1677      be an invariant.  */
1678   if (!iv0 || !iv1)
1679     goto end;
1680
1681   if (integer_zerop (iv0->step))
1682     {
1683       /* Control variable may be on the other side.  */
1684       std::swap (op0, op1);
1685       std::swap (iv0, iv1);
1686     }
1687   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1688
1689 end:
1690   if (control_var)
1691     *control_var = op0;
1692   if (iv_var)
1693     *iv_var = iv0;
1694   if (bound)
1695     *bound = op1;
1696   if (iv_bound)
1697     *iv_bound = iv1;
1698
1699   return ret;
1700 }
1701
1702 /* Checks whether the condition in STMT is interesting and if so,
1703    records it.  */
1704
1705 static void
1706 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1707 {
1708   tree *var_p, *bound_p;
1709   struct iv *var_iv;
1710
1711   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1712     {
1713       find_interesting_uses_op (data, *var_p);
1714       find_interesting_uses_op (data, *bound_p);
1715       return;
1716     }
1717
1718   record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1719 }
1720
1721 /* Returns the outermost loop EXPR is obviously invariant in
1722    relative to the loop LOOP, i.e. if all its operands are defined
1723    outside of the returned loop.  Returns NULL if EXPR is not
1724    even obviously invariant in LOOP.  */
1725
1726 struct loop *
1727 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1728 {
1729   basic_block def_bb;
1730   unsigned i, len;
1731
1732   if (is_gimple_min_invariant (expr))
1733     return current_loops->tree_root;
1734
1735   if (TREE_CODE (expr) == SSA_NAME)
1736     {
1737       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1738       if (def_bb)
1739         {
1740           if (flow_bb_inside_loop_p (loop, def_bb))
1741             return NULL;
1742           return superloop_at_depth (loop,
1743                                      loop_depth (def_bb->loop_father) + 1);
1744         }
1745
1746       return current_loops->tree_root;
1747     }
1748
1749   if (!EXPR_P (expr))
1750     return NULL;
1751
1752   unsigned maxdepth = 0;
1753   len = TREE_OPERAND_LENGTH (expr);
1754   for (i = 0; i < len; i++)
1755     {
1756       struct loop *ivloop;
1757       if (!TREE_OPERAND (expr, i))
1758         continue;
1759
1760       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1761       if (!ivloop)
1762         return NULL;
1763       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1764     }
1765
1766   return superloop_at_depth (loop, maxdepth);
1767 }
1768
1769 /* Returns true if expression EXPR is obviously invariant in LOOP,
1770    i.e. if all its operands are defined outside of the LOOP.  LOOP
1771    should not be the function body.  */
1772
1773 bool
1774 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1775 {
1776   basic_block def_bb;
1777   unsigned i, len;
1778
1779   gcc_assert (loop_depth (loop) > 0);
1780
1781   if (is_gimple_min_invariant (expr))
1782     return true;
1783
1784   if (TREE_CODE (expr) == SSA_NAME)
1785     {
1786       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1787       if (def_bb
1788           && flow_bb_inside_loop_p (loop, def_bb))
1789         return false;
1790
1791       return true;
1792     }
1793
1794   if (!EXPR_P (expr))
1795     return false;
1796
1797   len = TREE_OPERAND_LENGTH (expr);
1798   for (i = 0; i < len; i++)
1799     if (TREE_OPERAND (expr, i)
1800         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1801       return false;
1802
1803   return true;
1804 }
1805
1806 /* Given expression EXPR which computes inductive values with respect
1807    to loop recorded in DATA, this function returns biv from which EXPR
1808    is derived by tracing definition chains of ssa variables in EXPR.  */
1809
1810 static struct iv*
1811 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1812 {
1813   struct iv *iv;
1814   unsigned i, n;
1815   tree e2, e1;
1816   enum tree_code code;
1817   gimple *stmt;
1818
1819   if (expr == NULL_TREE)
1820     return NULL;
1821
1822   if (is_gimple_min_invariant (expr))
1823     return NULL;
1824
1825   code = TREE_CODE (expr);
1826   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1827     {
1828       n = TREE_OPERAND_LENGTH (expr);
1829       for (i = 0; i < n; i++)
1830         {
1831           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1832           if (iv)
1833             return iv;
1834         }
1835     }
1836
1837   /* Stop if it's not ssa name.  */
1838   if (code != SSA_NAME)
1839     return NULL;
1840
1841   iv = get_iv (data, expr);
1842   if (!iv || integer_zerop (iv->step))
1843     return NULL;
1844   else if (iv->biv_p)
1845     return iv;
1846
1847   stmt = SSA_NAME_DEF_STMT (expr);
1848   if (gphi *phi = dyn_cast <gphi *> (stmt))
1849     {
1850       ssa_op_iter iter;
1851       use_operand_p use_p;
1852
1853       if (virtual_operand_p (gimple_phi_result (phi)))
1854         return NULL;
1855
1856       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1857         {
1858           tree use = USE_FROM_PTR (use_p);
1859           iv = find_deriving_biv_for_expr (data, use);
1860           if (iv)
1861             return iv;
1862         }
1863       return NULL;
1864     }
1865   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1866     return NULL;
1867
1868   e1 = gimple_assign_rhs1 (stmt);
1869   code = gimple_assign_rhs_code (stmt);
1870   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1871     return find_deriving_biv_for_expr (data, e1);
1872
1873   switch (code)
1874     {
1875     case MULT_EXPR:
1876     case PLUS_EXPR:
1877     case MINUS_EXPR:
1878     case POINTER_PLUS_EXPR:
1879       /* Increments, decrements and multiplications by a constant
1880          are simple.  */
1881       e2 = gimple_assign_rhs2 (stmt);
1882       iv = find_deriving_biv_for_expr (data, e2);
1883       if (iv)
1884         return iv;
1885
1886       /* Fallthru.  */
1887     CASE_CONVERT:
1888       /* Casts are simple.  */
1889       return find_deriving_biv_for_expr (data, e1);
1890
1891     default:
1892       break;
1893     }
1894
1895   return NULL;
1896 }
1897
1898 /* Record BIV, its predecessor and successor that they are used in
1899    address type uses.  */
1900
1901 static void
1902 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1903 {
1904   unsigned i;
1905   tree type, base_1, base_2;
1906   bitmap_iterator bi;
1907
1908   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1909       || biv->have_address_use || !biv->no_overflow)
1910     return;
1911
1912   type = TREE_TYPE (biv->base);
1913   if (!INTEGRAL_TYPE_P (type))
1914     return;
1915
1916   biv->have_address_use = true;
1917   data->bivs_not_used_in_addr--;
1918   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1919   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1920     {
1921       struct iv *iv = ver_info (data, i)->iv;
1922
1923       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1924           || iv->have_address_use || !iv->no_overflow)
1925         continue;
1926
1927       if (type != TREE_TYPE (iv->base)
1928           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1929         continue;
1930
1931       if (!operand_equal_p (biv->step, iv->step, 0))
1932         continue;
1933
1934       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1935       if (operand_equal_p (base_1, iv->base, 0)
1936           || operand_equal_p (base_2, biv->base, 0))
1937         {
1938           iv->have_address_use = true;
1939           data->bivs_not_used_in_addr--;
1940         }
1941     }
1942 }
1943
1944 /* Cumulates the steps of indices into DATA and replaces their values with the
1945    initial ones.  Returns false when the value of the index cannot be determined.
1946    Callback for for_each_index.  */
1947
1948 struct ifs_ivopts_data
1949 {
1950   struct ivopts_data *ivopts_data;
1951   gimple *stmt;
1952   tree step;
1953 };
1954
1955 static bool
1956 idx_find_step (tree base, tree *idx, void *data)
1957 {
1958   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1959   struct iv *iv;
1960   bool use_overflow_semantics = false;
1961   tree step, iv_base, iv_step, lbound, off;
1962   struct loop *loop = dta->ivopts_data->current_loop;
1963
1964   /* If base is a component ref, require that the offset of the reference
1965      be invariant.  */
1966   if (TREE_CODE (base) == COMPONENT_REF)
1967     {
1968       off = component_ref_field_offset (base);
1969       return expr_invariant_in_loop_p (loop, off);
1970     }
1971
1972   /* If base is array, first check whether we will be able to move the
1973      reference out of the loop (in order to take its address in strength
1974      reduction).  In order for this to work we need both lower bound
1975      and step to be loop invariants.  */
1976   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1977     {
1978       /* Moreover, for a range, the size needs to be invariant as well.  */
1979       if (TREE_CODE (base) == ARRAY_RANGE_REF
1980           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1981         return false;
1982
1983       step = array_ref_element_size (base);
1984       lbound = array_ref_low_bound (base);
1985
1986       if (!expr_invariant_in_loop_p (loop, step)
1987           || !expr_invariant_in_loop_p (loop, lbound))
1988         return false;
1989     }
1990
1991   if (TREE_CODE (*idx) != SSA_NAME)
1992     return true;
1993
1994   iv = get_iv (dta->ivopts_data, *idx);
1995   if (!iv)
1996     return false;
1997
1998   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1999           *&x[0], which is not folded and does not trigger the
2000           ARRAY_REF path below.  */
2001   *idx = iv->base;
2002
2003   if (integer_zerop (iv->step))
2004     return true;
2005
2006   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2007     {
2008       step = array_ref_element_size (base);
2009
2010       /* We only handle addresses whose step is an integer constant.  */
2011       if (TREE_CODE (step) != INTEGER_CST)
2012         return false;
2013     }
2014   else
2015     /* The step for pointer arithmetics already is 1 byte.  */
2016     step = size_one_node;
2017
2018   iv_base = iv->base;
2019   iv_step = iv->step;
2020   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2021     use_overflow_semantics = true;
2022
2023   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2024                             sizetype, &iv_base, &iv_step, dta->stmt,
2025                             use_overflow_semantics))
2026     {
2027       /* The index might wrap.  */
2028       return false;
2029     }
2030
2031   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2032   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2033
2034   if (dta->ivopts_data->bivs_not_used_in_addr)
2035     {
2036       if (!iv->biv_p)
2037         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2038
2039       record_biv_for_address_use (dta->ivopts_data, iv);
2040     }
2041   return true;
2042 }
2043
2044 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2045    object is passed to it in DATA.  */
2046
2047 static bool
2048 idx_record_use (tree base, tree *idx,
2049                 void *vdata)
2050 {
2051   struct ivopts_data *data = (struct ivopts_data *) vdata;
2052   find_interesting_uses_op (data, *idx);
2053   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2054     {
2055       find_interesting_uses_op (data, array_ref_element_size (base));
2056       find_interesting_uses_op (data, array_ref_low_bound (base));
2057     }
2058   return true;
2059 }
2060
2061 /* If we can prove that TOP = cst * BOT for some constant cst,
2062    store cst to MUL and return true.  Otherwise return false.
2063    The returned value is always sign-extended, regardless of the
2064    signedness of TOP and BOT.  */
2065
2066 static bool
2067 constant_multiple_of (tree top, tree bot, widest_int *mul)
2068 {
2069   tree mby;
2070   enum tree_code code;
2071   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2072   widest_int res, p0, p1;
2073
2074   STRIP_NOPS (top);
2075   STRIP_NOPS (bot);
2076
2077   if (operand_equal_p (top, bot, 0))
2078     {
2079       *mul = 1;
2080       return true;
2081     }
2082
2083   code = TREE_CODE (top);
2084   switch (code)
2085     {
2086     case MULT_EXPR:
2087       mby = TREE_OPERAND (top, 1);
2088       if (TREE_CODE (mby) != INTEGER_CST)
2089         return false;
2090
2091       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2092         return false;
2093
2094       *mul = wi::sext (res * wi::to_widest (mby), precision);
2095       return true;
2096
2097     case PLUS_EXPR:
2098     case MINUS_EXPR:
2099       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2100           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2101         return false;
2102
2103       if (code == MINUS_EXPR)
2104         p1 = -p1;
2105       *mul = wi::sext (p0 + p1, precision);
2106       return true;
2107
2108     case INTEGER_CST:
2109       if (TREE_CODE (bot) != INTEGER_CST)
2110         return false;
2111
2112       p0 = widest_int::from (top, SIGNED);
2113       p1 = widest_int::from (bot, SIGNED);
2114       if (p1 == 0)
2115         return false;
2116       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2117       return res == 0;
2118
2119     default:
2120       return false;
2121     }
2122 }
2123
2124 /* Return true if memory reference REF with step STEP may be unaligned.  */
2125
2126 static bool
2127 may_be_unaligned_p (tree ref, tree step)
2128 {
2129   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2130      thus they are not misaligned.  */
2131   if (TREE_CODE (ref) == TARGET_MEM_REF)
2132     return false;
2133
2134   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2135   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2136     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2137
2138   unsigned HOST_WIDE_INT bitpos;
2139   unsigned int ref_align;
2140   get_object_alignment_1 (ref, &ref_align, &bitpos);
2141   if (ref_align < align
2142       || (bitpos % align) != 0
2143       || (bitpos % BITS_PER_UNIT) != 0)
2144     return true;
2145
2146   unsigned int trailing_zeros = tree_ctz (step);
2147   if (trailing_zeros < HOST_BITS_PER_INT
2148       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2149     return true;
2150
2151   return false;
2152 }
2153
2154 /* Return true if EXPR may be non-addressable.   */
2155
2156 bool
2157 may_be_nonaddressable_p (tree expr)
2158 {
2159   switch (TREE_CODE (expr))
2160     {
2161     case TARGET_MEM_REF:
2162       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2163          target, thus they are always addressable.  */
2164       return false;
2165
2166     case MEM_REF:
2167       /* Likewise for MEM_REFs, modulo the storage order.  */
2168       return REF_REVERSE_STORAGE_ORDER (expr);
2169
2170     case BIT_FIELD_REF:
2171       if (REF_REVERSE_STORAGE_ORDER (expr))
2172         return true;
2173       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2174
2175     case COMPONENT_REF:
2176       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2177         return true;
2178       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2179              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2180
2181     case ARRAY_REF:
2182     case ARRAY_RANGE_REF:
2183       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2184         return true;
2185       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2186
2187     case VIEW_CONVERT_EXPR:
2188       /* This kind of view-conversions may wrap non-addressable objects
2189          and make them look addressable.  After some processing the
2190          non-addressability may be uncovered again, causing ADDR_EXPRs
2191          of inappropriate objects to be built.  */
2192       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2193           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2194         return true;
2195       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2196
2197     CASE_CONVERT:
2198       return true;
2199
2200     default:
2201       break;
2202     }
2203
2204   return false;
2205 }
2206
2207 /* Finds addresses in *OP_P inside STMT.  */
2208
2209 static void
2210 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2211                                tree *op_p)
2212 {
2213   tree base = *op_p, step = size_zero_node;
2214   struct iv *civ;
2215   struct ifs_ivopts_data ifs_ivopts_data;
2216
2217   /* Do not play with volatile memory references.  A bit too conservative,
2218      perhaps, but safe.  */
2219   if (gimple_has_volatile_ops (stmt))
2220     goto fail;
2221
2222   /* Ignore bitfields for now.  Not really something terribly complicated
2223      to handle.  TODO.  */
2224   if (TREE_CODE (base) == BIT_FIELD_REF)
2225     goto fail;
2226
2227   base = unshare_expr (base);
2228
2229   if (TREE_CODE (base) == TARGET_MEM_REF)
2230     {
2231       tree type = build_pointer_type (TREE_TYPE (base));
2232       tree astep;
2233
2234       if (TMR_BASE (base)
2235           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2236         {
2237           civ = get_iv (data, TMR_BASE (base));
2238           if (!civ)
2239             goto fail;
2240
2241           TMR_BASE (base) = civ->base;
2242           step = civ->step;
2243         }
2244       if (TMR_INDEX2 (base)
2245           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2246         {
2247           civ = get_iv (data, TMR_INDEX2 (base));
2248           if (!civ)
2249             goto fail;
2250
2251           TMR_INDEX2 (base) = civ->base;
2252           step = civ->step;
2253         }
2254       if (TMR_INDEX (base)
2255           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2256         {
2257           civ = get_iv (data, TMR_INDEX (base));
2258           if (!civ)
2259             goto fail;
2260
2261           TMR_INDEX (base) = civ->base;
2262           astep = civ->step;
2263
2264           if (astep)
2265             {
2266               if (TMR_STEP (base))
2267                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2268
2269               step = fold_build2 (PLUS_EXPR, type, step, astep);
2270             }
2271         }
2272
2273       if (integer_zerop (step))
2274         goto fail;
2275       base = tree_mem_ref_addr (type, base);
2276     }
2277   else
2278     {
2279       ifs_ivopts_data.ivopts_data = data;
2280       ifs_ivopts_data.stmt = stmt;
2281       ifs_ivopts_data.step = size_zero_node;
2282       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2283           || integer_zerop (ifs_ivopts_data.step))
2284         goto fail;
2285       step = ifs_ivopts_data.step;
2286
2287       /* Check that the base expression is addressable.  This needs
2288          to be done after substituting bases of IVs into it.  */
2289       if (may_be_nonaddressable_p (base))
2290         goto fail;
2291
2292       /* Moreover, on strict alignment platforms, check that it is
2293          sufficiently aligned.  */
2294       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2295         goto fail;
2296
2297       base = build_fold_addr_expr (base);
2298
2299       /* Substituting bases of IVs into the base expression might
2300          have caused folding opportunities.  */
2301       if (TREE_CODE (base) == ADDR_EXPR)
2302         {
2303           tree *ref = &TREE_OPERAND (base, 0);
2304           while (handled_component_p (*ref))
2305             ref = &TREE_OPERAND (*ref, 0);
2306           if (TREE_CODE (*ref) == MEM_REF)
2307             {
2308               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2309                                       TREE_OPERAND (*ref, 0),
2310                                       TREE_OPERAND (*ref, 1));
2311               if (tem)
2312                 *ref = tem;
2313             }
2314         }
2315     }
2316
2317   civ = alloc_iv (data, base, step);
2318   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2319   return;
2320
2321 fail:
2322   for_each_index (op_p, idx_record_use, data);
2323 }
2324
2325 /* Finds and records invariants used in STMT.  */
2326
2327 static void
2328 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2329 {
2330   ssa_op_iter iter;
2331   use_operand_p use_p;
2332   tree op;
2333
2334   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2335     {
2336       op = USE_FROM_PTR (use_p);
2337       record_invariant (data, op, false);
2338     }
2339 }
2340
2341 /* Finds interesting uses of induction variables in the statement STMT.  */
2342
2343 static void
2344 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2345 {
2346   struct iv *iv;
2347   tree op, *lhs, *rhs;
2348   ssa_op_iter iter;
2349   use_operand_p use_p;
2350   enum tree_code code;
2351
2352   find_invariants_stmt (data, stmt);
2353
2354   if (gimple_code (stmt) == GIMPLE_COND)
2355     {
2356       find_interesting_uses_cond (data, stmt);
2357       return;
2358     }
2359
2360   if (is_gimple_assign (stmt))
2361     {
2362       lhs = gimple_assign_lhs_ptr (stmt);
2363       rhs = gimple_assign_rhs1_ptr (stmt);
2364
2365       if (TREE_CODE (*lhs) == SSA_NAME)
2366         {
2367           /* If the statement defines an induction variable, the uses are not
2368              interesting by themselves.  */
2369
2370           iv = get_iv (data, *lhs);
2371
2372           if (iv && !integer_zerop (iv->step))
2373             return;
2374         }
2375
2376       code = gimple_assign_rhs_code (stmt);
2377       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2378           && (REFERENCE_CLASS_P (*rhs)
2379               || is_gimple_val (*rhs)))
2380         {
2381           if (REFERENCE_CLASS_P (*rhs))
2382             find_interesting_uses_address (data, stmt, rhs);
2383           else
2384             find_interesting_uses_op (data, *rhs);
2385
2386           if (REFERENCE_CLASS_P (*lhs))
2387             find_interesting_uses_address (data, stmt, lhs);
2388           return;
2389         }
2390       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2391         {
2392           find_interesting_uses_cond (data, stmt);
2393           return;
2394         }
2395
2396       /* TODO -- we should also handle address uses of type
2397
2398          memory = call (whatever);
2399
2400          and
2401
2402          call (memory).  */
2403     }
2404
2405   if (gimple_code (stmt) == GIMPLE_PHI
2406       && gimple_bb (stmt) == data->current_loop->header)
2407     {
2408       iv = get_iv (data, PHI_RESULT (stmt));
2409
2410       if (iv && !integer_zerop (iv->step))
2411         return;
2412     }
2413
2414   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2415     {
2416       op = USE_FROM_PTR (use_p);
2417
2418       if (TREE_CODE (op) != SSA_NAME)
2419         continue;
2420
2421       iv = get_iv (data, op);
2422       if (!iv)
2423         continue;
2424
2425       find_interesting_uses_op (data, op);
2426     }
2427 }
2428
2429 /* Finds interesting uses of induction variables outside of loops
2430    on loop exit edge EXIT.  */
2431
2432 static void
2433 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2434 {
2435   gphi *phi;
2436   gphi_iterator psi;
2437   tree def;
2438
2439   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2440     {
2441       phi = psi.phi ();
2442       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2443       if (!virtual_operand_p (def))
2444         find_interesting_uses_op (data, def);
2445     }
2446 }
2447
2448 /* Compute maximum offset of [base + offset] addressing mode
2449    for memory reference represented by USE.  */
2450
2451 static HOST_WIDE_INT
2452 compute_max_addr_offset (struct iv_use *use)
2453 {
2454   int width;
2455   rtx reg, addr;
2456   HOST_WIDE_INT i, off;
2457   unsigned list_index, num;
2458   addr_space_t as;
2459   machine_mode mem_mode, addr_mode;
2460   static vec<HOST_WIDE_INT> max_offset_list;
2461
2462   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2463   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2464
2465   num = max_offset_list.length ();
2466   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2467   if (list_index >= num)
2468     {
2469       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2470       for (; num < max_offset_list.length (); num++)
2471         max_offset_list[num] = -1;
2472     }
2473
2474   off = max_offset_list[list_index];
2475   if (off != -1)
2476     return off;
2477
2478   addr_mode = targetm.addr_space.address_mode (as);
2479   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2480   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2481
2482   width = GET_MODE_BITSIZE (addr_mode) - 1;
2483   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2484     width = HOST_BITS_PER_WIDE_INT - 1;
2485
2486   for (i = width; i > 0; i--)
2487     {
2488       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2489       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2490       if (memory_address_addr_space_p (mem_mode, addr, as))
2491         break;
2492
2493       /* For some strict-alignment targets, the offset must be naturally
2494          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2495       off = ((unsigned HOST_WIDE_INT) 1 << i);
2496       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2497         {
2498           off -= GET_MODE_SIZE (mem_mode);
2499           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2500           if (memory_address_addr_space_p (mem_mode, addr, as))
2501             break;
2502         }
2503     }
2504   if (i == 0)
2505     off = 0;
2506
2507   max_offset_list[list_index] = off;
2508   return off;
2509 }
2510
2511 /* Comparison function to sort group in ascending order of addr_offset.  */
2512
2513 static int
2514 group_compare_offset (const void *a, const void *b)
2515 {
2516   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2517   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2518
2519   if ((*u1)->addr_offset != (*u2)->addr_offset)
2520     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2521   else
2522     return 0;
2523 }
2524
2525 /* Check if small groups should be split.  Return true if no group
2526    contains more than two uses with distinct addr_offsets.  Return
2527    false otherwise.  We want to split such groups because:
2528
2529      1) Small groups don't have much benefit and may interfer with
2530         general candidate selection.
2531      2) Size for problem with only small groups is usually small and
2532         general algorithm can handle it well.
2533
2534    TODO -- Above claim may not hold when we want to merge memory
2535    accesses with conseuctive addresses.  */
2536
2537 static bool
2538 split_small_address_groups_p (struct ivopts_data *data)
2539 {
2540   unsigned int i, j, distinct = 1;
2541   struct iv_use *pre;
2542   struct iv_group *group;
2543
2544   for (i = 0; i < data->vgroups.length (); i++)
2545     {
2546       group = data->vgroups[i];
2547       if (group->vuses.length () == 1)
2548         continue;
2549
2550       gcc_assert (group->type == USE_ADDRESS);
2551       if (group->vuses.length () == 2)
2552         {
2553           if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2554             std::swap (group->vuses[0], group->vuses[1]);
2555         }
2556       else
2557         group->vuses.qsort (group_compare_offset);
2558
2559       if (distinct > 2)
2560         continue;
2561
2562       distinct = 1;
2563       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2564         {
2565           if (group->vuses[j]->addr_offset != pre->addr_offset)
2566             {
2567               pre = group->vuses[j];
2568               distinct++;
2569             }
2570
2571           if (distinct > 2)
2572             break;
2573         }
2574     }
2575
2576   return (distinct <= 2);
2577 }
2578
2579 /* For each group of address type uses, this function further groups
2580    these uses according to the maximum offset supported by target's
2581    [base + offset] addressing mode.  */
2582
2583 static void
2584 split_address_groups (struct ivopts_data *data)
2585 {
2586   unsigned int i, j;
2587   HOST_WIDE_INT max_offset = -1;
2588
2589   /* Reset max offset to split all small groups.  */
2590   if (split_small_address_groups_p (data))
2591     max_offset = 0;
2592
2593   for (i = 0; i < data->vgroups.length (); i++)
2594     {
2595       struct iv_group *group = data->vgroups[i];
2596       struct iv_use *use = group->vuses[0];
2597
2598       use->id = 0;
2599       use->group_id = group->id;
2600       if (group->vuses.length () == 1)
2601         continue;
2602
2603       if (max_offset != 0)
2604         max_offset = compute_max_addr_offset (use);
2605
2606       for (j = 1; j < group->vuses.length (); j++)
2607         {
2608           struct iv_use *next = group->vuses[j];
2609
2610           /* Only uses with offset that can fit in offset part against
2611              the first use can be grouped together.  */
2612           if (next->addr_offset - use->addr_offset
2613               > (unsigned HOST_WIDE_INT) max_offset)
2614             break;
2615
2616           next->id = j;
2617           next->group_id = group->id;
2618         }
2619       /* Split group.  */
2620       if (j < group->vuses.length ())
2621         {
2622           struct iv_group *new_group = record_group (data, group->type);
2623           new_group->vuses.safe_splice (group->vuses);
2624           new_group->vuses.block_remove (0, j);
2625           group->vuses.truncate (j);
2626         }
2627     }
2628 }
2629
2630 /* Finds uses of the induction variables that are interesting.  */
2631
2632 static void
2633 find_interesting_uses (struct ivopts_data *data)
2634 {
2635   basic_block bb;
2636   gimple_stmt_iterator bsi;
2637   basic_block *body = get_loop_body (data->current_loop);
2638   unsigned i;
2639   edge e;
2640
2641   for (i = 0; i < data->current_loop->num_nodes; i++)
2642     {
2643       edge_iterator ei;
2644       bb = body[i];
2645
2646       FOR_EACH_EDGE (e, ei, bb->succs)
2647         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2648             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2649           find_interesting_uses_outside (data, e);
2650
2651       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2652         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2653       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2654         if (!is_gimple_debug (gsi_stmt (bsi)))
2655           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2656     }
2657
2658   split_address_groups (data);
2659
2660   if (dump_file && (dump_flags & TDF_DETAILS))
2661     {
2662       bitmap_iterator bi;
2663
2664       fprintf (dump_file, "\n<Invariant Vars>:\n");
2665       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2666         {
2667           struct version_info *info = ver_info (data, i);
2668           if (info->inv_id)
2669             {
2670               fprintf (dump_file, "Inv %d:\t", info->inv_id);
2671               print_generic_expr (dump_file, info->name, TDF_SLIM);
2672               fprintf (dump_file, "%s\n",
2673                        info->has_nonlin_use ? "" : "\t(eliminable)");
2674             }
2675         }
2676
2677       fprintf (dump_file, "\n<IV Groups>:\n");
2678       dump_groups (dump_file, data);
2679       fprintf (dump_file, "\n");
2680     }
2681
2682   free (body);
2683 }
2684
2685 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2686    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2687    we are at the top-level of the processed address.  */
2688
2689 static tree
2690 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2691                 HOST_WIDE_INT *offset)
2692 {
2693   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2694   enum tree_code code;
2695   tree type, orig_type = TREE_TYPE (expr);
2696   HOST_WIDE_INT off0, off1, st;
2697   tree orig_expr = expr;
2698
2699   STRIP_NOPS (expr);
2700
2701   type = TREE_TYPE (expr);
2702   code = TREE_CODE (expr);
2703   *offset = 0;
2704
2705   switch (code)
2706     {
2707     case INTEGER_CST:
2708       if (!cst_and_fits_in_hwi (expr)
2709           || integer_zerop (expr))
2710         return orig_expr;
2711
2712       *offset = int_cst_value (expr);
2713       return build_int_cst (orig_type, 0);
2714
2715     case POINTER_PLUS_EXPR:
2716     case PLUS_EXPR:
2717     case MINUS_EXPR:
2718       op0 = TREE_OPERAND (expr, 0);
2719       op1 = TREE_OPERAND (expr, 1);
2720
2721       op0 = strip_offset_1 (op0, false, false, &off0);
2722       op1 = strip_offset_1 (op1, false, false, &off1);
2723
2724       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2725       if (op0 == TREE_OPERAND (expr, 0)
2726           && op1 == TREE_OPERAND (expr, 1))
2727         return orig_expr;
2728
2729       if (integer_zerop (op1))
2730         expr = op0;
2731       else if (integer_zerop (op0))
2732         {
2733           if (code == MINUS_EXPR)
2734             expr = fold_build1 (NEGATE_EXPR, type, op1);
2735           else
2736             expr = op1;
2737         }
2738       else
2739         expr = fold_build2 (code, type, op0, op1);
2740
2741       return fold_convert (orig_type, expr);
2742
2743     case MULT_EXPR:
2744       op1 = TREE_OPERAND (expr, 1);
2745       if (!cst_and_fits_in_hwi (op1))
2746         return orig_expr;
2747
2748       op0 = TREE_OPERAND (expr, 0);
2749       op0 = strip_offset_1 (op0, false, false, &off0);
2750       if (op0 == TREE_OPERAND (expr, 0))
2751         return orig_expr;
2752
2753       *offset = off0 * int_cst_value (op1);
2754       if (integer_zerop (op0))
2755         expr = op0;
2756       else
2757         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2758
2759       return fold_convert (orig_type, expr);
2760
2761     case ARRAY_REF:
2762     case ARRAY_RANGE_REF:
2763       if (!inside_addr)
2764         return orig_expr;
2765
2766       step = array_ref_element_size (expr);
2767       if (!cst_and_fits_in_hwi (step))
2768         break;
2769
2770       st = int_cst_value (step);
2771       op1 = TREE_OPERAND (expr, 1);
2772       op1 = strip_offset_1 (op1, false, false, &off1);
2773       *offset = off1 * st;
2774
2775       if (top_compref
2776           && integer_zerop (op1))
2777         {
2778           /* Strip the component reference completely.  */
2779           op0 = TREE_OPERAND (expr, 0);
2780           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2781           *offset += off0;
2782           return op0;
2783         }
2784       break;
2785
2786     case COMPONENT_REF:
2787       {
2788         tree field;
2789
2790         if (!inside_addr)
2791           return orig_expr;
2792
2793         tmp = component_ref_field_offset (expr);
2794         field = TREE_OPERAND (expr, 1);
2795         if (top_compref
2796             && cst_and_fits_in_hwi (tmp)
2797             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2798           {
2799             HOST_WIDE_INT boffset, abs_off;
2800
2801             /* Strip the component reference completely.  */
2802             op0 = TREE_OPERAND (expr, 0);
2803             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2804             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2805             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2806             if (boffset < 0)
2807               abs_off = -abs_off;
2808
2809             *offset = off0 + int_cst_value (tmp) + abs_off;
2810             return op0;
2811           }
2812       }
2813       break;
2814
2815     case ADDR_EXPR:
2816       op0 = TREE_OPERAND (expr, 0);
2817       op0 = strip_offset_1 (op0, true, true, &off0);
2818       *offset += off0;
2819
2820       if (op0 == TREE_OPERAND (expr, 0))
2821         return orig_expr;
2822
2823       expr = build_fold_addr_expr (op0);
2824       return fold_convert (orig_type, expr);
2825
2826     case MEM_REF:
2827       /* ???  Offset operand?  */
2828       inside_addr = false;
2829       break;
2830
2831     default:
2832       return orig_expr;
2833     }
2834
2835   /* Default handling of expressions for that we want to recurse into
2836      the first operand.  */
2837   op0 = TREE_OPERAND (expr, 0);
2838   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2839   *offset += off0;
2840
2841   if (op0 == TREE_OPERAND (expr, 0)
2842       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2843     return orig_expr;
2844
2845   expr = copy_node (expr);
2846   TREE_OPERAND (expr, 0) = op0;
2847   if (op1)
2848     TREE_OPERAND (expr, 1) = op1;
2849
2850   /* Inside address, we might strip the top level component references,
2851      thus changing type of the expression.  Handling of ADDR_EXPR
2852      will fix that.  */
2853   expr = fold_convert (orig_type, expr);
2854
2855   return expr;
2856 }
2857
2858 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2859
2860 static tree
2861 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2862 {
2863   HOST_WIDE_INT off;
2864   tree core = strip_offset_1 (expr, false, false, &off);
2865   *offset = off;
2866   return core;
2867 }
2868
2869 /* Returns variant of TYPE that can be used as base for different uses.
2870    We return unsigned type with the same precision, which avoids problems
2871    with overflows.  */
2872
2873 static tree
2874 generic_type_for (tree type)
2875 {
2876   if (POINTER_TYPE_P (type))
2877     return unsigned_type_for (type);
2878
2879   if (TYPE_UNSIGNED (type))
2880     return type;
2881
2882   return unsigned_type_for (type);
2883 }
2884
2885 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2886    the bitmap to that we should store it.  */
2887
2888 static struct ivopts_data *fd_ivopts_data;
2889 static tree
2890 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2891 {
2892   bitmap *depends_on = (bitmap *) data;
2893   struct version_info *info;
2894
2895   if (TREE_CODE (*expr_p) != SSA_NAME)
2896     return NULL_TREE;
2897   info = name_info (fd_ivopts_data, *expr_p);
2898
2899   if (!info->inv_id || info->has_nonlin_use)
2900     return NULL_TREE;
2901
2902   if (!*depends_on)
2903     *depends_on = BITMAP_ALLOC (NULL);
2904   bitmap_set_bit (*depends_on, info->inv_id);
2905
2906   return NULL_TREE;
2907 }
2908
2909 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2910    position to POS.  If USE is not NULL, the candidate is set as related to
2911    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2912    replacement of the final value of the iv by a direct computation.  */
2913
2914 static struct iv_cand *
2915 add_candidate_1 (struct ivopts_data *data,
2916                  tree base, tree step, bool important, enum iv_position pos,
2917                  struct iv_use *use, gimple *incremented_at,
2918                  struct iv *orig_iv = NULL)
2919 {
2920   unsigned i;
2921   struct iv_cand *cand = NULL;
2922   tree type, orig_type;
2923
2924   gcc_assert (base && step);
2925
2926   /* -fkeep-gc-roots-live means that we have to keep a real pointer
2927      live, but the ivopts code may replace a real pointer with one
2928      pointing before or after the memory block that is then adjusted
2929      into the memory block during the loop.  FIXME: It would likely be
2930      better to actually force the pointer live and still use ivopts;
2931      for example, it would be enough to write the pointer into memory
2932      and keep it there until after the loop.  */
2933   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2934     return NULL;
2935
2936   /* For non-original variables, make sure their values are computed in a type
2937      that does not invoke undefined behavior on overflows (since in general,
2938      we cannot prove that these induction variables are non-wrapping).  */
2939   if (pos != IP_ORIGINAL)
2940     {
2941       orig_type = TREE_TYPE (base);
2942       type = generic_type_for (orig_type);
2943       if (type != orig_type)
2944         {
2945           base = fold_convert (type, base);
2946           step = fold_convert (type, step);
2947         }
2948     }
2949
2950   for (i = 0; i < data->vcands.length (); i++)
2951     {
2952       cand = data->vcands[i];
2953
2954       if (cand->pos != pos)
2955         continue;
2956
2957       if (cand->incremented_at != incremented_at
2958           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2959               && cand->ainc_use != use))
2960         continue;
2961
2962       if (operand_equal_p (base, cand->iv->base, 0)
2963           && operand_equal_p (step, cand->iv->step, 0)
2964           && (TYPE_PRECISION (TREE_TYPE (base))
2965               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2966         break;
2967     }
2968
2969   if (i == data->vcands.length ())
2970     {
2971       cand = XCNEW (struct iv_cand);
2972       cand->id = i;
2973       cand->iv = alloc_iv (data, base, step);
2974       cand->pos = pos;
2975       if (pos != IP_ORIGINAL)
2976         {
2977           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2978           cand->var_after = cand->var_before;
2979         }
2980       cand->important = important;
2981       cand->incremented_at = incremented_at;
2982       data->vcands.safe_push (cand);
2983
2984       if (TREE_CODE (step) != INTEGER_CST)
2985         {
2986           fd_ivopts_data = data;
2987           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2988         }
2989
2990       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2991         cand->ainc_use = use;
2992       else
2993         cand->ainc_use = NULL;
2994
2995       cand->orig_iv = orig_iv;
2996       if (dump_file && (dump_flags & TDF_DETAILS))
2997         dump_cand (dump_file, cand);
2998     }
2999
3000   cand->important |= important;
3001
3002   /* Relate candidate to the group for which it is added.  */
3003   if (use)
3004     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3005
3006   return cand;
3007 }
3008
3009 /* Returns true if incrementing the induction variable at the end of the LOOP
3010    is allowed.
3011
3012    The purpose is to avoid splitting latch edge with a biv increment, thus
3013    creating a jump, possibly confusing other optimization passes and leaving
3014    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
3015    is not available (so we do not have a better alternative), or if the latch
3016    edge is already nonempty.  */
3017
3018 static bool
3019 allow_ip_end_pos_p (struct loop *loop)
3020 {
3021   if (!ip_normal_pos (loop))
3022     return true;
3023
3024   if (!empty_block_p (ip_end_pos (loop)))
3025     return true;
3026
3027   return false;
3028 }
3029
3030 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3031    Important field is set to IMPORTANT.  */
3032
3033 static void
3034 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3035                         bool important, struct iv_use *use)
3036 {
3037   basic_block use_bb = gimple_bb (use->stmt);
3038   machine_mode mem_mode;
3039   unsigned HOST_WIDE_INT cstepi;
3040
3041   /* If we insert the increment in any position other than the standard
3042      ones, we must ensure that it is incremented once per iteration.
3043      It must not be in an inner nested loop, or one side of an if
3044      statement.  */
3045   if (use_bb->loop_father != data->current_loop
3046       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3047       || stmt_could_throw_p (use->stmt)
3048       || !cst_and_fits_in_hwi (step))
3049     return;
3050
3051   cstepi = int_cst_value (step);
3052
3053   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3054   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3055         || USE_STORE_PRE_INCREMENT (mem_mode))
3056        && GET_MODE_SIZE (mem_mode) == cstepi)
3057       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3058            || USE_STORE_PRE_DECREMENT (mem_mode))
3059           && GET_MODE_SIZE (mem_mode) == -cstepi))
3060     {
3061       enum tree_code code = MINUS_EXPR;
3062       tree new_base;
3063       tree new_step = step;
3064
3065       if (POINTER_TYPE_P (TREE_TYPE (base)))
3066         {
3067           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3068           code = POINTER_PLUS_EXPR;
3069         }
3070       else
3071         new_step = fold_convert (TREE_TYPE (base), new_step);
3072       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3073       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3074                        use->stmt);
3075     }
3076   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3077         || USE_STORE_POST_INCREMENT (mem_mode))
3078        && GET_MODE_SIZE (mem_mode) == cstepi)
3079       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3080            || USE_STORE_POST_DECREMENT (mem_mode))
3081           && GET_MODE_SIZE (mem_mode) == -cstepi))
3082     {
3083       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3084                        use->stmt);
3085     }
3086 }
3087
3088 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3089    position to POS.  If USE is not NULL, the candidate is set as related to
3090    it.  The candidate computation is scheduled before exit condition and at
3091    the end of loop.  */
3092
3093 static void
3094 add_candidate (struct ivopts_data *data,
3095                tree base, tree step, bool important, struct iv_use *use,
3096                struct iv *orig_iv = NULL)
3097 {
3098   if (ip_normal_pos (data->current_loop))
3099     add_candidate_1 (data, base, step, important,
3100                      IP_NORMAL, use, NULL, orig_iv);
3101   if (ip_end_pos (data->current_loop)
3102       && allow_ip_end_pos_p (data->current_loop))
3103     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3104 }
3105
3106 /* Adds standard iv candidates.  */
3107
3108 static void
3109 add_standard_iv_candidates (struct ivopts_data *data)
3110 {
3111   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3112
3113   /* The same for a double-integer type if it is still fast enough.  */
3114   if (TYPE_PRECISION
3115         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3116       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3117     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3118                    build_int_cst (long_integer_type_node, 1), true, NULL);
3119
3120   /* The same for a double-integer type if it is still fast enough.  */
3121   if (TYPE_PRECISION
3122         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3123       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3124     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3125                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3126 }
3127
3128
3129 /* Adds candidates bases on the old induction variable IV.  */
3130
3131 static void
3132 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3133 {
3134   gimple *phi;
3135   tree def;
3136   struct iv_cand *cand;
3137
3138   /* Check if this biv is used in address type use.  */
3139   if (iv->no_overflow  && iv->have_address_use
3140       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3141       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3142     {
3143       tree base = fold_convert (sizetype, iv->base);
3144       tree step = fold_convert (sizetype, iv->step);
3145
3146       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3147       add_candidate (data, base, step, true, NULL, iv);
3148       /* Add iv cand of the original type only if it has nonlinear use.  */
3149       if (iv->nonlin_use)
3150         add_candidate (data, iv->base, iv->step, true, NULL);
3151     }
3152   else
3153     add_candidate (data, iv->base, iv->step, true, NULL);
3154
3155   /* The same, but with initial value zero.  */
3156   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3157     add_candidate (data, size_int (0), iv->step, true, NULL);
3158   else
3159     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3160                    iv->step, true, NULL);
3161
3162   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3163   if (gimple_code (phi) == GIMPLE_PHI)
3164     {
3165       /* Additionally record the possibility of leaving the original iv
3166          untouched.  */
3167       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3168       /* Don't add candidate if it's from another PHI node because
3169          it's an affine iv appearing in the form of PEELED_CHREC.  */
3170       phi = SSA_NAME_DEF_STMT (def);
3171       if (gimple_code (phi) != GIMPLE_PHI)
3172         {
3173           cand = add_candidate_1 (data,
3174                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3175                                   SSA_NAME_DEF_STMT (def));
3176           if (cand)
3177             {
3178               cand->var_before = iv->ssa_name;
3179               cand->var_after = def;
3180             }
3181         }
3182       else
3183         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3184     }
3185 }
3186
3187 /* Adds candidates based on the old induction variables.  */
3188
3189 static void
3190 add_iv_candidate_for_bivs (struct ivopts_data *data)
3191 {
3192   unsigned i;
3193   struct iv *iv;
3194   bitmap_iterator bi;
3195
3196   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3197     {
3198       iv = ver_info (data, i)->iv;
3199       if (iv && iv->biv_p && !integer_zerop (iv->step))
3200         add_iv_candidate_for_biv (data, iv);
3201     }
3202 }
3203
3204 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3205
3206 static void
3207 record_common_cand (struct ivopts_data *data, tree base,
3208                     tree step, struct iv_use *use)
3209 {
3210   struct iv_common_cand ent;
3211   struct iv_common_cand **slot;
3212
3213   ent.base = base;
3214   ent.step = step;
3215   ent.hash = iterative_hash_expr (base, 0);
3216   ent.hash = iterative_hash_expr (step, ent.hash);
3217
3218   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3219   if (*slot == NULL)
3220     {
3221       *slot = new iv_common_cand ();
3222       (*slot)->base = base;
3223       (*slot)->step = step;
3224       (*slot)->uses.create (8);
3225       (*slot)->hash = ent.hash;
3226       data->iv_common_cands.safe_push ((*slot));
3227     }
3228
3229   gcc_assert (use != NULL);
3230   (*slot)->uses.safe_push (use);
3231   return;
3232 }
3233
3234 /* Comparison function used to sort common candidates.  */
3235
3236 static int
3237 common_cand_cmp (const void *p1, const void *p2)
3238 {
3239   unsigned n1, n2;
3240   const struct iv_common_cand *const *const ccand1
3241     = (const struct iv_common_cand *const *)p1;
3242   const struct iv_common_cand *const *const ccand2
3243     = (const struct iv_common_cand *const *)p2;
3244
3245   n1 = (*ccand1)->uses.length ();
3246   n2 = (*ccand2)->uses.length ();
3247   return n2 - n1;
3248 }
3249
3250 /* Adds IV candidates based on common candidated recorded.  */
3251
3252 static void
3253 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3254 {
3255   unsigned i, j;
3256   struct iv_cand *cand_1, *cand_2;
3257
3258   data->iv_common_cands.qsort (common_cand_cmp);
3259   for (i = 0; i < data->iv_common_cands.length (); i++)
3260     {
3261       struct iv_common_cand *ptr = data->iv_common_cands[i];
3262
3263       /* Only add IV candidate if it's derived from multiple uses.  */
3264       if (ptr->uses.length () <= 1)
3265         break;
3266
3267       cand_1 = NULL;
3268       cand_2 = NULL;
3269       if (ip_normal_pos (data->current_loop))
3270         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3271                                   false, IP_NORMAL, NULL, NULL);
3272
3273       if (ip_end_pos (data->current_loop)
3274           && allow_ip_end_pos_p (data->current_loop))
3275         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3276                                   false, IP_END, NULL, NULL);
3277
3278       /* Bind deriving uses and the new candidates.  */
3279       for (j = 0; j < ptr->uses.length (); j++)
3280         {
3281           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3282           if (cand_1)
3283             bitmap_set_bit (group->related_cands, cand_1->id);
3284           if (cand_2)
3285             bitmap_set_bit (group->related_cands, cand_2->id);
3286         }
3287     }
3288
3289   /* Release data since it is useless from this point.  */
3290   data->iv_common_cand_tab->empty ();
3291   data->iv_common_cands.truncate (0);
3292 }
3293
3294 /* Adds candidates based on the value of USE's iv.  */
3295
3296 static void
3297 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3298 {
3299   unsigned HOST_WIDE_INT offset;
3300   tree base;
3301   tree basetype;
3302   struct iv *iv = use->iv;
3303
3304   add_candidate (data, iv->base, iv->step, false, use);
3305
3306   /* Record common candidate for use in case it can be shared by others.  */
3307   record_common_cand (data, iv->base, iv->step, use);
3308
3309   /* Record common candidate with initial value zero.  */
3310   basetype = TREE_TYPE (iv->base);
3311   if (POINTER_TYPE_P (basetype))
3312     basetype = sizetype;
3313   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3314
3315   /* Record common candidate with constant offset stripped in base.
3316      Like the use itself, we also add candidate directly for it.  */
3317   base = strip_offset (iv->base, &offset);
3318   if (offset || base != iv->base)
3319     {
3320       record_common_cand (data, base, iv->step, use);
3321       add_candidate (data, base, iv->step, false, use);
3322     }
3323
3324   /* Record common candidate with base_object removed in base.  */
3325   if (iv->base_object != NULL)
3326     {
3327       unsigned i;
3328       aff_tree aff_base;
3329       tree step, base_object = iv->base_object;
3330
3331       base = iv->base;
3332       step = iv->step;
3333       STRIP_NOPS (base);
3334       STRIP_NOPS (step);
3335       STRIP_NOPS (base_object);
3336       tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3337       for (i = 0; i < aff_base.n; i++)
3338         {
3339           if (aff_base.elts[i].coef != 1)
3340             continue;
3341
3342           if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3343             break;
3344         }
3345       if (i < aff_base.n)
3346         {
3347           aff_combination_remove_elt (&aff_base, i);
3348           base = aff_combination_to_tree (&aff_base);
3349           basetype = TREE_TYPE (base);
3350           if (POINTER_TYPE_P (basetype))
3351             basetype = sizetype;
3352
3353           step = fold_convert (basetype, step);
3354           record_common_cand (data, base, step, use);
3355           /* Also record common candidate with offset stripped.  */
3356           base = strip_offset (base, &offset);
3357           if (offset)
3358             record_common_cand (data, base, step, use);
3359         }
3360     }
3361
3362   /* At last, add auto-incremental candidates.  Make such variables
3363      important since other iv uses with same base object may be based
3364      on it.  */
3365   if (use != NULL && use->type == USE_ADDRESS)
3366     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3367 }
3368
3369 /* Adds candidates based on the uses.  */
3370
3371 static void
3372 add_iv_candidate_for_groups (struct ivopts_data *data)
3373 {
3374   unsigned i;
3375
3376   /* Only add candidate for the first use in group.  */
3377   for (i = 0; i < data->vgroups.length (); i++)
3378     {
3379       struct iv_group *group = data->vgroups[i];
3380
3381       gcc_assert (group->vuses[0] != NULL);
3382       add_iv_candidate_for_use (data, group->vuses[0]);
3383     }
3384   add_iv_candidate_derived_from_uses (data);
3385 }
3386
3387 /* Record important candidates and add them to related_cands bitmaps.  */
3388
3389 static void
3390 record_important_candidates (struct ivopts_data *data)
3391 {
3392   unsigned i;
3393   struct iv_group *group;
3394
3395   for (i = 0; i < data->vcands.length (); i++)
3396     {
3397       struct iv_cand *cand = data->vcands[i];
3398
3399       if (cand->important)
3400         bitmap_set_bit (data->important_candidates, i);
3401     }
3402
3403   data->consider_all_candidates = (data->vcands.length ()
3404                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3405
3406   /* Add important candidates to groups' related_cands bitmaps.  */
3407   for (i = 0; i < data->vgroups.length (); i++)
3408     {
3409       group = data->vgroups[i];
3410       bitmap_ior_into (group->related_cands, data->important_candidates);
3411     }
3412 }
3413
3414 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3415    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3416    we allocate a simple list to every use.  */
3417
3418 static void
3419 alloc_use_cost_map (struct ivopts_data *data)
3420 {
3421   unsigned i, size, s;
3422
3423   for (i = 0; i < data->vgroups.length (); i++)
3424     {
3425       struct iv_group *group = data->vgroups[i];
3426
3427       if (data->consider_all_candidates)
3428         size = data->vcands.length ();
3429       else
3430         {
3431           s = bitmap_count_bits (group->related_cands);
3432
3433           /* Round up to the power of two, so that moduling by it is fast.  */
3434           size = s ? (1 << ceil_log2 (s)) : 1;
3435         }
3436
3437       group->n_map_members = size;
3438       group->cost_map = XCNEWVEC (struct cost_pair, size);
3439     }
3440 }
3441
3442 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3443    on invariants DEPENDS_ON and that the value used in expressing it
3444    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3445
3446 static void
3447 set_group_iv_cost (struct ivopts_data *data,
3448                    struct iv_group *group, struct iv_cand *cand,
3449                    comp_cost cost, bitmap depends_on, tree value,
3450                    enum tree_code comp, iv_inv_expr_ent *inv_expr)
3451 {
3452   unsigned i, s;
3453
3454   if (cost.infinite_cost_p ())
3455     {
3456       BITMAP_FREE (depends_on);
3457       return;
3458     }
3459
3460   if (data->consider_all_candidates)
3461     {
3462       group->cost_map[cand->id].cand = cand;
3463       group->cost_map[cand->id].cost = cost;
3464       group->cost_map[cand->id].depends_on = depends_on;
3465       group->cost_map[cand->id].value = value;
3466       group->cost_map[cand->id].comp = comp;
3467       group->cost_map[cand->id].inv_expr = inv_expr;
3468       return;
3469     }
3470
3471   /* n_map_members is a power of two, so this computes modulo.  */
3472   s = cand->id & (group->n_map_members - 1);
3473   for (i = s; i < group->n_map_members; i++)
3474     if (!group->cost_map[i].cand)
3475       goto found;
3476   for (i = 0; i < s; i++)
3477     if (!group->cost_map[i].cand)
3478       goto found;
3479
3480   gcc_unreachable ();
3481
3482 found:
3483   group->cost_map[i].cand = cand;
3484   group->cost_map[i].cost = cost;
3485   group->cost_map[i].depends_on = depends_on;
3486   group->cost_map[i].value = value;
3487   group->cost_map[i].comp = comp;
3488   group->cost_map[i].inv_expr = inv_expr;
3489 }
3490
3491 /* Gets cost of (GROUP, CAND) pair.  */
3492
3493 static struct cost_pair *
3494 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3495                    struct iv_cand *cand)
3496 {
3497   unsigned i, s;
3498   struct cost_pair *ret;
3499
3500   if (!cand)
3501     return NULL;
3502
3503   if (data->consider_all_candidates)
3504     {
3505       ret = group->cost_map + cand->id;
3506       if (!ret->cand)
3507         return NULL;
3508
3509       return ret;
3510     }
3511
3512   /* n_map_members is a power of two, so this computes modulo.  */
3513   s = cand->id & (group->n_map_members - 1);
3514   for (i = s; i < group->n_map_members; i++)
3515     if (group->cost_map[i].cand == cand)
3516       return group->cost_map + i;
3517     else if (group->cost_map[i].cand == NULL)
3518       return NULL;
3519   for (i = 0; i < s; i++)
3520     if (group->cost_map[i].cand == cand)
3521       return group->cost_map + i;
3522     else if (group->cost_map[i].cand == NULL)
3523       return NULL;
3524
3525   return NULL;
3526 }
3527
3528 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3529 static rtx
3530 produce_memory_decl_rtl (tree obj, int *regno)
3531 {
3532   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3533   machine_mode address_mode = targetm.addr_space.address_mode (as);
3534   rtx x;
3535
3536   gcc_assert (obj);
3537   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3538     {
3539       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3540       x = gen_rtx_SYMBOL_REF (address_mode, name);
3541       SET_SYMBOL_REF_DECL (x, obj);
3542       x = gen_rtx_MEM (DECL_MODE (obj), x);
3543       set_mem_addr_space (x, as);
3544       targetm.encode_section_info (obj, x, true);
3545     }
3546   else
3547     {
3548       x = gen_raw_REG (address_mode, (*regno)++);
3549       x = gen_rtx_MEM (DECL_MODE (obj), x);
3550       set_mem_addr_space (x, as);
3551     }
3552
3553   return x;
3554 }
3555
3556 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3557    walk_tree.  DATA contains the actual fake register number.  */
3558
3559 static tree
3560 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3561 {
3562   tree obj = NULL_TREE;
3563   rtx x = NULL_RTX;
3564   int *regno = (int *) data;
3565
3566   switch (TREE_CODE (*expr_p))
3567     {
3568     case ADDR_EXPR:
3569       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3570            handled_component_p (*expr_p);
3571            expr_p = &TREE_OPERAND (*expr_p, 0))
3572         continue;
3573       obj = *expr_p;
3574       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3575         x = produce_memory_decl_rtl (obj, regno);
3576       break;
3577
3578     case SSA_NAME:
3579       *ws = 0;
3580       obj = SSA_NAME_VAR (*expr_p);
3581       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3582       if (!obj)
3583         return NULL_TREE;
3584       if (!DECL_RTL_SET_P (obj))
3585         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3586       break;
3587
3588     case VAR_DECL:
3589     case PARM_DECL:
3590     case RESULT_DECL:
3591       *ws = 0;
3592       obj = *expr_p;
3593
3594       if (DECL_RTL_SET_P (obj))
3595         break;
3596
3597       if (DECL_MODE (obj) == BLKmode)
3598         x = produce_memory_decl_rtl (obj, regno);
3599       else
3600         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3601
3602       break;
3603
3604     default:
3605       break;
3606     }
3607
3608   if (x)
3609     {
3610       decl_rtl_to_reset.safe_push (obj);
3611       SET_DECL_RTL (obj, x);
3612     }
3613
3614   return NULL_TREE;
3615 }
3616
3617 /* Determines cost of the computation of EXPR.  */
3618
3619 static unsigned
3620 computation_cost (tree expr, bool speed)
3621 {
3622   rtx_insn *seq;
3623   rtx rslt;
3624   tree type = TREE_TYPE (expr);
3625   unsigned cost;
3626   /* Avoid using hard regs in ways which may be unsupported.  */
3627   int regno = LAST_VIRTUAL_REGISTER + 1;
3628   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3629   enum node_frequency real_frequency = node->frequency;
3630
3631   node->frequency = NODE_FREQUENCY_NORMAL;
3632   crtl->maybe_hot_insn_p = speed;
3633   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3634   start_sequence ();
3635   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3636   seq = get_insns ();
3637   end_sequence ();
3638   default_rtl_profile ();
3639   node->frequency = real_frequency;
3640
3641   cost = seq_cost (seq, speed);
3642   if (MEM_P (rslt))
3643     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3644                           TYPE_ADDR_SPACE (type), speed);
3645   else if (!REG_P (rslt))
3646     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3647
3648   return cost;
3649 }
3650
3651 /* Returns variable containing the value of candidate CAND at statement AT.  */
3652
3653 static tree
3654 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3655 {
3656   if (stmt_after_increment (loop, cand, stmt))
3657     return cand->var_after;
3658   else
3659     return cand->var_before;
3660 }
3661
3662 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3663    same precision that is at least as wide as the precision of TYPE, stores
3664    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3665    type of A and B.  */
3666
3667 static tree
3668 determine_common_wider_type (tree *a, tree *b)
3669 {
3670   tree wider_type = NULL;
3671   tree suba, subb;
3672   tree atype = TREE_TYPE (*a);
3673
3674   if (CONVERT_EXPR_P (*a))
3675     {
3676       suba = TREE_OPERAND (*a, 0);
3677       wider_type = TREE_TYPE (suba);
3678       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3679         return atype;
3680     }
3681   else
3682     return atype;
3683
3684   if (CONVERT_EXPR_P (*b))
3685     {
3686       subb = TREE_OPERAND (*b, 0);
3687       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3688         return atype;
3689     }
3690   else
3691     return atype;
3692
3693   *a = suba;
3694   *b = subb;
3695   return wider_type;
3696 }
3697
3698 /* Determines the expression by that USE is expressed from induction variable
3699    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3700    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3701
3702 static bool
3703 get_computation_aff (struct loop *loop,
3704                      struct iv_use *use, struct iv_cand *cand, gimple *at,
3705                      struct aff_tree *aff)
3706 {
3707   tree ubase = use->iv->base;
3708   tree ustep = use->iv->step;
3709   tree cbase = cand->iv->base;
3710   tree cstep = cand->iv->step, cstep_common;
3711   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3712   tree common_type, var;
3713   tree uutype;
3714   aff_tree cbase_aff, var_aff;
3715   widest_int rat;
3716
3717   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3718     {
3719       /* We do not have a precision to express the values of use.  */
3720       return false;
3721     }
3722
3723   var = var_at_stmt (loop, cand, at);
3724   uutype = unsigned_type_for (utype);
3725
3726   /* If the conversion is not noop, perform it.  */
3727   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3728     {
3729       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3730           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3731         {
3732           tree inner_base, inner_step, inner_type;
3733           inner_base = TREE_OPERAND (cbase, 0);
3734           if (CONVERT_EXPR_P (cstep))
3735             inner_step = TREE_OPERAND (cstep, 0);
3736           else
3737             inner_step = cstep;
3738
3739           inner_type = TREE_TYPE (inner_base);
3740           /* If candidate is added from a biv whose type is smaller than
3741              ctype, we know both candidate and the biv won't overflow.
3742              In this case, it's safe to skip the convertion in candidate.
3743              As an example, (unsigned short)((unsigned long)A) equals to
3744              (unsigned short)A, if A has a type no larger than short.  */
3745           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3746             {
3747               cbase = inner_base;
3748               cstep = inner_step;
3749             }
3750         }
3751       cstep = fold_convert (uutype, cstep);
3752       cbase = fold_convert (uutype, cbase);
3753       var = fold_convert (uutype, var);
3754     }
3755
3756   /* Ratio is 1 when computing the value of biv cand by itself.
3757      We can't rely on constant_multiple_of in this case because the
3758      use is created after the original biv is selected.  The call
3759      could fail because of inconsistent fold behavior.  See PR68021
3760      for more information.  */
3761   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3762     {
3763       gcc_assert (is_gimple_assign (use->stmt));
3764       gcc_assert (use->iv->ssa_name == cand->var_after);
3765       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3766       rat = 1;
3767     }
3768   else if (!constant_multiple_of (ustep, cstep, &rat))
3769     return false;
3770
3771   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3772      type, we achieve better folding by computing their difference in this
3773      wider type, and cast the result to UUTYPE.  We do not need to worry about
3774      overflows, as all the arithmetics will in the end be performed in UUTYPE
3775      anyway.  */
3776   common_type = determine_common_wider_type (&ubase, &cbase);
3777
3778   /* use = ubase - ratio * cbase + ratio * var.  */
3779   tree_to_aff_combination (ubase, common_type, aff);
3780   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3781   tree_to_aff_combination (var, uutype, &var_aff);
3782
3783   /* We need to shift the value if we are after the increment.  */
3784   if (stmt_after_increment (loop, cand, at))
3785     {
3786       aff_tree cstep_aff;
3787
3788       if (common_type != uutype)
3789         cstep_common = fold_convert (common_type, cstep);
3790       else
3791         cstep_common = cstep;
3792
3793       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3794       aff_combination_add (&cbase_aff, &cstep_aff);
3795     }
3796
3797   aff_combination_scale (&cbase_aff, -rat);
3798   aff_combination_add (aff, &cbase_aff);
3799   if (common_type != uutype)
3800     aff_combination_convert (aff, uutype);
3801
3802   aff_combination_scale (&var_aff, rat);
3803   aff_combination_add (aff, &var_aff);
3804
3805   return true;
3806 }
3807
3808 /* Return the type of USE.  */
3809
3810 static tree
3811 get_use_type (struct iv_use *use)
3812 {
3813   tree base_type = TREE_TYPE (use->iv->base);
3814   tree type;
3815
3816   if (use->type == USE_ADDRESS)
3817     {
3818       /* The base_type may be a void pointer.  Create a pointer type based on
3819          the mem_ref instead.  */
3820       type = build_pointer_type (TREE_TYPE (*use->op_p));
3821       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3822                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3823     }
3824   else
3825     type = base_type;
3826
3827   return type;
3828 }
3829
3830 /* Determines the expression by that USE is expressed from induction variable
3831    CAND at statement AT in LOOP.  The computation is unshared.  */
3832
3833 static tree
3834 get_computation_at (struct loop *loop,
3835                     struct iv_use *use, struct iv_cand *cand, gimple *at)
3836 {
3837   aff_tree aff;
3838   tree type = get_use_type (use);
3839
3840   if (!get_computation_aff (loop, use, cand, at, &aff))
3841     return NULL_TREE;
3842   unshare_aff_combination (&aff);
3843   return fold_convert (type, aff_combination_to_tree (&aff));
3844 }
3845
3846 /* Determines the expression by that USE is expressed from induction variable
3847    CAND in LOOP.  The computation is unshared.  */
3848
3849 static tree
3850 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3851 {
3852   return get_computation_at (loop, use, cand, use->stmt);
3853 }
3854
3855 /* Adjust the cost COST for being in loop setup rather than loop body.
3856    If we're optimizing for space, the loop setup overhead is constant;
3857    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3858 static unsigned
3859 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3860 {
3861   if (cost == INFTY)
3862     return cost;
3863   else if (optimize_loop_for_speed_p (data->current_loop))
3864     return cost / avg_loop_niter (data->current_loop);
3865   else
3866     return cost;
3867 }
3868
3869 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3870    validity for a memory reference accessing memory of mode MODE in
3871    address space AS.  */
3872
3873
3874 bool
3875 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3876                                  addr_space_t as)
3877 {
3878 #define MAX_RATIO 128
3879   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3880   static vec<sbitmap> valid_mult_list;
3881   sbitmap valid_mult;
3882
3883   if (data_index >= valid_mult_list.length ())
3884     valid_mult_list.safe_grow_cleared (data_index + 1);
3885
3886   valid_mult = valid_mult_list[data_index];
3887   if (!valid_mult)
3888     {
3889       machine_mode address_mode = targetm.addr_space.address_mode (as);
3890       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3891       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3892       rtx addr, scaled;
3893       HOST_WIDE_INT i;
3894
3895       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3896       bitmap_clear (valid_mult);
3897       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3898       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3899       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3900         {
3901           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3902           if (memory_address_addr_space_p (mode, addr, as)
3903               || memory_address_addr_space_p (mode, scaled, as))
3904             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3905         }
3906
3907       if (dump_file && (dump_flags & TDF_DETAILS))
3908         {
3909           fprintf (dump_file, "  allowed multipliers:");
3910           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3911             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3912               fprintf (dump_file, " %d", (int) i);
3913           fprintf (dump_file, "\n");
3914           fprintf (dump_file, "\n");
3915         }
3916
3917       valid_mult_list[data_index] = valid_mult;
3918     }
3919
3920   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3921     return false;
3922
3923   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3924 }
3925
3926 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3927    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3928    variable is omitted.  Compute the cost for a memory reference that accesses
3929    a memory location of mode MEM_MODE in address space AS.
3930
3931    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3932    size of MEM_MODE / RATIO) is available.  To make this determination, we
3933    look at the size of the increment to be made, which is given in CSTEP.
3934    CSTEP may be zero if the step is unknown.
3935    STMT_AFTER_INC is true iff the statement we're looking at is after the
3936    increment of the original biv.
3937
3938    TODO -- there must be some better way.  This all is quite crude.  */
3939
3940 enum ainc_type
3941 {
3942   AINC_PRE_INC,         /* Pre increment.  */
3943   AINC_PRE_DEC,         /* Pre decrement.  */
3944   AINC_POST_INC,        /* Post increment.  */
3945   AINC_POST_DEC,        /* Post decrement.  */
3946   AINC_NONE             /* Also the number of auto increment types.  */
3947 };
3948
3949 struct address_cost_data
3950 {
3951   HOST_WIDE_INT min_offset, max_offset;
3952   unsigned costs[2][2][2][2];
3953   unsigned ainc_costs[AINC_NONE];
3954 };
3955
3956
3957 static comp_cost
3958 get_address_cost (bool symbol_present, bool var_present,
3959                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3960                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3961                   addr_space_t as, bool speed,
3962                   bool stmt_after_inc, bool *may_autoinc)
3963 {
3964   machine_mode address_mode = targetm.addr_space.address_mode (as);
3965   static vec<address_cost_data *> address_cost_data_list;
3966   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3967   address_cost_data *data;
3968   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3969   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3970   unsigned cost, acost, complexity;
3971   enum ainc_type autoinc_type;
3972   bool offset_p, ratio_p, autoinc;
3973   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3974   unsigned HOST_WIDE_INT mask;
3975   unsigned bits;
3976
3977   if (data_index >= address_cost_data_list.length ())
3978     address_cost_data_list.safe_grow_cleared (data_index + 1);
3979
3980   data = address_cost_data_list[data_index];
3981   if (!data)
3982     {
3983       HOST_WIDE_INT i;
3984       HOST_WIDE_INT rat, off = 0;
3985       int old_cse_not_expected, width;
3986       unsigned sym_p, var_p, off_p, rat_p, add_c;
3987       rtx_insn *seq;
3988       rtx addr, base;
3989       rtx reg0, reg1;
3990
3991       data = (address_cost_data *) xcalloc (1, sizeof (*data));
3992
3993       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3994
3995       width = GET_MODE_BITSIZE (address_mode) - 1;
3996       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3997         width = HOST_BITS_PER_WIDE_INT - 1;
3998       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3999
4000       for (i = width; i >= 0; i--)
4001         {
4002           off = -((unsigned HOST_WIDE_INT) 1 << i);
4003           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004           if (memory_address_addr_space_p (mem_mode, addr, as))
4005             break;
4006         }
4007       data->min_offset = (i == -1? 0 : off);
4008
4009       for (i = width; i >= 0; i--)
4010         {
4011           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4012           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4013           if (memory_address_addr_space_p (mem_mode, addr, as))
4014             break;
4015           /* For some strict-alignment targets, the offset must be naturally
4016              aligned.  Try an aligned offset if mem_mode is not QImode.  */
4017           off = mem_mode != QImode
4018                 ? ((unsigned HOST_WIDE_INT) 1 << i)
4019                     - GET_MODE_SIZE (mem_mode)
4020                 : 0;
4021           if (off > 0)
4022             {
4023               XEXP (addr, 1) = gen_int_mode (off, address_mode);
4024               if (memory_address_addr_space_p (mem_mode, addr, as))
4025                 break;
4026             }
4027         }
4028       if (i == -1)
4029         off = 0;
4030       data->max_offset = off;
4031
4032       if (dump_file && (dump_flags & TDF_DETAILS))
4033         {
4034           fprintf (dump_file, "get_address_cost:\n");
4035           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4036                    GET_MODE_NAME (mem_mode),
4037                    data->min_offset);
4038           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4039                    GET_MODE_NAME (mem_mode),
4040                    data->max_offset);
4041         }
4042
4043       rat = 1;
4044       for (i = 2; i <= MAX_RATIO; i++)
4045         if (multiplier_allowed_in_address_p (i, mem_mode, as))
4046           {
4047             rat = i;
4048             break;
4049           }
4050
4051       /* Compute the cost of various addressing modes.  */
4052       acost = 0;
4053       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4054       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4055
4056       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4057           || USE_STORE_PRE_DECREMENT (mem_mode))
4058         {
4059           addr = gen_rtx_PRE_DEC (address_mode, reg0);
4060           has_predec[mem_mode]
4061             = memory_address_addr_space_p (mem_mode, addr, as);
4062
4063           if (has_predec[mem_mode])
4064             data->ainc_costs[AINC_PRE_DEC]
4065               = address_cost (addr, mem_mode, as, speed);
4066         }
4067       if (USE_LOAD_POST_DECREMENT (mem_mode)
4068           || USE_STORE_POST_DECREMENT (mem_mode))
4069         {
4070           addr = gen_rtx_POST_DEC (address_mode, reg0);
4071           has_postdec[mem_mode]
4072             = memory_address_addr_space_p (mem_mode, addr, as);
4073
4074           if (has_postdec[mem_mode])
4075             data->ainc_costs[AINC_POST_DEC]
4076               = address_cost (addr, mem_mode, as, speed);
4077         }
4078       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4079           || USE_STORE_PRE_DECREMENT (mem_mode))
4080         {
4081           addr = gen_rtx_PRE_INC (address_mode, reg0);
4082           has_preinc[mem_mode]
4083             = memory_address_addr_space_p (mem_mode, addr, as);
4084
4085           if (has_preinc[mem_mode])
4086             data->ainc_costs[AINC_PRE_INC]
4087               = address_cost (addr, mem_mode, as, speed);
4088         }
4089       if (USE_LOAD_POST_INCREMENT (mem_mode)
4090           || USE_STORE_POST_INCREMENT (mem_mode))
4091         {
4092           addr = gen_rtx_POST_INC (address_mode, reg0);
4093           has_postinc[mem_mode]
4094             = memory_address_addr_space_p (mem_mode, addr, as);
4095
4096           if (has_postinc[mem_mode])
4097             data->ainc_costs[AINC_POST_INC]
4098               = address_cost (addr, mem_mode, as, speed);
4099         }
4100       for (i = 0; i < 16; i++)
4101         {
4102           sym_p = i & 1;
4103           var_p = (i >> 1) & 1;
4104           off_p = (i >> 2) & 1;
4105           rat_p = (i >> 3) & 1;
4106
4107           addr = reg0;
4108           if (rat_p)
4109             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4110                                    gen_int_mode (rat, address_mode));
4111
4112           if (var_p)
4113             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4114
4115           if (sym_p)
4116             {
4117               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4118               /* ??? We can run into trouble with some backends by presenting
4119                  it with symbols which haven't been properly passed through
4120                  targetm.encode_section_info.  By setting the local bit, we
4121                  enhance the probability of things working.  */
4122               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4123
4124               if (off_p)
4125                 base = gen_rtx_fmt_e (CONST, address_mode,
4126                                       gen_rtx_fmt_ee
4127                                         (PLUS, address_mode, base,
4128                                          gen_int_mode (off, address_mode)));
4129             }
4130           else if (off_p)
4131             base = gen_int_mode (off, address_mode);
4132           else
4133             base = NULL_RTX;
4134
4135           if (base)
4136             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4137
4138           start_sequence ();
4139           /* To avoid splitting addressing modes, pretend that no cse will
4140              follow.  */
4141           old_cse_not_expected = cse_not_expected;
4142           cse_not_expected = true;
4143           addr = memory_address_addr_space (mem_mode, addr, as);
4144           cse_not_expected = old_cse_not_expected;
4145           seq = get_insns ();
4146           end_sequence ();
4147
4148           acost = seq_cost (seq, speed);
4149           acost += address_cost (addr, mem_mode, as, speed);
4150
4151           if (!acost)
4152             acost = 1;
4153           data->costs[sym_p][var_p][off_p][rat_p] = acost;
4154         }
4155
4156       /* On some targets, it is quite expensive to load symbol to a register,
4157          which makes addresses that contain symbols look much more expensive.
4158          However, the symbol will have to be loaded in any case before the
4159          loop (and quite likely we have it in register already), so it does not
4160          make much sense to penalize them too heavily.  So make some final
4161          tweaks for the SYMBOL_PRESENT modes:
4162
4163          If VAR_PRESENT is false, and the mode obtained by changing symbol to
4164          var is cheaper, use this mode with small penalty.
4165          If VAR_PRESENT is true, try whether the mode with
4166          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4167          if this is the case, use it.  */
4168       add_c = add_cost (speed, address_mode);
4169       for (i = 0; i < 8; i++)
4170         {
4171           var_p = i & 1;
4172           off_p = (i >> 1) & 1;
4173           rat_p = (i >> 2) & 1;
4174
4175           acost = data->costs[0][1][off_p][rat_p] + 1;
4176           if (var_p)
4177             acost += add_c;
4178
4179           if (acost < data->costs[1][var_p][off_p][rat_p])
4180             data->costs[1][var_p][off_p][rat_p] = acost;
4181         }
4182
4183       if (dump_file && (dump_flags & TDF_DETAILS))
4184         {
4185           fprintf (dump_file, "<Address Costs>:\n");
4186
4187           for (i = 0; i < 16; i++)
4188             {
4189               sym_p = i & 1;
4190               var_p = (i >> 1) & 1;
4191               off_p = (i >> 2) & 1;
4192               rat_p = (i >> 3) & 1;
4193
4194               fprintf (dump_file, "  ");
4195               if (sym_p)
4196                 fprintf (dump_file, "sym + ");
4197               if (var_p)
4198                 fprintf (dump_file, "var + ");
4199               if (off_p)
4200                 fprintf (dump_file, "cst + ");
4201               if (rat_p)
4202                 fprintf (dump_file, "rat * ");
4203
4204               acost = data->costs[sym_p][var_p][off_p][rat_p];
4205               fprintf (dump_file, "index costs %d\n", acost);
4206             }
4207           if (has_predec[mem_mode] || has_postdec[mem_mode]
4208               || has_preinc[mem_mode] || has_postinc[mem_mode])
4209             fprintf (dump_file, "  May include autoinc/dec\n");
4210           fprintf (dump_file, "\n");
4211         }
4212
4213       address_cost_data_list[data_index] = data;
4214     }
4215
4216   bits = GET_MODE_BITSIZE (address_mode);
4217   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4218   offset &= mask;
4219   if ((offset >> (bits - 1) & 1))
4220     offset |= ~mask;
4221   s_offset = offset;
4222
4223   autoinc = false;
4224   autoinc_type = AINC_NONE;
4225   msize = GET_MODE_SIZE (mem_mode);
4226   autoinc_offset = offset;
4227   if (stmt_after_inc)
4228     autoinc_offset += ratio * cstep;
4229   if (symbol_present || var_present || ratio != 1)
4230     autoinc = false;
4231   else
4232     {
4233       if (has_postinc[mem_mode] && autoinc_offset == 0
4234           && msize == cstep)
4235         autoinc_type = AINC_POST_INC;
4236       else if (has_postdec[mem_mode] && autoinc_offset == 0
4237                && msize == -cstep)
4238         autoinc_type = AINC_POST_DEC;
4239       else if (has_preinc[mem_mode] && autoinc_offset == msize
4240                && msize == cstep)
4241         autoinc_type = AINC_PRE_INC;
4242       else if (has_predec[mem_mode] && autoinc_offset == -msize
4243                && msize == -cstep)
4244         autoinc_type = AINC_PRE_DEC;
4245
4246       if (autoinc_type != AINC_NONE)
4247         autoinc = true;
4248     }
4249
4250   cost = 0;
4251   offset_p = (s_offset != 0
4252               && data->min_offset <= s_offset
4253               && s_offset <= data->max_offset);
4254   ratio_p = (ratio != 1
4255              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4256
4257   if (ratio != 1 && !ratio_p)
4258     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4259
4260   if (s_offset && !offset_p && !symbol_present)
4261     cost += add_cost (speed, address_mode);
4262
4263   if (may_autoinc)
4264     *may_autoinc = autoinc;
4265   if (autoinc)
4266     acost = data->ainc_costs[autoinc_type];
4267   else
4268     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4269   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4270   return comp_cost (cost + acost, complexity);
4271 }
4272
4273  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4274     EXPR operand holding the shift.  COST0 and COST1 are the costs for
4275     calculating the operands of EXPR.  Returns true if successful, and returns
4276     the cost in COST.  */
4277
4278 static bool
4279 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4280                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4281 {
4282   comp_cost res;
4283   tree op1 = TREE_OPERAND (expr, 1);
4284   tree cst = TREE_OPERAND (mult, 1);
4285   tree multop = TREE_OPERAND (mult, 0);
4286   int m = exact_log2 (int_cst_value (cst));
4287   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4288   int as_cost, sa_cost;
4289   bool mult_in_op1;
4290
4291   if (!(m >= 0 && m < maxm))
4292     return false;
4293
4294   STRIP_NOPS (op1);
4295   mult_in_op1 = operand_equal_p (op1, mult, 0);
4296
4297   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4298
4299   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4300      use that in preference to a shift insn followed by an add insn.  */
4301   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4302              ? shiftadd_cost (speed, mode, m)
4303              : (mult_in_op1
4304                 ? shiftsub1_cost (speed, mode, m)
4305                 : shiftsub0_cost (speed, mode, m)));
4306
4307   res = comp_cost (MIN (as_cost, sa_cost), 0);
4308   res += (mult_in_op1 ? cost0 : cost1);
4309
4310   STRIP_NOPS (multop);
4311   if (!is_gimple_val (multop))
4312     res += force_expr_to_var_cost (multop, speed);
4313
4314   *cost = res;
4315   return true;
4316 }
4317
4318 /* Estimates cost of forcing expression EXPR into a variable.  */
4319
4320 static comp_cost
4321 force_expr_to_var_cost (tree expr, bool speed)
4322 {
4323   static bool costs_initialized = false;
4324   static unsigned integer_cost [2];
4325   static unsigned symbol_cost [2];
4326   static unsigned address_cost [2];
4327   tree op0, op1;
4328   comp_cost cost0, cost1, cost;
4329   machine_mode mode;
4330
4331   if (!costs_initialized)
4332     {
4333       tree type = build_pointer_type (integer_type_node);
4334       tree var, addr;
4335       rtx x;
4336       int i;
4337
4338       var = create_tmp_var_raw (integer_type_node, "test_var");
4339       TREE_STATIC (var) = 1;
4340       x = produce_memory_decl_rtl (var, NULL);
4341       SET_DECL_RTL (var, x);
4342
4343       addr = build1 (ADDR_EXPR, type, var);
4344
4345
4346       for (i = 0; i < 2; i++)
4347         {
4348           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4349                                                              2000), i);
4350
4351           symbol_cost[i] = computation_cost (addr, i) + 1;
4352
4353           address_cost[i]
4354             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4355           if (dump_file && (dump_flags & TDF_DETAILS))
4356             {
4357               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4358               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4359               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4360               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4361               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4362               fprintf (dump_file, "\n");
4363             }
4364         }
4365
4366       costs_initialized = true;
4367     }
4368
4369   STRIP_NOPS (expr);
4370
4371   if (SSA_VAR_P (expr))
4372     return no_cost;
4373
4374   if (is_gimple_min_invariant (expr))
4375     {
4376       if (TREE_CODE (expr) == INTEGER_CST)
4377         return comp_cost (integer_cost [speed], 0);
4378
4379       if (TREE_CODE (expr) == ADDR_EXPR)
4380         {
4381           tree obj = TREE_OPERAND (expr, 0);
4382
4383           if (TREE_CODE (obj) == VAR_DECL
4384               || TREE_CODE (obj) == PARM_DECL
4385               || TREE_CODE (obj) == RESULT_DECL)
4386             return comp_cost (symbol_cost [speed], 0);
4387         }
4388
4389       return comp_cost (address_cost [speed], 0);
4390     }
4391
4392   switch (TREE_CODE (expr))
4393     {
4394     case POINTER_PLUS_EXPR:
4395     case PLUS_EXPR:
4396     case MINUS_EXPR:
4397     case MULT_EXPR:
4398       op0 = TREE_OPERAND (expr, 0);
4399       op1 = TREE_OPERAND (expr, 1);
4400       STRIP_NOPS (op0);
4401       STRIP_NOPS (op1);
4402       break;
4403
4404     CASE_CONVERT:
4405     case NEGATE_EXPR:
4406       op0 = TREE_OPERAND (expr, 0);
4407       STRIP_NOPS (op0);
4408       op1 = NULL_TREE;
4409       break;
4410
4411     default:
4412       /* Just an arbitrary value, FIXME.  */
4413       return comp_cost (target_spill_cost[speed], 0);
4414     }
4415
4416   if (op0 == NULL_TREE
4417       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4418     cost0 = no_cost;
4419   else
4420     cost0 = force_expr_to_var_cost (op0, speed);
4421
4422   if (op1 == NULL_TREE
4423       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4424     cost1 = no_cost;
4425   else
4426     cost1 = force_expr_to_var_cost (op1, speed);
4427
4428   mode = TYPE_MODE (TREE_TYPE (expr));
4429   switch (TREE_CODE (expr))
4430     {
4431     case POINTER_PLUS_EXPR:
4432     case PLUS_EXPR:
4433     case MINUS_EXPR:
4434     case NEGATE_EXPR:
4435       cost = comp_cost (add_cost (speed, mode), 0);
4436       if (TREE_CODE (expr) != NEGATE_EXPR)
4437         {
4438           tree mult = NULL_TREE;
4439           comp_cost sa_cost;
4440           if (TREE_CODE (op1) == MULT_EXPR)
4441             mult = op1;
4442           else if (TREE_CODE (op0) == MULT_EXPR)
4443             mult = op0;
4444
4445           if (mult != NULL_TREE
4446               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4447               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4448                                     speed, &sa_cost))
4449             return sa_cost;
4450         }
4451       break;
4452
4453     CASE_CONVERT:
4454       {
4455         tree inner_mode, outer_mode;
4456         outer_mode = TREE_TYPE (expr);
4457         inner_mode = TREE_TYPE (op0);
4458         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4459                                        TYPE_MODE (inner_mode), speed), 0);
4460       }
4461       break;
4462
4463     case MULT_EXPR:
4464       if (cst_and_fits_in_hwi (op0))
4465         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4466                                              mode, speed), 0);
4467       else if (cst_and_fits_in_hwi (op1))
4468         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4469                                              mode, speed), 0);
4470       else
4471         return comp_cost (target_spill_cost [speed], 0);
4472       break;
4473
4474     default:
4475       gcc_unreachable ();
4476     }
4477
4478   cost += cost0;
4479   cost += cost1;
4480
4481   /* Bound the cost by target_spill_cost.  The parts of complicated
4482      computations often are either loop invariant or at least can
4483      be shared between several iv uses, so letting this grow without
4484      limits would not give reasonable results.  */
4485   if (cost.cost > (int) target_spill_cost [speed])
4486     cost.cost = target_spill_cost [speed];
4487
4488   return cost;
4489 }
4490
4491 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4492    invariants the computation depends on.  */
4493
4494 static comp_cost
4495 force_var_cost (struct ivopts_data *data,
4496                 tree expr, bitmap *depends_on)
4497 {
4498   if (depends_on)
4499     {
4500       fd_ivopts_data = data;
4501       walk_tree (&expr, find_depends, depends_on, NULL);
4502     }
4503
4504   return force_expr_to_var_cost (expr, data->speed);
4505 }
4506
4507 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4508    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4509    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4510    invariants the computation depends on.  */
4511
4512 static comp_cost
4513 split_address_cost (struct ivopts_data *data,
4514                     tree addr, bool *symbol_present, bool *var_present,
4515                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4516 {
4517   tree core;
4518   HOST_WIDE_INT bitsize;
4519   HOST_WIDE_INT bitpos;
4520   tree toffset;
4521   machine_mode mode;
4522   int unsignedp, reversep, volatilep;
4523
4524   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4525                               &unsignedp, &reversep, &volatilep, false);
4526
4527   if (toffset != 0
4528       || bitpos % BITS_PER_UNIT != 0
4529       || reversep
4530       || TREE_CODE (core) != VAR_DECL)
4531     {
4532       *symbol_present = false;
4533       *var_present = true;
4534       fd_ivopts_data = data;
4535       if (depends_on)
4536         walk_tree (&addr, find_depends, depends_on, NULL);
4537
4538       return comp_cost (target_spill_cost[data->speed], 0);
4539     }
4540
4541   *offset += bitpos / BITS_PER_UNIT;
4542   if (TREE_STATIC (core)
4543       || DECL_EXTERNAL (core))
4544     {
4545       *symbol_present = true;
4546       *var_present = false;
4547       return no_cost;
4548     }
4549
4550   *symbol_present = false;
4551   *var_present = true;
4552   return no_cost;
4553 }
4554
4555 /* Estimates cost of expressing difference of addresses E1 - E2 as
4556    var + symbol + offset.  The value of offset is added to OFFSET,
4557    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4558    part is missing.  DEPENDS_ON is a set of the invariants the computation
4559    depends on.  */
4560
4561 static comp_cost
4562 ptr_difference_cost (struct ivopts_data *data,
4563                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4564                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4565 {
4566   HOST_WIDE_INT diff = 0;
4567   aff_tree aff_e1, aff_e2;
4568   tree type;
4569
4570   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4571
4572   if (ptr_difference_const (e1, e2, &diff))
4573     {
4574       *offset += diff;
4575       *symbol_present = false;
4576       *var_present = false;
4577       return no_cost;
4578     }
4579
4580   if (integer_zerop (e2))
4581     return split_address_cost (data, TREE_OPERAND (e1, 0),
4582                                symbol_present, var_present, offset, depends_on);
4583
4584   *symbol_present = false;
4585   *var_present = true;
4586
4587   type = signed_type_for (TREE_TYPE (e1));
4588   tree_to_aff_combination (e1, type, &aff_e1);
4589   tree_to_aff_combination (e2, type, &aff_e2);
4590   aff_combination_scale (&aff_e2, -1);
4591   aff_combination_add (&aff_e1, &aff_e2);
4592
4593   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4594 }
4595
4596 /* Estimates cost of expressing difference E1 - E2 as
4597    var + symbol + offset.  The value of offset is added to OFFSET,
4598    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4599    part is missing.  DEPENDS_ON is a set of the invariants the computation
4600    depends on.  */
4601
4602 static comp_cost
4603 difference_cost (struct ivopts_data *data,
4604                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4605                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4606 {
4607   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4608   unsigned HOST_WIDE_INT off1, off2;
4609   aff_tree aff_e1, aff_e2;
4610   tree type;
4611
4612   e1 = strip_offset (e1, &off1);
4613   e2 = strip_offset (e2, &off2);
4614   *offset += off1 - off2;
4615
4616   STRIP_NOPS (e1);
4617   STRIP_NOPS (e2);
4618
4619   if (TREE_CODE (e1) == ADDR_EXPR)
4620     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4621                                 offset, depends_on);
4622   *symbol_present = false;
4623
4624   if (operand_equal_p (e1, e2, 0))
4625     {
4626       *var_present = false;
4627       return no_cost;
4628     }
4629
4630   *var_present = true;
4631
4632   if (integer_zerop (e2))
4633     return force_var_cost (data, e1, depends_on);
4634
4635   if (integer_zerop (e1))
4636     {
4637       comp_cost cost = force_var_cost (data, e2, depends_on);
4638       cost += mult_by_coeff_cost (-1, mode, data->speed);
4639       return cost;
4640     }
4641
4642   type = signed_type_for (TREE_TYPE (e1));
4643   tree_to_aff_combination (e1, type, &aff_e1);
4644   tree_to_aff_combination (e2, type, &aff_e2);
4645   aff_combination_scale (&aff_e2, -1);
4646   aff_combination_add (&aff_e1, &aff_e2);
4647
4648   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4649 }
4650
4651 /* Returns true if AFF1 and AFF2 are identical.  */
4652
4653 static bool
4654 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4655 {
4656   unsigned i;
4657
4658   if (aff1->n != aff2->n)
4659     return false;
4660
4661   for (i = 0; i < aff1->n; i++)
4662     {
4663       if (aff1->elts[i].coef != aff2->elts[i].coef)
4664         return false;
4665
4666       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4667         return false;
4668     }
4669   return true;
4670 }
4671
4672 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
4673
4674 static iv_inv_expr_ent *
4675 record_inv_expr (struct ivopts_data *data, tree expr)
4676 {
4677   struct iv_inv_expr_ent ent;
4678   struct iv_inv_expr_ent **slot;
4679
4680   ent.expr = expr;
4681   ent.hash = iterative_hash_expr (expr, 0);
4682   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4683
4684   if (!*slot)
4685     {
4686       *slot = XNEW (struct iv_inv_expr_ent);
4687       (*slot)->expr = expr;
4688       (*slot)->hash = ent.hash;
4689       (*slot)->id = data->max_inv_expr_id++;
4690     }
4691
4692   return *slot;
4693 }
4694
4695 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4696    requires a new compiler generated temporary.  Returns -1 otherwise.
4697    ADDRESS_P is a flag indicating if the expression is for address
4698    computation.  */
4699
4700 static iv_inv_expr_ent *
4701 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4702                          tree cbase, HOST_WIDE_INT ratio,
4703                          bool address_p)
4704 {
4705   aff_tree ubase_aff, cbase_aff;
4706   tree expr, ub, cb;
4707
4708   STRIP_NOPS (ubase);
4709   STRIP_NOPS (cbase);
4710   ub = ubase;
4711   cb = cbase;
4712
4713   if ((TREE_CODE (ubase) == INTEGER_CST)
4714       && (TREE_CODE (cbase) == INTEGER_CST))
4715     return NULL;
4716
4717   /* Strips the constant part. */
4718   if (TREE_CODE (ubase) == PLUS_EXPR
4719       || TREE_CODE (ubase) == MINUS_EXPR
4720       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4721     {
4722       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4723         ubase = TREE_OPERAND (ubase, 0);
4724     }
4725
4726   /* Strips the constant part. */
4727   if (TREE_CODE (cbase) == PLUS_EXPR
4728       || TREE_CODE (cbase) == MINUS_EXPR
4729       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4730     {
4731       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4732         cbase = TREE_OPERAND (cbase, 0);
4733     }
4734
4735   if (address_p)
4736     {
4737       if (((TREE_CODE (ubase) == SSA_NAME)
4738            || (TREE_CODE (ubase) == ADDR_EXPR
4739                && is_gimple_min_invariant (ubase)))
4740           && (TREE_CODE (cbase) == INTEGER_CST))
4741         return NULL;
4742
4743       if (((TREE_CODE (cbase) == SSA_NAME)
4744            || (TREE_CODE (cbase) == ADDR_EXPR
4745                && is_gimple_min_invariant (cbase)))
4746           && (TREE_CODE (ubase) == INTEGER_CST))
4747         return NULL;
4748     }
4749
4750   if (ratio == 1)
4751     {
4752       if (operand_equal_p (ubase, cbase, 0))
4753         return NULL;
4754
4755       if (TREE_CODE (ubase) == ADDR_EXPR
4756           && TREE_CODE (cbase) == ADDR_EXPR)
4757         {
4758           tree usym, csym;
4759
4760           usym = TREE_OPERAND (ubase, 0);
4761           csym = TREE_OPERAND (cbase, 0);
4762           if (TREE_CODE (usym) == ARRAY_REF)
4763             {
4764               tree ind = TREE_OPERAND (usym, 1);
4765               if (TREE_CODE (ind) == INTEGER_CST
4766                   && tree_fits_shwi_p (ind)
4767                   && tree_to_shwi (ind) == 0)
4768                 usym = TREE_OPERAND (usym, 0);
4769             }
4770           if (TREE_CODE (csym) == ARRAY_REF)
4771             {
4772               tree ind = TREE_OPERAND (csym, 1);
4773               if (TREE_CODE (ind) == INTEGER_CST
4774                   && tree_fits_shwi_p (ind)
4775                   && tree_to_shwi (ind) == 0)
4776                 csym = TREE_OPERAND (csym, 0);
4777             }
4778           if (operand_equal_p (usym, csym, 0))
4779             return NULL;
4780         }
4781       /* Now do more complex comparison  */
4782       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4783       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4784       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4785         return NULL;
4786     }
4787
4788   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4789   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4790
4791   aff_combination_scale (&cbase_aff, -1 * ratio);
4792   aff_combination_add (&ubase_aff, &cbase_aff);
4793   expr = aff_combination_to_tree (&ubase_aff);
4794   return record_inv_expr (data, expr);
4795 }
4796
4797 /* Scale (multiply) the computed COST (except scratch part that should be
4798    hoisted out a loop) by header->frequency / AT->frequency,
4799    which makes expected cost more accurate.  */
4800
4801 static comp_cost
4802 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, iv_cand *cand,
4803                                 comp_cost cost)
4804 {
4805    int loop_freq = data->current_loop->header->frequency;
4806    int bb_freq = at->bb->frequency;
4807    if (loop_freq != 0)
4808      {
4809        gcc_assert (cost.scratch <= cost.cost);
4810        int scaled_cost
4811          = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4812
4813        if (dump_file && (dump_flags & TDF_DETAILS))
4814          fprintf (dump_file, "Scaling iv_use based on cand %d "
4815                   "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4816                   cand->id, 1.0f * bb_freq / loop_freq, cost.cost,
4817                   cost.scratch, scaled_cost, bb_freq, loop_freq);
4818
4819        cost.cost = scaled_cost;
4820      }
4821
4822   return cost;
4823 }
4824
4825 /* Determines the cost of the computation by that USE is expressed
4826    from induction variable CAND.  If ADDRESS_P is true, we just need
4827    to create an address from it, otherwise we want to get it into
4828    register.  A set of invariants we depend on is stored in
4829    DEPENDS_ON.  AT is the statement at that the value is computed.
4830    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4831    addressing is likely.  */
4832
4833 static comp_cost
4834 get_computation_cost_at (struct ivopts_data *data,
4835                          struct iv_use *use, struct iv_cand *cand,
4836                          bool address_p, bitmap *depends_on, gimple *at,
4837                          bool *can_autoinc,
4838                          iv_inv_expr_ent **inv_expr)
4839 {
4840   tree ubase = use->iv->base, ustep = use->iv->step;
4841   tree cbase, cstep;
4842   tree utype = TREE_TYPE (ubase), ctype;
4843   unsigned HOST_WIDE_INT cstepi, offset = 0;
4844   HOST_WIDE_INT ratio, aratio;
4845   bool var_present, symbol_present, stmt_is_after_inc;
4846   comp_cost cost;
4847   widest_int rat;
4848   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4849   machine_mode mem_mode = (address_p
4850                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4851                                 : VOIDmode);
4852
4853   if (depends_on)
4854     *depends_on = NULL;
4855
4856   /* Only consider real candidates.  */
4857   if (!cand->iv)
4858     return infinite_cost;
4859
4860   cbase = cand->iv->base;
4861   cstep = cand->iv->step;
4862   ctype = TREE_TYPE (cbase);
4863
4864   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4865     {
4866       /* We do not have a precision to express the values of use.  */
4867       return infinite_cost;
4868     }
4869
4870   if (address_p
4871       || (use->iv->base_object
4872           && cand->iv->base_object
4873           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4874           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4875     {
4876       /* Do not try to express address of an object with computation based
4877          on address of a different object.  This may cause problems in rtl
4878          level alias analysis (that does not expect this to be happening,
4879          as this is illegal in C), and would be unlikely to be useful
4880          anyway.  */
4881       if (use->iv->base_object
4882           && cand->iv->base_object
4883           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4884         return infinite_cost;
4885     }
4886
4887   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4888     {
4889       /* TODO -- add direct handling of this case.  */
4890       goto fallback;
4891     }
4892
4893   /* CSTEPI is removed from the offset in case statement is after the
4894      increment.  If the step is not constant, we use zero instead.
4895      This is a bit imprecise (there is the extra addition), but
4896      redundancy elimination is likely to transform the code so that
4897      it uses value of the variable before increment anyway,
4898      so it is not that much unrealistic.  */
4899   if (cst_and_fits_in_hwi (cstep))
4900     cstepi = int_cst_value (cstep);
4901   else
4902     cstepi = 0;
4903
4904   if (!constant_multiple_of (ustep, cstep, &rat))
4905     return infinite_cost;
4906
4907   if (wi::fits_shwi_p (rat))
4908     ratio = rat.to_shwi ();
4909   else
4910     return infinite_cost;
4911
4912   STRIP_NOPS (cbase);
4913   ctype = TREE_TYPE (cbase);
4914
4915   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4916
4917   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4918      or ratio == 1, it is better to handle this like
4919
4920      ubase - ratio * cbase + ratio * var
4921
4922      (also holds in the case ratio == -1, TODO.  */
4923
4924   if (cst_and_fits_in_hwi (cbase))
4925     {
4926       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4927       cost = difference_cost (data,
4928                               ubase, build_int_cst (utype, 0),
4929                               &symbol_present, &var_present, &offset,
4930                               depends_on);
4931       cost /= avg_loop_niter (data->current_loop);
4932     }
4933   else if (ratio == 1)
4934     {
4935       tree real_cbase = cbase;
4936
4937       /* Check to see if any adjustment is needed.  */
4938       if (cstepi == 0 && stmt_is_after_inc)
4939         {
4940           aff_tree real_cbase_aff;
4941           aff_tree cstep_aff;
4942
4943           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4944                                    &real_cbase_aff);
4945           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4946
4947           aff_combination_add (&real_cbase_aff, &cstep_aff);
4948           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4949         }
4950
4951       cost = difference_cost (data,
4952                               ubase, real_cbase,
4953                               &symbol_present, &var_present, &offset,
4954                               depends_on);
4955       cost /= avg_loop_niter (data->current_loop);
4956     }
4957   else if (address_p
4958            && !POINTER_TYPE_P (ctype)
4959            && multiplier_allowed_in_address_p
4960                 (ratio, mem_mode,
4961                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4962     {
4963       tree real_cbase = cbase;
4964
4965       if (cstepi == 0 && stmt_is_after_inc)
4966         {
4967           if (POINTER_TYPE_P (ctype))
4968             real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4969           else
4970             real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4971         }
4972       real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4973                                 build_int_cst (ctype, ratio));
4974       cost = difference_cost (data,
4975                               ubase, real_cbase,
4976                               &symbol_present, &var_present, &offset,
4977                               depends_on);
4978       cost /= avg_loop_niter (data->current_loop);
4979     }
4980   else
4981     {
4982       cost = force_var_cost (data, cbase, depends_on);
4983       cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4984                                &symbol_present, &var_present, &offset,
4985                                depends_on);
4986       cost /= avg_loop_niter (data->current_loop);
4987       cost += add_cost (data->speed, TYPE_MODE (ctype));
4988     }
4989
4990   /* Record setup cost in scratch field.  */
4991   cost.scratch = cost.cost;
4992
4993   if (inv_expr && depends_on && *depends_on)
4994     {
4995       *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4996                                            address_p);
4997       /* Clear depends on.  */
4998       if (*inv_expr != NULL)
4999         bitmap_clear (*depends_on);
5000     }
5001
5002   /* If we are after the increment, the value of the candidate is higher by
5003      one iteration.  */
5004   if (stmt_is_after_inc)
5005     offset -= ratio * cstepi;
5006
5007   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
5008      (symbol/var1/const parts may be omitted).  If we are looking for an
5009      address, find the cost of addressing this.  */
5010   if (address_p)
5011     {
5012       cost += get_address_cost (symbol_present, var_present,
5013                                 offset, ratio, cstepi,
5014                                 mem_mode,
5015                                 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
5016                                 speed, stmt_is_after_inc, can_autoinc);
5017       return get_scaled_computation_cost_at (data, at, cand, cost);
5018     }
5019
5020   /* Otherwise estimate the costs for computing the expression.  */
5021   if (!symbol_present && !var_present && !offset)
5022     {
5023       if (ratio != 1)
5024         cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
5025       return get_scaled_computation_cost_at (data, at, cand, cost);
5026     }
5027
5028   /* Symbol + offset should be compile-time computable so consider that they
5029       are added once to the variable, if present.  */
5030   if (var_present && (symbol_present || offset))
5031     cost += adjust_setup_cost (data,
5032                                     add_cost (speed, TYPE_MODE (ctype)));
5033
5034   /* Having offset does not affect runtime cost in case it is added to
5035      symbol, but it increases complexity.  */
5036   if (offset)
5037     cost.complexity++;
5038
5039   cost += add_cost (speed, TYPE_MODE (ctype));
5040
5041   aratio = ratio > 0 ? ratio : -ratio;
5042   if (aratio != 1)
5043     cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5044
5045   return get_scaled_computation_cost_at (data, at, cand, cost);
5046
5047 fallback:
5048   if (can_autoinc)
5049     *can_autoinc = false;
5050
5051   /* Just get the expression, expand it and measure the cost.  */
5052   tree comp = get_computation_at (data->current_loop, use, cand, at);
5053
5054   if (!comp)
5055     return infinite_cost;
5056
5057   if (address_p)
5058     comp = build_simple_mem_ref (comp);
5059
5060   cost = comp_cost (computation_cost (comp, speed), 0);
5061
5062   return get_scaled_computation_cost_at (data, at, cand, cost);
5063 }
5064
5065 /* Determines the cost of the computation by that USE is expressed
5066    from induction variable CAND.  If ADDRESS_P is true, we just need
5067    to create an address from it, otherwise we want to get it into
5068    register.  A set of invariants we depend on is stored in
5069    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
5070    autoinc addressing is likely.  */
5071
5072 static comp_cost
5073 get_computation_cost (struct ivopts_data *data,
5074                       struct iv_use *use, struct iv_cand *cand,
5075                       bool address_p, bitmap *depends_on,
5076                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5077 {
5078   return get_computation_cost_at (data,
5079                                   use, cand, address_p, depends_on, use->stmt,
5080                                   can_autoinc, inv_expr);
5081 }
5082
5083 /* Determines cost of computing the use in GROUP with CAND in a generic
5084    expression.  */
5085
5086 static bool
5087 determine_group_iv_cost_generic (struct ivopts_data *data,
5088                                  struct iv_group *group, struct iv_cand *cand)
5089 {
5090   comp_cost cost;
5091   iv_inv_expr_ent *inv_expr = NULL;
5092   bitmap depends_on = NULL;
5093   struct iv_use *use = group->vuses[0];
5094
5095   /* The simple case first -- if we need to express value of the preserved
5096      original biv, the cost is 0.  This also prevents us from counting the
5097      cost of increment twice -- once at this use and once in the cost of
5098      the candidate.  */
5099   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5100     cost = no_cost;
5101   else
5102     cost = get_computation_cost (data, use, cand, false,
5103                                  &depends_on, NULL, &inv_expr);
5104
5105   set_group_iv_cost (data, group, cand, cost, depends_on,
5106                      NULL_TREE, ERROR_MARK, inv_expr);
5107   return !cost.infinite_cost_p ();
5108 }
5109
5110 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
5111
5112 static bool
5113 determine_group_iv_cost_address (struct ivopts_data *data,
5114                                  struct iv_group *group, struct iv_cand *cand)
5115 {
5116   unsigned i;
5117   bitmap depends_on;
5118   bool can_autoinc;
5119   iv_inv_expr_ent *inv_expr = NULL;
5120   struct iv_use *use = group->vuses[0];
5121   comp_cost sum_cost = no_cost, cost;
5122
5123   cost = get_computation_cost (data, use, cand, true,
5124                                &depends_on, &can_autoinc, &inv_expr);
5125
5126   sum_cost = cost;
5127   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5128     {
5129       if (can_autoinc)
5130         sum_cost -= cand->cost_step;
5131       /* If we generated the candidate solely for exploiting autoincrement
5132          opportunities, and it turns out it can't be used, set the cost to
5133          infinity to make sure we ignore it.  */
5134       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5135         sum_cost = infinite_cost;
5136     }
5137
5138   /* Uses in a group can share setup code, so only add setup cost once.  */
5139   cost -= cost.scratch;
5140   /* Compute and add costs for rest uses of this group.  */
5141   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5142     {
5143       struct iv_use *next = group->vuses[i];
5144
5145       /* TODO: We could skip computing cost for sub iv_use when it has the
5146          same cost as the first iv_use, but the cost really depends on the
5147          offset and where the iv_use is.  */
5148         cost = get_computation_cost (data, next, cand, true,
5149                                      NULL, &can_autoinc, NULL);
5150       sum_cost += cost;
5151     }
5152   set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5153                      NULL_TREE, ERROR_MARK, inv_expr);
5154
5155   return !sum_cost.infinite_cost_p ();
5156 }
5157
5158 /* Computes value of candidate CAND at position AT in iteration NITER, and
5159    stores it to VAL.  */
5160
5161 static void
5162 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5163                aff_tree *val)
5164 {
5165   aff_tree step, delta, nit;
5166   struct iv *iv = cand->iv;
5167   tree type = TREE_TYPE (iv->base);
5168   tree steptype = type;
5169   if (POINTER_TYPE_P (type))
5170     steptype = sizetype;
5171   steptype = unsigned_type_for (type);
5172
5173   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5174   aff_combination_convert (&step, steptype);
5175   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5176   aff_combination_convert (&nit, steptype);
5177   aff_combination_mult (&nit, &step, &delta);
5178   if (stmt_after_increment (loop, cand, at))
5179     aff_combination_add (&delta, &step);
5180
5181   tree_to_aff_combination (iv->base, type, val);
5182   if (!POINTER_TYPE_P (type))
5183     aff_combination_convert (val, steptype);
5184   aff_combination_add (val, &delta);
5185 }
5186
5187 /* Returns period of induction variable iv.  */
5188
5189 static tree
5190 iv_period (struct iv *iv)
5191 {
5192   tree step = iv->step, period, type;
5193   tree pow2div;
5194
5195   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5196
5197   type = unsigned_type_for (TREE_TYPE (step));
5198   /* Period of the iv is lcm (step, type_range)/step -1,
5199      i.e., N*type_range/step - 1. Since type range is power
5200      of two, N == (step >> num_of_ending_zeros_binary (step),
5201      so the final result is
5202
5203        (type_range >> num_of_ending_zeros_binary (step)) - 1
5204
5205   */
5206   pow2div = num_ending_zeros (step);
5207
5208   period = build_low_bits_mask (type,
5209                                 (TYPE_PRECISION (type)
5210                                  - tree_to_uhwi (pow2div)));
5211
5212   return period;
5213 }
5214
5215 /* Returns the comparison operator used when eliminating the iv USE.  */
5216
5217 static enum tree_code
5218 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5219 {
5220   struct loop *loop = data->current_loop;
5221   basic_block ex_bb;
5222   edge exit;
5223
5224   ex_bb = gimple_bb (use->stmt);
5225   exit = EDGE_SUCC (ex_bb, 0);
5226   if (flow_bb_inside_loop_p (loop, exit->dest))
5227     exit = EDGE_SUCC (ex_bb, 1);
5228
5229   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5230 }
5231
5232 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5233    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5234    calculation is performed in non-wrapping type.
5235
5236    TODO: More generally, we could test for the situation that
5237          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5238          This would require knowing the sign of OFFSET.  */
5239
5240 static bool
5241 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5242 {
5243   enum tree_code code;
5244   tree e1, e2;
5245   aff_tree aff_e1, aff_e2, aff_offset;
5246
5247   if (!nowrap_type_p (TREE_TYPE (base)))
5248     return false;
5249
5250   base = expand_simple_operations (base);
5251
5252   if (TREE_CODE (base) == SSA_NAME)
5253     {
5254       gimple *stmt = SSA_NAME_DEF_STMT (base);
5255
5256       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5257         return false;
5258
5259       code = gimple_assign_rhs_code (stmt);
5260       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5261         return false;
5262
5263       e1 = gimple_assign_rhs1 (stmt);
5264       e2 = gimple_assign_rhs2 (stmt);
5265     }
5266   else
5267     {
5268       code = TREE_CODE (base);
5269       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5270         return false;
5271       e1 = TREE_OPERAND (base, 0);
5272       e2 = TREE_OPERAND (base, 1);
5273     }
5274
5275   /* Use affine expansion as deeper inspection to prove the equality.  */
5276   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5277                                   &aff_e2, &data->name_expansion_cache);
5278   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5279                                   &aff_offset, &data->name_expansion_cache);
5280   aff_combination_scale (&aff_offset, -1);
5281   switch (code)
5282     {
5283     case PLUS_EXPR:
5284       aff_combination_add (&aff_e2, &aff_offset);
5285       if (aff_combination_zero_p (&aff_e2))
5286         return true;
5287
5288       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5289                                       &aff_e1, &data->name_expansion_cache);
5290       aff_combination_add (&aff_e1, &aff_offset);
5291       return aff_combination_zero_p (&aff_e1);
5292
5293     case POINTER_PLUS_EXPR:
5294       aff_combination_add (&aff_e2, &aff_offset);
5295       return aff_combination_zero_p (&aff_e2);
5296
5297     default:
5298       return false;
5299     }
5300 }
5301
5302 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5303    comparison with CAND.  NITER describes the number of iterations of
5304    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5305
5306    We aim to handle the following situation:
5307
5308    sometype *base, *p;
5309    int a, b, i;
5310
5311    i = a;
5312    p = p_0 = base + a;
5313
5314    do
5315      {
5316        bla (*p);
5317        p++;
5318        i++;
5319      }
5320    while (i < b);
5321
5322    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5323    We aim to optimize this to
5324
5325    p = p_0 = base + a;
5326    do
5327      {
5328        bla (*p);
5329        p++;
5330      }
5331    while (p < p_0 - a + b);
5332
5333    This preserves the correctness, since the pointer arithmetics does not
5334    overflow.  More precisely:
5335
5336    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5337       overflow in computing it or the values of p.
5338    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5339       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5340
5341 static bool
5342 iv_elimination_compare_lt (struct ivopts_data *data,
5343                            struct iv_cand *cand, enum tree_code *comp_p,
5344                            struct tree_niter_desc *niter)
5345 {
5346   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5347   struct aff_tree nit, tmpa, tmpb;
5348   enum tree_code comp;
5349   HOST_WIDE_INT step;
5350
5351   /* We need to know that the candidate induction variable does not overflow.
5352      While more complex analysis may be used to prove this, for now just
5353      check that the variable appears in the original program and that it
5354      is computed in a type that guarantees no overflows.  */
5355   cand_type = TREE_TYPE (cand->iv->base);
5356   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5357     return false;
5358
5359   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5360      the calculation of the BOUND could overflow, making the comparison
5361      invalid.  */
5362   if (!data->loop_single_exit_p)
5363     return false;
5364
5365   /* We need to be able to decide whether candidate is increasing or decreasing
5366      in order to choose the right comparison operator.  */
5367   if (!cst_and_fits_in_hwi (cand->iv->step))
5368     return false;
5369   step = int_cst_value (cand->iv->step);
5370
5371   /* Check that the number of iterations matches the expected pattern:
5372      a + 1 > b ? 0 : b - a - 1.  */
5373   mbz = niter->may_be_zero;
5374   if (TREE_CODE (mbz) == GT_EXPR)
5375     {
5376       /* Handle a + 1 > b.  */
5377       tree op0 = TREE_OPERAND (mbz, 0);
5378       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5379         {
5380           a = TREE_OPERAND (op0, 0);
5381           b = TREE_OPERAND (mbz, 1);
5382         }
5383       else
5384         return false;
5385     }
5386   else if (TREE_CODE (mbz) == LT_EXPR)
5387     {
5388       tree op1 = TREE_OPERAND (mbz, 1);
5389
5390       /* Handle b < a + 1.  */
5391       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5392         {
5393           a = TREE_OPERAND (op1, 0);
5394           b = TREE_OPERAND (mbz, 0);
5395         }
5396       else
5397         return false;
5398     }
5399   else
5400     return false;
5401
5402   /* Expected number of iterations is B - A - 1.  Check that it matches
5403      the actual number, i.e., that B - A - NITER = 1.  */
5404   tree_to_aff_combination (niter->niter, nit_type, &nit);
5405   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5406   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5407   aff_combination_scale (&nit, -1);
5408   aff_combination_scale (&tmpa, -1);
5409   aff_combination_add (&tmpb, &tmpa);
5410   aff_combination_add (&tmpb, &nit);
5411   if (tmpb.n != 0 || tmpb.offset != 1)
5412     return false;
5413
5414   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5415      overflow.  */
5416   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5417                         cand->iv->step,
5418                         fold_convert (TREE_TYPE (cand->iv->step), a));
5419   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5420     return false;
5421
5422   /* Determine the new comparison operator.  */
5423   comp = step < 0 ? GT_EXPR : LT_EXPR;
5424   if (*comp_p == NE_EXPR)
5425     *comp_p = comp;
5426   else if (*comp_p == EQ_EXPR)
5427     *comp_p = invert_tree_comparison (comp, false);
5428   else
5429     gcc_unreachable ();
5430
5431   return true;
5432 }
5433
5434 /* Check whether it is possible to express the condition in USE by comparison
5435    of candidate CAND.  If so, store the value compared with to BOUND, and the
5436    comparison operator to COMP.  */
5437
5438 static bool
5439 may_eliminate_iv (struct ivopts_data *data,
5440                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5441                   enum tree_code *comp)
5442 {
5443   basic_block ex_bb;
5444   edge exit;
5445   tree period;
5446   struct loop *loop = data->current_loop;
5447   aff_tree bnd;
5448   struct tree_niter_desc *desc = NULL;
5449
5450   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5451     return false;
5452
5453   /* For now works only for exits that dominate the loop latch.
5454      TODO: extend to other conditions inside loop body.  */
5455   ex_bb = gimple_bb (use->stmt);
5456   if (use->stmt != last_stmt (ex_bb)
5457       || gimple_code (use->stmt) != GIMPLE_COND
5458       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5459     return false;
5460
5461   exit = EDGE_SUCC (ex_bb, 0);
5462   if (flow_bb_inside_loop_p (loop, exit->dest))
5463     exit = EDGE_SUCC (ex_bb, 1);
5464   if (flow_bb_inside_loop_p (loop, exit->dest))
5465     return false;
5466
5467   desc = niter_for_exit (data, exit);
5468   if (!desc)
5469     return false;
5470
5471   /* Determine whether we can use the variable to test the exit condition.
5472      This is the case iff the period of the induction variable is greater
5473      than the number of iterations for which the exit condition is true.  */
5474   period = iv_period (cand->iv);
5475
5476   /* If the number of iterations is constant, compare against it directly.  */
5477   if (TREE_CODE (desc->niter) == INTEGER_CST)
5478     {
5479       /* See cand_value_at.  */
5480       if (stmt_after_increment (loop, cand, use->stmt))
5481         {
5482           if (!tree_int_cst_lt (desc->niter, period))
5483             return false;
5484         }
5485       else
5486         {
5487           if (tree_int_cst_lt (period, desc->niter))
5488             return false;
5489         }
5490     }
5491
5492   /* If not, and if this is the only possible exit of the loop, see whether
5493      we can get a conservative estimate on the number of iterations of the
5494      entire loop and compare against that instead.  */
5495   else
5496     {
5497       widest_int period_value, max_niter;
5498
5499       max_niter = desc->max;
5500       if (stmt_after_increment (loop, cand, use->stmt))
5501         max_niter += 1;
5502       period_value = wi::to_widest (period);
5503       if (wi::gtu_p (max_niter, period_value))
5504         {
5505           /* See if we can take advantage of inferred loop bound
5506              information.  */
5507           if (data->loop_single_exit_p)
5508             {
5509               if (!max_loop_iterations (loop, &max_niter))
5510                 return false;
5511               /* The loop bound is already adjusted by adding 1.  */
5512               if (wi::gtu_p (max_niter, period_value))
5513                 return false;
5514             }
5515           else
5516             return false;
5517         }
5518     }
5519
5520   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5521
5522   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5523                          aff_combination_to_tree (&bnd));
5524   *comp = iv_elimination_compare (data, use);
5525
5526   /* It is unlikely that computing the number of iterations using division
5527      would be more profitable than keeping the original induction variable.  */
5528   if (expression_expensive_p (*bound))
5529     return false;
5530
5531   /* Sometimes, it is possible to handle the situation that the number of
5532      iterations may be zero unless additional assumtions by using <
5533      instead of != in the exit condition.
5534
5535      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5536            base the exit condition on it.  However, that is often too
5537            expensive.  */
5538   if (!integer_zerop (desc->may_be_zero))
5539     return iv_elimination_compare_lt (data, cand, comp, desc);
5540
5541   return true;
5542 }
5543
5544  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5545     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5546
5547 static int
5548 parm_decl_cost (struct ivopts_data *data, tree bound)
5549 {
5550   tree sbound = bound;
5551   STRIP_NOPS (sbound);
5552
5553   if (TREE_CODE (sbound) == SSA_NAME
5554       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5555       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5556       && data->body_includes_call)
5557     return COSTS_N_INSNS (1);
5558
5559   return 0;
5560 }
5561
5562 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5563
5564 static bool
5565 determine_group_iv_cost_cond (struct ivopts_data *data,
5566                               struct iv_group *group, struct iv_cand *cand)
5567 {
5568   tree bound = NULL_TREE;
5569   struct iv *cmp_iv;
5570   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5571   comp_cost elim_cost, express_cost, cost, bound_cost;
5572   bool ok;
5573   iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5574   tree *control_var, *bound_cst;
5575   enum tree_code comp = ERROR_MARK;
5576   struct iv_use *use = group->vuses[0];
5577
5578   gcc_assert (cand->iv);
5579
5580   /* Try iv elimination.  */
5581   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5582     {
5583       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5584       if (elim_cost.cost == 0)
5585         elim_cost.cost = parm_decl_cost (data, bound);
5586       else if (TREE_CODE (bound) == INTEGER_CST)
5587         elim_cost.cost = 0;
5588       /* If we replace a loop condition 'i < n' with 'p < base + n',
5589          depends_on_elim will have 'base' and 'n' set, which implies
5590          that both 'base' and 'n' will be live during the loop.  More likely,
5591          'base + n' will be loop invariant, resulting in only one live value
5592          during the loop.  So in that case we clear depends_on_elim and set
5593         elim_inv_expr_id instead.  */
5594       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5595         {
5596           elim_inv_expr = record_inv_expr (data, bound);
5597           bitmap_clear (depends_on_elim);
5598         }
5599       /* The bound is a loop invariant, so it will be only computed
5600          once.  */
5601       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5602     }
5603   else
5604     elim_cost = infinite_cost;
5605
5606   /* Try expressing the original giv.  If it is compared with an invariant,
5607      note that we cannot get rid of it.  */
5608   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5609                               NULL, &cmp_iv);
5610   gcc_assert (ok);
5611
5612   /* When the condition is a comparison of the candidate IV against
5613      zero, prefer this IV.
5614
5615      TODO: The constant that we're subtracting from the cost should
5616      be target-dependent.  This information should be added to the
5617      target costs for each backend.  */
5618   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5619       && integer_zerop (*bound_cst)
5620       && (operand_equal_p (*control_var, cand->var_after, 0)
5621           || operand_equal_p (*control_var, cand->var_before, 0)))
5622     elim_cost -= 1;
5623
5624   express_cost = get_computation_cost (data, use, cand, false,
5625                                        &depends_on_express, NULL,
5626                                        &express_inv_expr);
5627   fd_ivopts_data = data;
5628   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5629
5630   /* Count the cost of the original bound as well.  */
5631   bound_cost = force_var_cost (data, *bound_cst, NULL);
5632   if (bound_cost.cost == 0)
5633     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5634   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5635     bound_cost.cost = 0;
5636   express_cost += bound_cost;
5637
5638   /* Choose the better approach, preferring the eliminated IV. */
5639   if (elim_cost <= express_cost)
5640     {
5641       cost = elim_cost;
5642       depends_on = depends_on_elim;
5643       depends_on_elim = NULL;
5644       inv_expr = elim_inv_expr;
5645     }
5646   else
5647     {
5648       cost = express_cost;
5649       depends_on = depends_on_express;
5650       depends_on_express = NULL;
5651       bound = NULL_TREE;
5652       comp = ERROR_MARK;
5653       inv_expr = express_inv_expr;
5654     }
5655
5656   set_group_iv_cost (data, group, cand, cost,
5657                      depends_on, bound, comp, inv_expr);
5658
5659   if (depends_on_elim)
5660     BITMAP_FREE (depends_on_elim);
5661   if (depends_on_express)
5662     BITMAP_FREE (depends_on_express);
5663
5664   return !cost.infinite_cost_p ();
5665 }
5666
5667 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5668    if USE cannot be represented with CAND.  */
5669
5670 static bool
5671 determine_group_iv_cost (struct ivopts_data *data,
5672                          struct iv_group *group, struct iv_cand *cand)
5673 {
5674   switch (group->type)
5675     {
5676     case USE_NONLINEAR_EXPR:
5677       return determine_group_iv_cost_generic (data, group, cand);
5678
5679     case USE_ADDRESS:
5680       return determine_group_iv_cost_address (data, group, cand);
5681
5682     case USE_COMPARE:
5683       return determine_group_iv_cost_cond (data, group, cand);
5684
5685     default:
5686       gcc_unreachable ();
5687     }
5688 }
5689
5690 /* Return true if get_computation_cost indicates that autoincrement is
5691    a possibility for the pair of USE and CAND, false otherwise.  */
5692
5693 static bool
5694 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5695                            struct iv_cand *cand)
5696 {
5697   bitmap depends_on;
5698   bool can_autoinc;
5699   comp_cost cost;
5700
5701   if (use->type != USE_ADDRESS)
5702     return false;
5703
5704   cost = get_computation_cost (data, use, cand, true, &depends_on,
5705                                &can_autoinc, NULL);
5706
5707   BITMAP_FREE (depends_on);
5708
5709   return !cost.infinite_cost_p () && can_autoinc;
5710 }
5711
5712 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5713    use that allows autoincrement, and set their AINC_USE if possible.  */
5714
5715 static void
5716 set_autoinc_for_original_candidates (struct ivopts_data *data)
5717 {
5718   unsigned i, j;
5719
5720   for (i = 0; i < data->vcands.length (); i++)
5721     {
5722       struct iv_cand *cand = data->vcands[i];
5723       struct iv_use *closest_before = NULL;
5724       struct iv_use *closest_after = NULL;
5725       if (cand->pos != IP_ORIGINAL)
5726         continue;
5727
5728       for (j = 0; j < data->vgroups.length (); j++)
5729         {
5730           struct iv_group *group = data->vgroups[j];
5731           struct iv_use *use = group->vuses[0];
5732           unsigned uid = gimple_uid (use->stmt);
5733
5734           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5735             continue;
5736
5737           if (uid < gimple_uid (cand->incremented_at)
5738               && (closest_before == NULL
5739                   || uid > gimple_uid (closest_before->stmt)))
5740             closest_before = use;
5741
5742           if (uid > gimple_uid (cand->incremented_at)
5743               && (closest_after == NULL
5744                   || uid < gimple_uid (closest_after->stmt)))
5745             closest_after = use;
5746         }
5747
5748       if (closest_before != NULL
5749           && autoinc_possible_for_pair (data, closest_before, cand))
5750         cand->ainc_use = closest_before;
5751       else if (closest_after != NULL
5752                && autoinc_possible_for_pair (data, closest_after, cand))
5753         cand->ainc_use = closest_after;
5754     }
5755 }
5756
5757 /* Finds the candidates for the induction variables.  */
5758
5759 static void
5760 find_iv_candidates (struct ivopts_data *data)
5761 {
5762   /* Add commonly used ivs.  */
5763   add_standard_iv_candidates (data);
5764
5765   /* Add old induction variables.  */
5766   add_iv_candidate_for_bivs (data);
5767
5768   /* Add induction variables derived from uses.  */
5769   add_iv_candidate_for_groups (data);
5770
5771   set_autoinc_for_original_candidates (data);
5772
5773   /* Record the important candidates.  */
5774   record_important_candidates (data);
5775
5776   if (dump_file && (dump_flags & TDF_DETAILS))
5777     {
5778       unsigned i;
5779
5780       fprintf (dump_file, "\n<Important Candidates>:\t");
5781       for (i = 0; i < data->vcands.length (); i++)
5782         if (data->vcands[i]->important)
5783           fprintf (dump_file, " %d,", data->vcands[i]->id);
5784       fprintf (dump_file, "\n");
5785
5786       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5787       for (i = 0; i < data->vgroups.length (); i++)
5788         {
5789           struct iv_group *group = data->vgroups[i];
5790
5791           if (group->related_cands)
5792             {
5793               fprintf (dump_file, "  Group %d:\t", group->id);
5794               dump_bitmap (dump_file, group->related_cands);
5795             }
5796         }
5797       fprintf (dump_file, "\n");
5798     }
5799 }
5800
5801 /* Determines costs of computing use of iv with an iv candidate.  */
5802
5803 static void
5804 determine_group_iv_costs (struct ivopts_data *data)
5805 {
5806   unsigned i, j;
5807   struct iv_cand *cand;
5808   struct iv_group *group;
5809   bitmap to_clear = BITMAP_ALLOC (NULL);
5810
5811   alloc_use_cost_map (data);
5812
5813   for (i = 0; i < data->vgroups.length (); i++)
5814     {
5815       group = data->vgroups[i];
5816
5817       if (data->consider_all_candidates)
5818         {
5819           for (j = 0; j < data->vcands.length (); j++)
5820             {
5821               cand = data->vcands[j];
5822               determine_group_iv_cost (data, group, cand);
5823             }
5824         }
5825       else
5826         {
5827           bitmap_iterator bi;
5828
5829           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5830             {
5831               cand = data->vcands[j];
5832               if (!determine_group_iv_cost (data, group, cand))
5833                 bitmap_set_bit (to_clear, j);
5834             }
5835
5836           /* Remove the candidates for that the cost is infinite from
5837              the list of related candidates.  */
5838           bitmap_and_compl_into (group->related_cands, to_clear);
5839           bitmap_clear (to_clear);
5840         }
5841     }
5842
5843   BITMAP_FREE (to_clear);
5844
5845   if (dump_file && (dump_flags & TDF_DETAILS))
5846     {
5847       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5848       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5849
5850       for (hash_table<iv_inv_expr_hasher>::iterator it
5851            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5852            ++it)
5853         list.safe_push (*it);
5854
5855       list.qsort (sort_iv_inv_expr_ent);
5856
5857       for (i = 0; i < list.length (); ++i)
5858         {
5859           fprintf (dump_file, "inv_expr %d: \t", i);
5860           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5861           fprintf (dump_file, "\n");
5862         }
5863
5864       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5865
5866       for (i = 0; i < data->vgroups.length (); i++)
5867         {
5868           group = data->vgroups[i];
5869
5870           fprintf (dump_file, "Group %d:\n", i);
5871           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5872           for (j = 0; j < group->n_map_members; j++)
5873             {
5874               if (!group->cost_map[j].cand
5875                   || group->cost_map[j].cost.infinite_cost_p ())
5876                 continue;
5877
5878               fprintf (dump_file, "  %d\t%d\t%d\t",
5879                        group->cost_map[j].cand->id,
5880                        group->cost_map[j].cost.cost,
5881                        group->cost_map[j].cost.complexity);
5882               if (group->cost_map[j].inv_expr != NULL)
5883                 fprintf (dump_file, "%d\t",
5884                          group->cost_map[j].inv_expr->id);
5885               else
5886                 fprintf (dump_file, "\t");
5887               if (group->cost_map[j].depends_on)
5888                 bitmap_print (dump_file,
5889                               group->cost_map[j].depends_on, "","");
5890               fprintf (dump_file, "\n");
5891             }
5892
5893           fprintf (dump_file, "\n");
5894         }
5895       fprintf (dump_file, "\n");
5896     }
5897 }
5898
5899 /* Determines cost of the candidate CAND.  */
5900
5901 static void
5902 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5903 {
5904   comp_cost cost_base;
5905   unsigned cost, cost_step;
5906   tree base;
5907
5908   if (!cand->iv)
5909     {
5910       cand->cost = 0;
5911       return;
5912     }
5913
5914   /* There are two costs associated with the candidate -- its increment
5915      and its initialization.  The second is almost negligible for any loop
5916      that rolls enough, so we take it just very little into account.  */
5917
5918   base = cand->iv->base;
5919   cost_base = force_var_cost (data, base, NULL);
5920   /* It will be exceptional that the iv register happens to be initialized with
5921      the proper value at no cost.  In general, there will at least be a regcopy
5922      or a const set.  */
5923   if (cost_base.cost == 0)
5924     cost_base.cost = COSTS_N_INSNS (1);
5925   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5926
5927   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5928
5929   /* Prefer the original ivs unless we may gain something by replacing it.
5930      The reason is to make debugging simpler; so this is not relevant for
5931      artificial ivs created by other optimization passes.  */
5932   if (cand->pos != IP_ORIGINAL
5933       || !SSA_NAME_VAR (cand->var_before)
5934       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5935     cost++;
5936
5937   /* Prefer not to insert statements into latch unless there are some
5938      already (so that we do not create unnecessary jumps).  */
5939   if (cand->pos == IP_END
5940       && empty_block_p (ip_end_pos (data->current_loop)))
5941     cost++;
5942
5943   cand->cost = cost;
5944   cand->cost_step = cost_step;
5945 }
5946
5947 /* Determines costs of computation of the candidates.  */
5948
5949 static void
5950 determine_iv_costs (struct ivopts_data *data)
5951 {
5952   unsigned i;
5953
5954   if (dump_file && (dump_flags & TDF_DETAILS))
5955     {
5956       fprintf (dump_file, "<Candidate Costs>:\n");
5957       fprintf (dump_file, "  cand\tcost\n");
5958     }
5959
5960   for (i = 0; i < data->vcands.length (); i++)
5961     {
5962       struct iv_cand *cand = data->vcands[i];
5963
5964       determine_iv_cost (data, cand);
5965
5966       if (dump_file && (dump_flags & TDF_DETAILS))
5967         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5968     }
5969
5970   if (dump_file && (dump_flags & TDF_DETAILS))
5971     fprintf (dump_file, "\n");
5972 }
5973
5974 /* Calculates cost for having SIZE induction variables.  */
5975
5976 static unsigned
5977 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5978 {
5979   /* We add size to the cost, so that we prefer eliminating ivs
5980      if possible.  */
5981   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5982                                             data->body_includes_call);
5983 }
5984
5985 /* For each size of the induction variable set determine the penalty.  */
5986
5987 static void
5988 determine_set_costs (struct ivopts_data *data)
5989 {
5990   unsigned j, n;
5991   gphi *phi;
5992   gphi_iterator psi;
5993   tree op;
5994   struct loop *loop = data->current_loop;
5995   bitmap_iterator bi;
5996
5997   if (dump_file && (dump_flags & TDF_DETAILS))
5998     {
5999       fprintf (dump_file, "<Global Costs>:\n");
6000       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
6001       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
6002       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
6003       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
6004     }
6005
6006   n = 0;
6007   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6008     {
6009       phi = psi.phi ();
6010       op = PHI_RESULT (phi);
6011
6012       if (virtual_operand_p (op))
6013         continue;
6014
6015       if (get_iv (data, op))
6016         continue;
6017
6018       n++;
6019     }
6020
6021   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6022     {
6023       struct version_info *info = ver_info (data, j);
6024
6025       if (info->inv_id && info->has_nonlin_use)
6026         n++;
6027     }
6028
6029   data->regs_used = n;
6030   if (dump_file && (dump_flags & TDF_DETAILS))
6031     fprintf (dump_file, "  regs_used %d\n", n);
6032
6033   if (dump_file && (dump_flags & TDF_DETAILS))
6034     {
6035       fprintf (dump_file, "  cost for size:\n");
6036       fprintf (dump_file, "  ivs\tcost\n");
6037       for (j = 0; j <= 2 * target_avail_regs; j++)
6038         fprintf (dump_file, "  %d\t%d\n", j,
6039                  ivopts_global_cost_for_size (data, j));
6040       fprintf (dump_file, "\n");
6041     }
6042 }
6043
6044 /* Returns true if A is a cheaper cost pair than B.  */
6045
6046 static bool
6047 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6048 {
6049   if (!a)
6050     return false;
6051
6052   if (!b)
6053     return true;
6054
6055   if (a->cost < b->cost)
6056     return true;
6057
6058   if (b->cost < a->cost)
6059     return false;
6060
6061   /* In case the costs are the same, prefer the cheaper candidate.  */
6062   if (a->cand->cost < b->cand->cost)
6063     return true;
6064
6065   return false;
6066 }
6067
6068
6069 /* Returns candidate by that USE is expressed in IVS.  */
6070
6071 static struct cost_pair *
6072 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6073 {
6074   return ivs->cand_for_group[group->id];
6075 }
6076
6077 /* Computes the cost field of IVS structure.  */
6078
6079 static void
6080 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6081 {
6082   comp_cost cost = ivs->cand_use_cost;
6083
6084   cost += ivs->cand_cost;
6085
6086   cost += ivopts_global_cost_for_size (data,
6087                                        ivs->n_regs
6088                                        + ivs->used_inv_exprs->elements ());
6089
6090   ivs->cost = cost;
6091 }
6092
6093 /* Remove invariants in set INVS to set IVS.  */
6094
6095 static void
6096 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6097 {
6098   bitmap_iterator bi;
6099   unsigned iid;
6100
6101   if (!invs)
6102     return;
6103
6104   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6105     {
6106       ivs->n_invariant_uses[iid]--;
6107       if (ivs->n_invariant_uses[iid] == 0)
6108         ivs->n_regs--;
6109     }
6110 }
6111
6112 /* Set USE not to be expressed by any candidate in IVS.  */
6113
6114 static void
6115 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6116                  struct iv_group *group)
6117 {
6118   unsigned gid = group->id, cid;
6119   struct cost_pair *cp;
6120
6121   cp = ivs->cand_for_group[gid];
6122   if (!cp)
6123     return;
6124   cid = cp->cand->id;
6125
6126   ivs->bad_groups++;
6127   ivs->cand_for_group[gid] = NULL;
6128   ivs->n_cand_uses[cid]--;
6129
6130   if (ivs->n_cand_uses[cid] == 0)
6131     {
6132       bitmap_clear_bit (ivs->cands, cid);
6133       /* Do not count the pseudocandidates.  */
6134       if (cp->cand->iv)
6135         ivs->n_regs--;
6136       ivs->n_cands--;
6137       ivs->cand_cost -= cp->cand->cost;
6138
6139       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6140     }
6141
6142   ivs->cand_use_cost -= cp->cost;
6143
6144   iv_ca_set_remove_invariants (ivs, cp->depends_on);
6145
6146   if (cp->inv_expr != NULL)
6147     {
6148       unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6149       --(*slot);
6150       if (*slot == 0)
6151         ivs->used_inv_exprs->remove (cp->inv_expr);
6152     }
6153   iv_ca_recount_cost (data, ivs);
6154 }
6155
6156 /* Add invariants in set INVS to set IVS.  */
6157
6158 static void
6159 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6160 {
6161   bitmap_iterator bi;
6162   unsigned iid;
6163
6164   if (!invs)
6165     return;
6166
6167   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6168     {
6169       ivs->n_invariant_uses[iid]++;
6170       if (ivs->n_invariant_uses[iid] == 1)
6171         ivs->n_regs++;
6172     }
6173 }
6174
6175 /* Set cost pair for GROUP in set IVS to CP.  */
6176
6177 static void
6178 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6179               struct iv_group *group, struct cost_pair *cp)
6180 {
6181   unsigned gid = group->id, cid;
6182
6183   if (ivs->cand_for_group[gid] == cp)
6184     return;
6185
6186   if (ivs->cand_for_group[gid])
6187     iv_ca_set_no_cp (data, ivs, group);
6188
6189   if (cp)
6190     {
6191       cid = cp->cand->id;
6192
6193       ivs->bad_groups--;
6194       ivs->cand_for_group[gid] = cp;
6195       ivs->n_cand_uses[cid]++;
6196       if (ivs->n_cand_uses[cid] == 1)
6197         {
6198           bitmap_set_bit (ivs->cands, cid);
6199           /* Do not count the pseudocandidates.  */
6200           if (cp->cand->iv)
6201             ivs->n_regs++;
6202           ivs->n_cands++;
6203           ivs->cand_cost += cp->cand->cost;
6204
6205           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6206         }
6207
6208       ivs->cand_use_cost += cp->cost;
6209       iv_ca_set_add_invariants (ivs, cp->depends_on);
6210
6211       if (cp->inv_expr != NULL)
6212         {
6213           unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6214           ++(*slot);
6215         }
6216       iv_ca_recount_cost (data, ivs);
6217     }
6218 }
6219
6220 /* Extend set IVS by expressing USE by some of the candidates in it
6221    if possible.  Consider all important candidates if candidates in
6222    set IVS don't give any result.  */
6223
6224 static void
6225 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6226                struct iv_group *group)
6227 {
6228   struct cost_pair *best_cp = NULL, *cp;
6229   bitmap_iterator bi;
6230   unsigned i;
6231   struct iv_cand *cand;
6232
6233   gcc_assert (ivs->upto >= group->id);
6234   ivs->upto++;
6235   ivs->bad_groups++;
6236
6237   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6238     {
6239       cand = data->vcands[i];
6240       cp = get_group_iv_cost (data, group, cand);
6241       if (cheaper_cost_pair (cp, best_cp))
6242         best_cp = cp;
6243     }
6244
6245   if (best_cp == NULL)
6246     {
6247       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6248         {
6249           cand = data->vcands[i];
6250           cp = get_group_iv_cost (data, group, cand);
6251           if (cheaper_cost_pair (cp, best_cp))
6252             best_cp = cp;
6253         }
6254     }
6255
6256   iv_ca_set_cp (data, ivs, group, best_cp);
6257 }
6258
6259 /* Get cost for assignment IVS.  */
6260
6261 static comp_cost
6262 iv_ca_cost (struct iv_ca *ivs)
6263 {
6264   /* This was a conditional expression but it triggered a bug in
6265      Sun C 5.5.  */
6266   if (ivs->bad_groups)
6267     return infinite_cost;
6268   else
6269     return ivs->cost;
6270 }
6271
6272 /* Returns true if all dependences of CP are among invariants in IVS.  */
6273
6274 static bool
6275 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6276 {
6277   unsigned i;
6278   bitmap_iterator bi;
6279
6280   if (!cp->depends_on)
6281     return true;
6282
6283   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6284     {
6285       if (ivs->n_invariant_uses[i] == 0)
6286         return false;
6287     }
6288
6289   return true;
6290 }
6291
6292 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6293    it before NEXT.  */
6294
6295 static struct iv_ca_delta *
6296 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6297                  struct cost_pair *new_cp, struct iv_ca_delta *next)
6298 {
6299   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6300
6301   change->group = group;
6302   change->old_cp = old_cp;
6303   change->new_cp = new_cp;
6304   change->next = next;
6305
6306   return change;
6307 }
6308
6309 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6310    are rewritten.  */
6311
6312 static struct iv_ca_delta *
6313 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6314 {
6315   struct iv_ca_delta *last;
6316
6317   if (!l2)
6318     return l1;
6319
6320   if (!l1)
6321     return l2;
6322
6323   for (last = l1; last->next; last = last->next)
6324     continue;
6325   last->next = l2;
6326
6327   return l1;
6328 }
6329
6330 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6331
6332 static struct iv_ca_delta *
6333 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6334 {
6335   struct iv_ca_delta *act, *next, *prev = NULL;
6336
6337   for (act = delta; act; act = next)
6338     {
6339       next = act->next;
6340       act->next = prev;
6341       prev = act;
6342
6343       std::swap (act->old_cp, act->new_cp);
6344     }
6345
6346   return prev;
6347 }
6348
6349 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6350    reverted instead.  */
6351
6352 static void
6353 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6354                     struct iv_ca_delta *delta, bool forward)
6355 {
6356   struct cost_pair *from, *to;
6357   struct iv_ca_delta *act;
6358
6359   if (!forward)
6360     delta = iv_ca_delta_reverse (delta);
6361
6362   for (act = delta; act; act = act->next)
6363     {
6364       from = act->old_cp;
6365       to = act->new_cp;
6366       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6367       iv_ca_set_cp (data, ivs, act->group, to);
6368     }
6369
6370   if (!forward)
6371     iv_ca_delta_reverse (delta);
6372 }
6373
6374 /* Returns true if CAND is used in IVS.  */
6375
6376 static bool
6377 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6378 {
6379   return ivs->n_cand_uses[cand->id] > 0;
6380 }
6381
6382 /* Returns number of induction variable candidates in the set IVS.  */
6383
6384 static unsigned
6385 iv_ca_n_cands (struct iv_ca *ivs)
6386 {
6387   return ivs->n_cands;
6388 }
6389
6390 /* Free the list of changes DELTA.  */
6391
6392 static void
6393 iv_ca_delta_free (struct iv_ca_delta **delta)
6394 {
6395   struct iv_ca_delta *act, *next;
6396
6397   for (act = *delta; act; act = next)
6398     {
6399       next = act->next;
6400       free (act);
6401     }
6402
6403   *delta = NULL;
6404 }
6405
6406 /* Allocates new iv candidates assignment.  */
6407
6408 static struct iv_ca *
6409 iv_ca_new (struct ivopts_data *data)
6410 {
6411   struct iv_ca *nw = XNEW (struct iv_ca);
6412
6413   nw->upto = 0;
6414   nw->bad_groups = 0;
6415   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6416                                  data->vgroups.length ());
6417   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6418   nw->cands = BITMAP_ALLOC (NULL);
6419   nw->n_cands = 0;
6420   nw->n_regs = 0;
6421   nw->cand_use_cost = no_cost;
6422   nw->cand_cost = 0;
6423   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6424   nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6425   nw->cost = no_cost;
6426
6427   return nw;
6428 }
6429
6430 /* Free memory occupied by the set IVS.  */
6431
6432 static void
6433 iv_ca_free (struct iv_ca **ivs)
6434 {
6435   free ((*ivs)->cand_for_group);
6436   free ((*ivs)->n_cand_uses);
6437   BITMAP_FREE ((*ivs)->cands);
6438   free ((*ivs)->n_invariant_uses);
6439   delete ((*ivs)->used_inv_exprs);
6440   free (*ivs);
6441   *ivs = NULL;
6442 }
6443
6444 /* Dumps IVS to FILE.  */
6445
6446 static void
6447 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6448 {
6449   unsigned i;
6450   comp_cost cost = iv_ca_cost (ivs);
6451
6452   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6453            cost.complexity);
6454   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6455            ivs->cand_cost, ivs->cand_use_cost.cost,
6456            ivs->cand_use_cost.complexity);
6457   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6458
6459   for (i = 0; i < ivs->upto; i++)
6460     {
6461       struct iv_group *group = data->vgroups[i];
6462       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6463       if (cp)
6464         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6465                  group->id, cp->cand->id, cp->cost.cost,
6466                  cp->cost.complexity);
6467       else
6468         fprintf (file, "   group:%d --> ??\n", group->id);
6469     }
6470
6471   const char *pref = "";
6472   fprintf (file, "  invariant variables: ");
6473   for (i = 1; i <= data->max_inv_id; i++)
6474     if (ivs->n_invariant_uses[i])
6475       {
6476         fprintf (file, "%s%d", pref, i);
6477         pref = ", ";
6478       }
6479
6480   pref = "";
6481   fprintf (file, "\n  invariant expressions: ");
6482   for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6483        = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6484     {
6485         fprintf (file, "%s%d", pref, (*it).first->id);
6486         pref = ", ";
6487     }
6488
6489   fprintf (file, "\n\n");
6490 }
6491
6492 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6493    new set, and store differences in DELTA.  Number of induction variables
6494    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6495    the function will try to find a solution with mimimal iv candidates.  */
6496
6497 static comp_cost
6498 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6499               struct iv_cand *cand, struct iv_ca_delta **delta,
6500               unsigned *n_ivs, bool min_ncand)
6501 {
6502   unsigned i;
6503   comp_cost cost;
6504   struct iv_group *group;
6505   struct cost_pair *old_cp, *new_cp;
6506
6507   *delta = NULL;
6508   for (i = 0; i < ivs->upto; i++)
6509     {
6510       group = data->vgroups[i];
6511       old_cp = iv_ca_cand_for_group (ivs, group);
6512
6513       if (old_cp
6514           && old_cp->cand == cand)
6515         continue;
6516
6517       new_cp = get_group_iv_cost (data, group, cand);
6518       if (!new_cp)
6519         continue;
6520
6521       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6522         continue;
6523
6524       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6525         continue;
6526
6527       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6528     }
6529
6530   iv_ca_delta_commit (data, ivs, *delta, true);
6531   cost = iv_ca_cost (ivs);
6532   if (n_ivs)
6533     *n_ivs = iv_ca_n_cands (ivs);
6534   iv_ca_delta_commit (data, ivs, *delta, false);
6535
6536   return cost;
6537 }
6538
6539 /* Try narrowing set IVS by removing CAND.  Return the cost of
6540    the new set and store the differences in DELTA.  START is
6541    the candidate with which we start narrowing.  */
6542
6543 static comp_cost
6544 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6545               struct iv_cand *cand, struct iv_cand *start,
6546               struct iv_ca_delta **delta)
6547 {
6548   unsigned i, ci;
6549   struct iv_group *group;
6550   struct cost_pair *old_cp, *new_cp, *cp;
6551   bitmap_iterator bi;
6552   struct iv_cand *cnd;
6553   comp_cost cost, best_cost, acost;
6554
6555   *delta = NULL;
6556   for (i = 0; i < data->vgroups.length (); i++)
6557     {
6558       group = data->vgroups[i];
6559
6560       old_cp = iv_ca_cand_for_group (ivs, group);
6561       if (old_cp->cand != cand)
6562         continue;
6563
6564       best_cost = iv_ca_cost (ivs);
6565       /* Start narrowing with START.  */
6566       new_cp = get_group_iv_cost (data, group, start);
6567
6568       if (data->consider_all_candidates)
6569         {
6570           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6571             {
6572               if (ci == cand->id || (start && ci == start->id))
6573                 continue;
6574
6575               cnd = data->vcands[ci];
6576
6577               cp = get_group_iv_cost (data, group, cnd);
6578               if (!cp)
6579                 continue;
6580
6581               iv_ca_set_cp (data, ivs, group, cp);
6582               acost = iv_ca_cost (ivs);
6583
6584               if (acost < best_cost)
6585                 {
6586                   best_cost = acost;
6587                   new_cp = cp;
6588                 }
6589             }
6590         }
6591       else
6592         {
6593           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6594             {
6595               if (ci == cand->id || (start && ci == start->id))
6596                 continue;
6597
6598               cnd = data->vcands[ci];
6599
6600               cp = get_group_iv_cost (data, group, cnd);
6601               if (!cp)
6602                 continue;
6603
6604               iv_ca_set_cp (data, ivs, group, cp);
6605               acost = iv_ca_cost (ivs);
6606
6607               if (acost < best_cost)
6608                 {
6609                   best_cost = acost;
6610                   new_cp = cp;
6611                 }
6612             }
6613         }
6614       /* Restore to old cp for use.  */
6615       iv_ca_set_cp (data, ivs, group, old_cp);
6616
6617       if (!new_cp)
6618         {
6619           iv_ca_delta_free (delta);
6620           return infinite_cost;
6621         }
6622
6623       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6624     }
6625
6626   iv_ca_delta_commit (data, ivs, *delta, true);
6627   cost = iv_ca_cost (ivs);
6628   iv_ca_delta_commit (data, ivs, *delta, false);
6629
6630   return cost;
6631 }
6632
6633 /* Try optimizing the set of candidates IVS by removing candidates different
6634    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6635    differences in DELTA.  */
6636
6637 static comp_cost
6638 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6639              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6640 {
6641   bitmap_iterator bi;
6642   struct iv_ca_delta *act_delta, *best_delta;
6643   unsigned i;
6644   comp_cost best_cost, acost;
6645   struct iv_cand *cand;
6646
6647   best_delta = NULL;
6648   best_cost = iv_ca_cost (ivs);
6649
6650   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6651     {
6652       cand = data->vcands[i];
6653
6654       if (cand == except_cand)
6655         continue;
6656
6657       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6658
6659       if (acost < best_cost)
6660         {
6661           best_cost = acost;
6662           iv_ca_delta_free (&best_delta);
6663           best_delta = act_delta;
6664         }
6665       else
6666         iv_ca_delta_free (&act_delta);
6667     }
6668
6669   if (!best_delta)
6670     {
6671       *delta = NULL;
6672       return best_cost;
6673     }
6674
6675   /* Recurse to possibly remove other unnecessary ivs.  */
6676   iv_ca_delta_commit (data, ivs, best_delta, true);
6677   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6678   iv_ca_delta_commit (data, ivs, best_delta, false);
6679   *delta = iv_ca_delta_join (best_delta, *delta);
6680   return best_cost;
6681 }
6682
6683 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6684    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6685    the corresponding cost_pair, otherwise just return BEST_CP.  */
6686
6687 static struct cost_pair*
6688 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6689                         unsigned int cand_idx, struct iv_cand *old_cand,
6690                         struct cost_pair *best_cp)
6691 {
6692   struct iv_cand *cand;
6693   struct cost_pair *cp;
6694
6695   gcc_assert (old_cand != NULL && best_cp != NULL);
6696   if (cand_idx == old_cand->id)
6697     return best_cp;
6698
6699   cand = data->vcands[cand_idx];
6700   cp = get_group_iv_cost (data, group, cand);
6701   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6702     return cp;
6703
6704   return best_cp;
6705 }
6706
6707 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6708    which are used by more than one iv uses.  For each of those candidates,
6709    this function tries to represent iv uses under that candidate using
6710    other ones with lower local cost, then tries to prune the new set.
6711    If the new set has lower cost, It returns the new cost after recording
6712    candidate replacement in list DELTA.  */
6713
6714 static comp_cost
6715 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6716                struct iv_ca_delta **delta)
6717 {
6718   bitmap_iterator bi, bj;
6719   unsigned int i, j, k;
6720   struct iv_cand *cand;
6721   comp_cost orig_cost, acost;
6722   struct iv_ca_delta *act_delta, *tmp_delta;
6723   struct cost_pair *old_cp, *best_cp = NULL;
6724
6725   *delta = NULL;
6726   orig_cost = iv_ca_cost (ivs);
6727
6728   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6729     {
6730       if (ivs->n_cand_uses[i] == 1
6731           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6732         continue;
6733
6734       cand = data->vcands[i];
6735
6736       act_delta = NULL;
6737       /*  Represent uses under current candidate using other ones with
6738           lower local cost.  */
6739       for (j = 0; j < ivs->upto; j++)
6740         {
6741           struct iv_group *group = data->vgroups[j];
6742           old_cp = iv_ca_cand_for_group (ivs, group);
6743
6744           if (old_cp->cand != cand)
6745             continue;
6746
6747           best_cp = old_cp;
6748           if (data->consider_all_candidates)
6749             for (k = 0; k < data->vcands.length (); k++)
6750               best_cp = cheaper_cost_with_cand (data, group, k,
6751                                                 old_cp->cand, best_cp);
6752           else
6753             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6754               best_cp = cheaper_cost_with_cand (data, group, k,
6755                                                 old_cp->cand, best_cp);
6756
6757           if (best_cp == old_cp)
6758             continue;
6759
6760           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6761         }
6762       /* No need for further prune.  */
6763       if (!act_delta)
6764         continue;
6765
6766       /* Prune the new candidate set.  */
6767       iv_ca_delta_commit (data, ivs, act_delta, true);
6768       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6769       iv_ca_delta_commit (data, ivs, act_delta, false);
6770       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6771
6772       if (acost < orig_cost)
6773         {
6774           *delta = act_delta;
6775           return acost;
6776         }
6777       else
6778         iv_ca_delta_free (&act_delta);
6779     }
6780
6781   return orig_cost;
6782 }
6783
6784 /* Tries to extend the sets IVS in the best possible way in order to
6785    express the GROUP.  If ORIGINALP is true, prefer candidates from
6786    the original set of IVs, otherwise favor important candidates not
6787    based on any memory object.  */
6788
6789 static bool
6790 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6791                   struct iv_group *group, bool originalp)
6792 {
6793   comp_cost best_cost, act_cost;
6794   unsigned i;
6795   bitmap_iterator bi;
6796   struct iv_cand *cand;
6797   struct iv_ca_delta *best_delta = NULL, *act_delta;
6798   struct cost_pair *cp;
6799
6800   iv_ca_add_group (data, ivs, group);
6801   best_cost = iv_ca_cost (ivs);
6802   cp = iv_ca_cand_for_group (ivs, group);
6803   if (cp)
6804     {
6805       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6806       iv_ca_set_no_cp (data, ivs, group);
6807     }
6808
6809   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6810      first try important candidates not based on any memory object.  Only if
6811      this fails, try the specific ones.  Rationale -- in loops with many
6812      variables the best choice often is to use just one generic biv.  If we
6813      added here many ivs specific to the uses, the optimization algorithm later
6814      would be likely to get stuck in a local minimum, thus causing us to create
6815      too many ivs.  The approach from few ivs to more seems more likely to be
6816      successful -- starting from few ivs, replacing an expensive use by a
6817      specific iv should always be a win.  */
6818   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6819     {
6820       cand = data->vcands[i];
6821
6822       if (originalp && cand->pos !=IP_ORIGINAL)
6823         continue;
6824
6825       if (!originalp && cand->iv->base_object != NULL_TREE)
6826         continue;
6827
6828       if (iv_ca_cand_used_p (ivs, cand))
6829         continue;
6830
6831       cp = get_group_iv_cost (data, group, cand);
6832       if (!cp)
6833         continue;
6834
6835       iv_ca_set_cp (data, ivs, group, cp);
6836       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6837                                true);
6838       iv_ca_set_no_cp (data, ivs, group);
6839       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6840
6841       if (act_cost < best_cost)
6842         {
6843           best_cost = act_cost;
6844
6845           iv_ca_delta_free (&best_delta);
6846           best_delta = act_delta;
6847         }
6848       else
6849         iv_ca_delta_free (&act_delta);
6850     }
6851
6852   if (best_cost.infinite_cost_p ())
6853     {
6854       for (i = 0; i < group->n_map_members; i++)
6855         {
6856           cp = group->cost_map + i;
6857           cand = cp->cand;
6858           if (!cand)
6859             continue;
6860
6861           /* Already tried this.  */
6862           if (cand->important)
6863             {
6864               if (originalp && cand->pos == IP_ORIGINAL)
6865                 continue;
6866               if (!originalp && cand->iv->base_object == NULL_TREE)
6867                 continue;
6868             }
6869
6870           if (iv_ca_cand_used_p (ivs, cand))
6871             continue;
6872
6873           act_delta = NULL;
6874           iv_ca_set_cp (data, ivs, group, cp);
6875           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6876           iv_ca_set_no_cp (data, ivs, group);
6877           act_delta = iv_ca_delta_add (group,
6878                                        iv_ca_cand_for_group (ivs, group),
6879                                        cp, act_delta);
6880
6881           if (act_cost < best_cost)
6882             {
6883               best_cost = act_cost;
6884
6885               if (best_delta)
6886                 iv_ca_delta_free (&best_delta);
6887               best_delta = act_delta;
6888             }
6889           else
6890             iv_ca_delta_free (&act_delta);
6891         }
6892     }
6893
6894   iv_ca_delta_commit (data, ivs, best_delta, true);
6895   iv_ca_delta_free (&best_delta);
6896
6897   return !best_cost.infinite_cost_p ();
6898 }
6899
6900 /* Finds an initial assignment of candidates to uses.  */
6901
6902 static struct iv_ca *
6903 get_initial_solution (struct ivopts_data *data, bool originalp)
6904 {
6905   unsigned i;
6906   struct iv_ca *ivs = iv_ca_new (data);
6907
6908   for (i = 0; i < data->vgroups.length (); i++)
6909     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6910       {
6911         iv_ca_free (&ivs);
6912         return NULL;
6913       }
6914
6915   return ivs;
6916 }
6917
6918 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6919    points to a bool variable, this function tries to break local
6920    optimal fixed-point by replacing candidates in IVS if it's true.  */
6921
6922 static bool
6923 try_improve_iv_set (struct ivopts_data *data,
6924                     struct iv_ca *ivs, bool *try_replace_p)
6925 {
6926   unsigned i, n_ivs;
6927   comp_cost acost, best_cost = iv_ca_cost (ivs);
6928   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6929   struct iv_cand *cand;
6930
6931   /* Try extending the set of induction variables by one.  */
6932   for (i = 0; i < data->vcands.length (); i++)
6933     {
6934       cand = data->vcands[i];
6935
6936       if (iv_ca_cand_used_p (ivs, cand))
6937         continue;
6938
6939       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6940       if (!act_delta)
6941         continue;
6942
6943       /* If we successfully added the candidate and the set is small enough,
6944          try optimizing it by removing other candidates.  */
6945       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6946         {
6947           iv_ca_delta_commit (data, ivs, act_delta, true);
6948           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6949           iv_ca_delta_commit (data, ivs, act_delta, false);
6950           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6951         }
6952
6953       if (acost < best_cost)
6954         {
6955           best_cost = acost;
6956           iv_ca_delta_free (&best_delta);
6957           best_delta = act_delta;
6958         }
6959       else
6960         iv_ca_delta_free (&act_delta);
6961     }
6962
6963   if (!best_delta)
6964     {
6965       /* Try removing the candidates from the set instead.  */
6966       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6967
6968       if (!best_delta && *try_replace_p)
6969         {
6970           *try_replace_p = false;
6971           /* So far candidate selecting algorithm tends to choose fewer IVs
6972              so that it can handle cases in which loops have many variables
6973              but the best choice is often to use only one general biv.  One
6974              weakness is it can't handle opposite cases, in which different
6975              candidates should be chosen with respect to each use.  To solve
6976              the problem, we replace candidates in a manner described by the
6977              comments of iv_ca_replace, thus give general algorithm a chance
6978              to break local optimal fixed-point in these cases.  */
6979           best_cost = iv_ca_replace (data, ivs, &best_delta);
6980         }
6981
6982       if (!best_delta)
6983         return false;
6984     }
6985
6986   iv_ca_delta_commit (data, ivs, best_delta, true);
6987   gcc_assert (best_cost == iv_ca_cost (ivs));
6988   iv_ca_delta_free (&best_delta);
6989   return true;
6990 }
6991
6992 /* Attempts to find the optimal set of induction variables.  We do simple
6993    greedy heuristic -- we try to replace at most one candidate in the selected
6994    solution and remove the unused ivs while this improves the cost.  */
6995
6996 static struct iv_ca *
6997 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6998 {
6999   struct iv_ca *set;
7000   bool try_replace_p = true;
7001
7002   /* Get the initial solution.  */
7003   set = get_initial_solution (data, originalp);
7004   if (!set)
7005     {
7006       if (dump_file && (dump_flags & TDF_DETAILS))
7007         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7008       return NULL;
7009     }
7010
7011   if (dump_file && (dump_flags & TDF_DETAILS))
7012     {
7013       fprintf (dump_file, "Initial set of candidates:\n");
7014       iv_ca_dump (data, dump_file, set);
7015     }
7016
7017   while (try_improve_iv_set (data, set, &try_replace_p))
7018     {
7019       if (dump_file && (dump_flags & TDF_DETAILS))
7020         {
7021           fprintf (dump_file, "Improved to:\n");
7022           iv_ca_dump (data, dump_file, set);
7023         }
7024     }
7025
7026   return set;
7027 }
7028
7029 static struct iv_ca *
7030 find_optimal_iv_set (struct ivopts_data *data)
7031 {
7032   unsigned i;
7033   comp_cost cost, origcost;
7034   struct iv_ca *set, *origset;
7035
7036   /* Determine the cost based on a strategy that starts with original IVs,
7037      and try again using a strategy that prefers candidates not based
7038      on any IVs.  */
7039   origset = find_optimal_iv_set_1 (data, true);
7040   set = find_optimal_iv_set_1 (data, false);
7041
7042   if (!origset && !set)
7043     return NULL;
7044
7045   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7046   cost = set ? iv_ca_cost (set) : infinite_cost;
7047
7048   if (dump_file && (dump_flags & TDF_DETAILS))
7049     {
7050       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7051                origcost.cost, origcost.complexity);
7052       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7053                cost.cost, cost.complexity);
7054     }
7055
7056   /* Choose the one with the best cost.  */
7057   if (origcost <= cost)
7058     {
7059       if (set)
7060         iv_ca_free (&set);
7061       set = origset;
7062     }
7063   else if (origset)
7064     iv_ca_free (&origset);
7065
7066   for (i = 0; i < data->vgroups.length (); i++)
7067     {
7068       struct iv_group *group = data->vgroups[i];
7069       group->selected = iv_ca_cand_for_group (set, group)->cand;
7070     }
7071
7072   return set;
7073 }
7074
7075 /* Creates a new induction variable corresponding to CAND.  */
7076
7077 static void
7078 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7079 {
7080   gimple_stmt_iterator incr_pos;
7081   tree base;
7082   struct iv_use *use;
7083   struct iv_group *group;
7084   bool after = false;
7085
7086   if (!cand->iv)
7087     return;
7088
7089   switch (cand->pos)
7090     {
7091     case IP_NORMAL:
7092       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7093       break;
7094
7095     case IP_END:
7096       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7097       after = true;
7098       break;
7099
7100     case IP_AFTER_USE:
7101       after = true;
7102       /* fall through */
7103     case IP_BEFORE_USE:
7104       incr_pos = gsi_for_stmt (cand->incremented_at);
7105       break;
7106
7107     case IP_ORIGINAL:
7108       /* Mark that the iv is preserved.  */
7109       name_info (data, cand->var_before)->preserve_biv = true;
7110       name_info (data, cand->var_after)->preserve_biv = true;
7111
7112       /* Rewrite the increment so that it uses var_before directly.  */
7113       use = find_interesting_uses_op (data, cand->var_after);
7114       group = data->vgroups[use->group_id];
7115       group->selected = cand;
7116       return;
7117     }
7118
7119   gimple_add_tmp_var (cand->var_before);
7120
7121   base = unshare_expr (cand->iv->base);
7122
7123   create_iv (base, unshare_expr (cand->iv->step),
7124              cand->var_before, data->current_loop,
7125              &incr_pos, after, &cand->var_before, &cand->var_after);
7126 }
7127
7128 /* Creates new induction variables described in SET.  */
7129
7130 static void
7131 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7132 {
7133   unsigned i;
7134   struct iv_cand *cand;
7135   bitmap_iterator bi;
7136
7137   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7138     {
7139       cand = data->vcands[i];
7140       create_new_iv (data, cand);
7141     }
7142
7143   if (dump_file && (dump_flags & TDF_DETAILS))
7144     {
7145       fprintf (dump_file, "Selected IV set for loop %d",
7146                data->current_loop->num);
7147       if (data->loop_loc != UNKNOWN_LOCATION)
7148         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7149                  LOCATION_LINE (data->loop_loc));
7150       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7151                avg_loop_niter (data->current_loop));
7152       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7153                (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7154       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7155       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7156         {
7157           cand = data->vcands[i];
7158           dump_cand (dump_file, cand);
7159         }
7160       fprintf (dump_file, "\n");
7161     }
7162 }
7163
7164 /* Rewrites USE (definition of iv used in a nonlinear expression)
7165    using candidate CAND.  */
7166
7167 static void
7168 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7169                             struct iv_use *use, struct iv_cand *cand)
7170 {
7171   tree comp;
7172   tree op, tgt;
7173   gassign *ass;
7174   gimple_stmt_iterator bsi;
7175
7176   /* An important special case -- if we are asked to express value of
7177      the original iv by itself, just exit; there is no need to
7178      introduce a new computation (that might also need casting the
7179      variable to unsigned and back).  */
7180   if (cand->pos == IP_ORIGINAL
7181       && cand->incremented_at == use->stmt)
7182     {
7183       enum tree_code stmt_code;
7184
7185       gcc_assert (is_gimple_assign (use->stmt));
7186       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7187
7188       /* Check whether we may leave the computation unchanged.
7189          This is the case only if it does not rely on other
7190          computations in the loop -- otherwise, the computation
7191          we rely upon may be removed in remove_unused_ivs,
7192          thus leading to ICE.  */
7193       stmt_code = gimple_assign_rhs_code (use->stmt);
7194       if (stmt_code == PLUS_EXPR
7195           || stmt_code == MINUS_EXPR
7196           || stmt_code == POINTER_PLUS_EXPR)
7197         {
7198           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7199             op = gimple_assign_rhs2 (use->stmt);
7200           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7201             op = gimple_assign_rhs1 (use->stmt);
7202           else
7203             op = NULL_TREE;
7204         }
7205       else
7206         op = NULL_TREE;
7207
7208       if (op && expr_invariant_in_loop_p (data->current_loop, op))
7209         return;
7210     }
7211
7212   comp = get_computation (data->current_loop, use, cand);
7213   gcc_assert (comp != NULL_TREE);
7214
7215   switch (gimple_code (use->stmt))
7216     {
7217     case GIMPLE_PHI:
7218       tgt = PHI_RESULT (use->stmt);
7219
7220       /* If we should keep the biv, do not replace it.  */
7221       if (name_info (data, tgt)->preserve_biv)
7222         return;
7223
7224       bsi = gsi_after_labels (gimple_bb (use->stmt));
7225       break;
7226
7227     case GIMPLE_ASSIGN:
7228       tgt = gimple_assign_lhs (use->stmt);
7229       bsi = gsi_for_stmt (use->stmt);
7230       break;
7231
7232     default:
7233       gcc_unreachable ();
7234     }
7235
7236   if (!valid_gimple_rhs_p (comp)
7237       || (gimple_code (use->stmt) != GIMPLE_PHI
7238           /* We can't allow re-allocating the stmt as it might be pointed
7239              to still.  */
7240           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7241               >= gimple_num_ops (gsi_stmt (bsi)))))
7242     {
7243       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7244                                        true, GSI_SAME_STMT);
7245       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7246         {
7247           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7248           /* As this isn't a plain copy we have to reset alignment
7249              information.  */
7250           if (SSA_NAME_PTR_INFO (comp))
7251             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7252         }
7253     }
7254
7255   if (gimple_code (use->stmt) == GIMPLE_PHI)
7256     {
7257       ass = gimple_build_assign (tgt, comp);
7258       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7259
7260       bsi = gsi_for_stmt (use->stmt);
7261       remove_phi_node (&bsi, false);
7262     }
7263   else
7264     {
7265       gimple_assign_set_rhs_from_tree (&bsi, comp);
7266       use->stmt = gsi_stmt (bsi);
7267     }
7268 }
7269
7270 /* Performs a peephole optimization to reorder the iv update statement with
7271    a mem ref to enable instruction combining in later phases. The mem ref uses
7272    the iv value before the update, so the reordering transformation requires
7273    adjustment of the offset. CAND is the selected IV_CAND.
7274
7275    Example:
7276
7277    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7278    iv2 = iv1 + 1;
7279
7280    if (t < val)      (1)
7281      goto L;
7282    goto Head;
7283
7284
7285    directly propagating t over to (1) will introduce overlapping live range
7286    thus increase register pressure. This peephole transform it into:
7287
7288
7289    iv2 = iv1 + 1;
7290    t = MEM_REF (base, iv2, 8, 8);
7291    if (t < val)
7292      goto L;
7293    goto Head;
7294 */
7295
7296 static void
7297 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7298 {
7299   tree var_after;
7300   gimple *iv_update, *stmt;
7301   basic_block bb;
7302   gimple_stmt_iterator gsi, gsi_iv;
7303
7304   if (cand->pos != IP_NORMAL)
7305     return;
7306
7307   var_after = cand->var_after;
7308   iv_update = SSA_NAME_DEF_STMT (var_after);
7309
7310   bb = gimple_bb (iv_update);
7311   gsi = gsi_last_nondebug_bb (bb);
7312   stmt = gsi_stmt (gsi);
7313
7314   /* Only handle conditional statement for now.  */
7315   if (gimple_code (stmt) != GIMPLE_COND)
7316     return;
7317
7318   gsi_prev_nondebug (&gsi);
7319   stmt = gsi_stmt (gsi);
7320   if (stmt != iv_update)
7321     return;
7322
7323   gsi_prev_nondebug (&gsi);
7324   if (gsi_end_p (gsi))
7325     return;
7326
7327   stmt = gsi_stmt (gsi);
7328   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7329     return;
7330
7331   if (stmt != use->stmt)
7332     return;
7333
7334   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7335     return;
7336
7337   if (dump_file && (dump_flags & TDF_DETAILS))
7338     {
7339       fprintf (dump_file, "Reordering \n");
7340       print_gimple_stmt (dump_file, iv_update, 0, 0);
7341       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7342       fprintf (dump_file, "\n");
7343     }
7344
7345   gsi = gsi_for_stmt (use->stmt);
7346   gsi_iv = gsi_for_stmt (iv_update);
7347   gsi_move_before (&gsi_iv, &gsi);
7348
7349   cand->pos = IP_BEFORE_USE;
7350   cand->incremented_at = use->stmt;
7351 }
7352
7353 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7354
7355 static void
7356 rewrite_use_address (struct ivopts_data *data,
7357                      struct iv_use *use, struct iv_cand *cand)
7358 {
7359   aff_tree aff;
7360   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7361   tree base_hint = NULL_TREE;
7362   tree ref, iv;
7363   bool ok;
7364
7365   adjust_iv_update_pos (cand, use);
7366   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7367   gcc_assert (ok);
7368   unshare_aff_combination (&aff);
7369
7370   /* To avoid undefined overflow problems, all IV candidates use unsigned
7371      integer types.  The drawback is that this makes it impossible for
7372      create_mem_ref to distinguish an IV that is based on a memory object
7373      from one that represents simply an offset.
7374
7375      To work around this problem, we pass a hint to create_mem_ref that
7376      indicates which variable (if any) in aff is an IV based on a memory
7377      object.  Note that we only consider the candidate.  If this is not
7378      based on an object, the base of the reference is in some subexpression
7379      of the use -- but these will use pointer types, so they are recognized
7380      by the create_mem_ref heuristics anyway.  */
7381   if (cand->iv->base_object)
7382     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7383
7384   iv = var_at_stmt (data->current_loop, cand, use->stmt);
7385   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7386                         reference_alias_ptr_type (*use->op_p),
7387                         iv, base_hint, data->speed);
7388   copy_ref_info (ref, *use->op_p);
7389   *use->op_p = ref;
7390 }
7391
7392 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7393    candidate CAND.  */
7394
7395 static void
7396 rewrite_use_compare (struct ivopts_data *data,
7397                      struct iv_use *use, struct iv_cand *cand)
7398 {
7399   tree comp, *var_p, op, bound;
7400   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7401   enum tree_code compare;
7402   struct iv_group *group = data->vgroups[use->group_id];
7403   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7404   bool ok;
7405
7406   bound = cp->value;
7407   if (bound)
7408     {
7409       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7410       tree var_type = TREE_TYPE (var);
7411       gimple_seq stmts;
7412
7413       if (dump_file && (dump_flags & TDF_DETAILS))
7414         {
7415           fprintf (dump_file, "Replacing exit test: ");
7416           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7417         }
7418       compare = cp->comp;
7419       bound = unshare_expr (fold_convert (var_type, bound));
7420       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7421       if (stmts)
7422         gsi_insert_seq_on_edge_immediate (
7423                 loop_preheader_edge (data->current_loop),
7424                 stmts);
7425
7426       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7427       gimple_cond_set_lhs (cond_stmt, var);
7428       gimple_cond_set_code (cond_stmt, compare);
7429       gimple_cond_set_rhs (cond_stmt, op);
7430       return;
7431     }
7432
7433   /* The induction variable elimination failed; just express the original
7434      giv.  */
7435   comp = get_computation (data->current_loop, use, cand);
7436   gcc_assert (comp != NULL_TREE);
7437
7438   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7439   gcc_assert (ok);
7440
7441   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7442                                      true, GSI_SAME_STMT);
7443 }
7444
7445 /* Rewrite the groups using the selected induction variables.  */
7446
7447 static void
7448 rewrite_groups (struct ivopts_data *data)
7449 {
7450   unsigned i, j;
7451
7452   for (i = 0; i < data->vgroups.length (); i++)
7453     {
7454       struct iv_group *group = data->vgroups[i];
7455       struct iv_cand *cand = group->selected;
7456
7457       gcc_assert (cand);
7458
7459       if (group->type == USE_NONLINEAR_EXPR)
7460         {
7461           for (j = 0; j < group->vuses.length (); j++)
7462             {
7463               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7464               update_stmt (group->vuses[j]->stmt);
7465             }
7466         }
7467       else if (group->type == USE_ADDRESS)
7468         {
7469           for (j = 0; j < group->vuses.length (); j++)
7470             {
7471               rewrite_use_address (data, group->vuses[j], cand);
7472               update_stmt (group->vuses[j]->stmt);
7473             }
7474         }
7475       else
7476         {
7477           gcc_assert (group->type == USE_COMPARE);
7478
7479           for (j = 0; j < group->vuses.length (); j++)
7480             {
7481               rewrite_use_compare (data, group->vuses[j], cand);
7482               update_stmt (group->vuses[j]->stmt);
7483             }
7484         }
7485     }
7486 }
7487
7488 /* Removes the ivs that are not used after rewriting.  */
7489
7490 static void
7491 remove_unused_ivs (struct ivopts_data *data)
7492 {
7493   unsigned j;
7494   bitmap_iterator bi;
7495   bitmap toremove = BITMAP_ALLOC (NULL);
7496
7497   /* Figure out an order in which to release SSA DEFs so that we don't
7498      release something that we'd have to propagate into a debug stmt
7499      afterwards.  */
7500   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7501     {
7502       struct version_info *info;
7503
7504       info = ver_info (data, j);
7505       if (info->iv
7506           && !integer_zerop (info->iv->step)
7507           && !info->inv_id
7508           && !info->iv->nonlin_use
7509           && !info->preserve_biv)
7510         {
7511           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7512
7513           tree def = info->iv->ssa_name;
7514
7515           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7516             {
7517               imm_use_iterator imm_iter;
7518               use_operand_p use_p;
7519               gimple *stmt;
7520               int count = 0;
7521
7522               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7523                 {
7524                   if (!gimple_debug_bind_p (stmt))
7525                     continue;
7526
7527                   /* We just want to determine whether to do nothing
7528                      (count == 0), to substitute the computed
7529                      expression into a single use of the SSA DEF by
7530                      itself (count == 1), or to use a debug temp
7531                      because the SSA DEF is used multiple times or as
7532                      part of a larger expression (count > 1). */
7533                   count++;
7534                   if (gimple_debug_bind_get_value (stmt) != def)
7535                     count++;
7536
7537                   if (count > 1)
7538                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7539                 }
7540
7541               if (!count)
7542                 continue;
7543
7544               struct iv_use dummy_use;
7545               struct iv_cand *best_cand = NULL, *cand;
7546               unsigned i, best_pref = 0, cand_pref;
7547
7548               memset (&dummy_use, 0, sizeof (dummy_use));
7549               dummy_use.iv = info->iv;
7550               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7551                 {
7552                   cand = data->vgroups[i]->selected;
7553                   if (cand == best_cand)
7554                     continue;
7555                   cand_pref = operand_equal_p (cand->iv->step,
7556                                                info->iv->step, 0)
7557                     ? 4 : 0;
7558                   cand_pref
7559                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7560                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7561                     ? 2 : 0;
7562                   cand_pref
7563                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7564                     ? 1 : 0;
7565                   if (best_cand == NULL || best_pref < cand_pref)
7566                     {
7567                       best_cand = cand;
7568                       best_pref = cand_pref;
7569                     }
7570                 }
7571
7572               if (!best_cand)
7573                 continue;
7574
7575               tree comp = get_computation_at (data->current_loop,
7576                                               &dummy_use, best_cand,
7577                                               SSA_NAME_DEF_STMT (def));
7578               if (!comp)
7579                 continue;
7580
7581               if (count > 1)
7582                 {
7583                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7584                   DECL_ARTIFICIAL (vexpr) = 1;
7585                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7586                   if (SSA_NAME_VAR (def))
7587                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7588                   else
7589                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7590                   gdebug *def_temp
7591                     = gimple_build_debug_bind (vexpr, comp, NULL);
7592                   gimple_stmt_iterator gsi;
7593
7594                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7595                     gsi = gsi_after_labels (gimple_bb
7596                                             (SSA_NAME_DEF_STMT (def)));
7597                   else
7598                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7599
7600                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7601                   comp = vexpr;
7602                 }
7603
7604               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7605                 {
7606                   if (!gimple_debug_bind_p (stmt))
7607                     continue;
7608
7609                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7610                     SET_USE (use_p, comp);
7611
7612                   update_stmt (stmt);
7613                 }
7614             }
7615         }
7616     }
7617
7618   release_defs_bitset (toremove);
7619
7620   BITMAP_FREE (toremove);
7621 }
7622
7623 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7624    for hash_map::traverse.  */
7625
7626 bool
7627 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7628 {
7629   free (value);
7630   return true;
7631 }
7632
7633 /* Frees data allocated by the optimization of a single loop.  */
7634
7635 static void
7636 free_loop_data (struct ivopts_data *data)
7637 {
7638   unsigned i, j;
7639   bitmap_iterator bi;
7640   tree obj;
7641
7642   if (data->niters)
7643     {
7644       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7645       delete data->niters;
7646       data->niters = NULL;
7647     }
7648
7649   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7650     {
7651       struct version_info *info;
7652
7653       info = ver_info (data, i);
7654       info->iv = NULL;
7655       info->has_nonlin_use = false;
7656       info->preserve_biv = false;
7657       info->inv_id = 0;
7658     }
7659   bitmap_clear (data->relevant);
7660   bitmap_clear (data->important_candidates);
7661
7662   for (i = 0; i < data->vgroups.length (); i++)
7663     {
7664       struct iv_group *group = data->vgroups[i];
7665
7666       for (j = 0; j < group->vuses.length (); j++)
7667         free (group->vuses[j]);
7668       group->vuses.release ();
7669
7670       BITMAP_FREE (group->related_cands);
7671       for (j = 0; j < group->n_map_members; j++)
7672         if (group->cost_map[j].depends_on)
7673           BITMAP_FREE (group->cost_map[j].depends_on);
7674
7675       free (group->cost_map);
7676       free (group);
7677     }
7678   data->vgroups.truncate (0);
7679
7680   for (i = 0; i < data->vcands.length (); i++)
7681     {
7682       struct iv_cand *cand = data->vcands[i];
7683
7684       if (cand->depends_on)
7685         BITMAP_FREE (cand->depends_on);
7686       free (cand);
7687     }
7688   data->vcands.truncate (0);
7689
7690   if (data->version_info_size < num_ssa_names)
7691     {
7692       data->version_info_size = 2 * num_ssa_names;
7693       free (data->version_info);
7694       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7695     }
7696
7697   data->max_inv_id = 0;
7698
7699   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7700     SET_DECL_RTL (obj, NULL_RTX);
7701
7702   decl_rtl_to_reset.truncate (0);
7703
7704   data->inv_expr_tab->empty ();
7705   data->max_inv_expr_id = 0;
7706
7707   data->iv_common_cand_tab->empty ();
7708   data->iv_common_cands.truncate (0);
7709 }
7710
7711 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7712    loop tree.  */
7713
7714 static void
7715 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7716 {
7717   free_loop_data (data);
7718   free (data->version_info);
7719   BITMAP_FREE (data->relevant);
7720   BITMAP_FREE (data->important_candidates);
7721
7722   decl_rtl_to_reset.release ();
7723   data->vgroups.release ();
7724   data->vcands.release ();
7725   delete data->inv_expr_tab;
7726   data->inv_expr_tab = NULL;
7727   free_affine_expand_cache (&data->name_expansion_cache);
7728   delete data->iv_common_cand_tab;
7729   data->iv_common_cand_tab = NULL;
7730   data->iv_common_cands.release ();
7731   obstack_free (&data->iv_obstack, NULL);
7732 }
7733
7734 /* Returns true if the loop body BODY includes any function calls.  */
7735
7736 static bool
7737 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7738 {
7739   gimple_stmt_iterator gsi;
7740   unsigned i;
7741
7742   for (i = 0; i < num_nodes; i++)
7743     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7744       {
7745         gimple *stmt = gsi_stmt (gsi);
7746         if (is_gimple_call (stmt)
7747             && !gimple_call_internal_p (stmt)
7748             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7749           return true;
7750       }
7751   return false;
7752 }
7753
7754 /* Optimizes the LOOP.  Returns true if anything changed.  */
7755
7756 static bool
7757 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7758 {
7759   bool changed = false;
7760   struct iv_ca *iv_ca;
7761   edge exit = single_dom_exit (loop);
7762   basic_block *body;
7763
7764   gcc_assert (!data->niters);
7765   data->current_loop = loop;
7766   data->loop_loc = find_loop_location (loop);
7767   data->speed = optimize_loop_for_speed_p (loop);
7768
7769   if (dump_file && (dump_flags & TDF_DETAILS))
7770     {
7771       fprintf (dump_file, "Processing loop %d", loop->num);
7772       if (data->loop_loc != UNKNOWN_LOCATION)
7773         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7774                  LOCATION_LINE (data->loop_loc));
7775       fprintf (dump_file, "\n");
7776
7777       if (exit)
7778         {
7779           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7780                    exit->src->index, exit->dest->index);
7781           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7782           fprintf (dump_file, "\n");
7783         }
7784
7785       fprintf (dump_file, "\n");
7786     }
7787
7788   body = get_loop_body (loop);
7789   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7790   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7791   free (body);
7792
7793   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7794
7795   /* For each ssa name determines whether it behaves as an induction variable
7796      in some loop.  */
7797   if (!find_induction_variables (data))
7798     goto finish;
7799
7800   /* Finds interesting uses (item 1).  */
7801   find_interesting_uses (data);
7802   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7803     goto finish;
7804
7805   /* Finds candidates for the induction variables (item 2).  */
7806   find_iv_candidates (data);
7807
7808   /* Calculates the costs (item 3, part 1).  */
7809   determine_iv_costs (data);
7810   determine_group_iv_costs (data);
7811   determine_set_costs (data);
7812
7813   /* Find the optimal set of induction variables (item 3, part 2).  */
7814   iv_ca = find_optimal_iv_set (data);
7815   if (!iv_ca)
7816     goto finish;
7817   changed = true;
7818
7819   /* Create the new induction variables (item 4, part 1).  */
7820   create_new_ivs (data, iv_ca);
7821   iv_ca_free (&iv_ca);
7822
7823   /* Rewrite the uses (item 4, part 2).  */
7824   rewrite_groups (data);
7825
7826   /* Remove the ivs that are unused after rewriting.  */
7827   remove_unused_ivs (data);
7828
7829   /* We have changed the structure of induction variables; it might happen
7830      that definitions in the scev database refer to some of them that were
7831      eliminated.  */
7832   scev_reset ();
7833
7834 finish:
7835   free_loop_data (data);
7836
7837   return changed;
7838 }
7839
7840 /* Main entry point.  Optimizes induction variables in loops.  */
7841
7842 void
7843 tree_ssa_iv_optimize (void)
7844 {
7845   struct loop *loop;
7846   struct ivopts_data data;
7847
7848   tree_ssa_iv_optimize_init (&data);
7849
7850   /* Optimize the loops starting with the innermost ones.  */
7851   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7852     {
7853       if (dump_file && (dump_flags & TDF_DETAILS))
7854         flow_loop_dump (loop, dump_file, NULL, 1);
7855
7856       tree_ssa_iv_optimize_loop (&data, loop);
7857     }
7858
7859   tree_ssa_iv_optimize_finalize (&data);
7860 }