gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003-2016 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33       Note the interesting uses are categorized and handled in group.
  34       Generally, address type uses are grouped together if their iv bases
  35       are different in constant offset.
  36
  37    2) Candidates for the induction variables are found.  This includes
  38
  39       -- old induction variables
  40       -- the variables defined by expressions derived from the "interesting
  41          groups/uses" above
  42
  43    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  44       cost function assigns a cost to sets of induction variables and consists
  45       of three parts:
  46
  47       -- The group/use costs.  Each of the interesting groups/uses chooses
  48          the best induction variable in the set and adds its cost to the sum.
  49          The cost reflects the time spent on modifying the induction variables
  50          value to be usable for the given purpose (adding base and offset for
  51          arrays, etc.).
  52       -- The variable costs.  Each of the variables has a cost assigned that
  53          reflects the costs associated with incrementing the value of the
  54          variable.  The original variables are somewhat preferred.
  55       -- The set cost.  Depending on the size of the set, extra cost may be
  56          added to reflect register pressure.
  57
  58       All the costs are defined in a machine-specific way, using the target
  59       hooks and machine descriptions to determine them.
  60
  61    4) The trees are transformed to use the new variables, the dead code is
  62       removed.
  63
  64    All of this is done loop by loop.  Doing it globally is theoretically
  65    possible, it might give a better performance and it might enable us
  66    to decide costs more precisely, but getting all the interactions right
  67    would be complicated.  */
  68
  69 #include "config.h"
  70 #include "system.h"
  71 #include "coretypes.h"
  72 #include "backend.h"
  73 #include "rtl.h"
  74 #include "tree.h"
  75 #include "gimple.h"
  76 #include "cfghooks.h"
  77 #include "tree-pass.h"
  78 #include "tm_p.h"
  79 #include "ssa.h"
  80 #include "expmed.h"
  81 #include "insn-config.h"
  82 #include "emit-rtl.h"
  83 #include "recog.h"
  84 #include "cgraph.h"
  85 #include "gimple-pretty-print.h"
  86 #include "alias.h"
  87 #include "fold-const.h"
  88 #include "stor-layout.h"
  89 #include "tree-eh.h"
  90 #include "gimplify.h"
  91 #include "gimple-iterator.h"
  92 #include "gimplify-me.h"
  93 #include "tree-cfg.h"
  94 #include "tree-ssa-loop-ivopts.h"
  95 #include "tree-ssa-loop-manip.h"
  96 #include "tree-ssa-loop-niter.h"
  97 #include "tree-ssa-loop.h"
  98 #include "explow.h"
  99 #include "expr.h"
 100 #include "tree-dfa.h"
 101 #include "tree-ssa.h"
 102 #include "cfgloop.h"
 103 #include "tree-scalar-evolution.h"
 104 #include "params.h"
 105 #include "tree-affine.h"
 106 #include "tree-ssa-propagate.h"
 107 #include "tree-ssa-address.h"
 108 #include "builtins.h"
 109 #include "tree-vectorizer.h"
 110
 111 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 112    cost of different addressing modes.  This should be moved to a TBD
 113    interface between the GIMPLE and RTL worlds.  */
 114
 115 /* The infinite cost.  */
 116 #define INFTY 10000000
 117
 118 #define AVG_LOOP_NITER(LOOP) 5
 119
 120 /* Returns the expected number of loop iterations for LOOP.
 121    The average trip count is computed from profile data if it
 122    exists. */
 123
 124 static inline HOST_WIDE_INT
 125 avg_loop_niter (struct loop *loop)
 126 {
 127   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 128   if (niter == -1)
 129     {
 130       niter = likely_max_stmt_executions_int (loop);
 131       if (niter == -1 || niter > AVG_LOOP_NITER (loop))
 132         return AVG_LOOP_NITER (loop);
 133     }
 134
 135   return niter;
 136 }
 137
 138 struct iv_use;
 139
 140 /* Representation of the induction variable.  */
 141 struct iv
 142 {
 143   tree base;            /* Initial value of the iv.  */
 144   tree base_object;     /* A memory object to that the induction variable points.  */
 145   tree step;            /* Step of the iv (constant only).  */
 146   tree ssa_name;        /* The ssa name with the value.  */
 147   struct iv_use *nonlin_use;    /* The identifier in the use if it is the case.  */
 148   bool biv_p;           /* Is it a biv?  */
 149   bool no_overflow;     /* True if the iv doesn't overflow.  */
 150   bool have_address_use;/* For biv, indicate if it's used in any address
 151                            type use.  */
 152 };
 153
 154 /* Per-ssa version information (induction variable descriptions, etc.).  */
 155 struct version_info
 156 {
 157   tree name;            /* The ssa name.  */
 158   struct iv *iv;        /* Induction variable description.  */
 159   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 160                            an expression that is not an induction variable.  */
 161   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 162   unsigned inv_id;      /* Id of an invariant.  */
 163 };
 164
 165 /* Types of uses.  */
 166 enum use_type
 167 {
 168   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 169   USE_ADDRESS,          /* Use in an address.  */
 170   USE_COMPARE           /* Use is a compare.  */
 171 };
 172
 173 /* Cost of a computation.  */
 174 struct comp_cost
 175 {
 176   comp_cost (): cost (0), complexity (0), scratch (0)
 177   {}
 178
 179   comp_cost (int cost, unsigned complexity, int scratch = 0)
 180     : cost (cost), complexity (complexity), scratch (scratch)
 181   {}
 182
 183   /* Returns true if COST is infinite.  */
 184   bool infinite_cost_p ();
 185
 186   /* Adds costs COST1 and COST2.  */
 187   friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
 188
 189   /* Adds COST to the comp_cost.  */
 190   comp_cost operator+= (comp_cost cost);
 191
 192   /* Adds constant C to this comp_cost.  */
 193   comp_cost operator+= (HOST_WIDE_INT c);
 194
 195   /* Subtracts constant C to this comp_cost.  */
 196   comp_cost operator-= (HOST_WIDE_INT c);
 197
 198   /* Divide the comp_cost by constant C.  */
 199   comp_cost operator/= (HOST_WIDE_INT c);
 200
 201   /* Multiply the comp_cost by constant C.  */
 202   comp_cost operator*= (HOST_WIDE_INT c);
 203
 204   /* Subtracts costs COST1 and COST2.  */
 205   friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
 206
 207   /* Subtracts COST from this comp_cost.  */
 208   comp_cost operator-= (comp_cost cost);
 209
 210   /* Returns true if COST1 is smaller than COST2.  */
 211   friend bool operator< (comp_cost cost1, comp_cost cost2);
 212
 213   /* Returns true if COST1 and COST2 are equal.  */
 214   friend bool operator== (comp_cost cost1, comp_cost cost2);
 215
 216   /* Returns true if COST1 is smaller or equal than COST2.  */
 217   friend bool operator<= (comp_cost cost1, comp_cost cost2);
 218
 219   int cost;             /* The runtime cost.  */
 220   unsigned complexity;  /* The estimate of the complexity of the code for
 221                            the computation (in no concrete units --
 222                            complexity field should be larger for more
 223                            complex expressions and addressing modes).  */
 224   int scratch;          /* Scratch used during cost computation.  */
 225 };
 226
 227 static const comp_cost no_cost;
 228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
 229
 230 bool
 231 comp_cost::infinite_cost_p ()
 232 {
 233   return cost == INFTY;
 234 }
 235
 236 comp_cost
 237 operator+ (comp_cost cost1, comp_cost cost2)
 238 {
 239   if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
 240     return infinite_cost;
 241
 242   cost1.cost += cost2.cost;
 243   cost1.complexity += cost2.complexity;
 244
 245   return cost1;
 246 }
 247
 248 comp_cost
 249 operator- (comp_cost cost1, comp_cost cost2)
 250 {
 251   if (cost1.infinite_cost_p ())
 252     return infinite_cost;
 253
 254   gcc_assert (!cost2.infinite_cost_p ());
 255
 256   cost1.cost -= cost2.cost;
 257   cost1.complexity -= cost2.complexity;
 258
 259   return cost1;
 260 }
 261
 262 comp_cost
 263 comp_cost::operator+= (comp_cost cost)
 264 {
 265   *this = *this + cost;
 266   return *this;
 267 }
 268
 269 comp_cost
 270 comp_cost::operator+= (HOST_WIDE_INT c)
 271 {
 272   if (infinite_cost_p ())
 273     return *this;
 274
 275   this->cost += c;
 276
 277   return *this;
 278 }
 279
 280 comp_cost
 281 comp_cost::operator-= (HOST_WIDE_INT c)
 282 {
 283   if (infinite_cost_p ())
 284     return *this;
 285
 286   this->cost -= c;
 287
 288   return *this;
 289 }
 290
 291 comp_cost
 292 comp_cost::operator/= (HOST_WIDE_INT c)
 293 {
 294   if (infinite_cost_p ())
 295     return *this;
 296
 297   this->cost /= c;
 298
 299   return *this;
 300 }
 301
 302 comp_cost
 303 comp_cost::operator*= (HOST_WIDE_INT c)
 304 {
 305   if (infinite_cost_p ())
 306     return *this;
 307
 308   this->cost *= c;
 309
 310   return *this;
 311 }
 312
 313 comp_cost
 314 comp_cost::operator-= (comp_cost cost)
 315 {
 316   *this = *this - cost;
 317   return *this;
 318 }
 319
 320 bool
 321 operator< (comp_cost cost1, comp_cost cost2)
 322 {
 323   if (cost1.cost == cost2.cost)
 324     return cost1.complexity < cost2.complexity;
 325
 326   return cost1.cost < cost2.cost;
 327 }
 328
 329 bool
 330 operator== (comp_cost cost1, comp_cost cost2)
 331 {
 332   return cost1.cost == cost2.cost
 333     && cost1.complexity == cost2.complexity;
 334 }
 335
 336 bool
 337 operator<= (comp_cost cost1, comp_cost cost2)
 338 {
 339   return cost1 < cost2 || cost1 == cost2;
 340 }
 341
 342 struct iv_inv_expr_ent;
 343
 344 /* The candidate - cost pair.  */
 345 struct cost_pair
 346 {
 347   struct iv_cand *cand; /* The candidate.  */
 348   comp_cost cost;       /* The cost.  */
 349   bitmap depends_on;    /* The list of invariants that have to be
 350                            preserved.  */
 351   tree value;           /* For final value elimination, the expression for
 352                            the final value of the iv.  For iv elimination,
 353                            the new bound to compare with.  */
 354   enum tree_code comp;  /* For iv elimination, the comparison.  */
 355   iv_inv_expr_ent *inv_expr; /* Loop invariant expression.  */
 356 };
 357
 358 /* Use.  */
 359 struct iv_use
 360 {
 361   unsigned id;          /* The id of the use.  */
 362   unsigned group_id;    /* The group id the use belongs to.  */
 363   enum use_type type;   /* Type of the use.  */
 364   struct iv *iv;        /* The induction variable it is based on.  */
 365   gimple *stmt;         /* Statement in that it occurs.  */
 366   tree *op_p;           /* The place where it occurs.  */
 367
 368   tree addr_base;       /* Base address with const offset stripped.  */
 369   unsigned HOST_WIDE_INT addr_offset;
 370                         /* Const offset stripped from base address.  */
 371 };
 372
 373 /* Group of uses.  */
 374 struct iv_group
 375 {
 376   /* The id of the group.  */
 377   unsigned id;
 378   /* Uses of the group are of the same type.  */
 379   enum use_type type;
 380   /* The set of "related" IV candidates, plus the important ones.  */
 381   bitmap related_cands;
 382   /* Number of IV candidates in the cost_map.  */
 383   unsigned n_map_members;
 384   /* The costs wrto the iv candidates.  */
 385   struct cost_pair *cost_map;
 386   /* The selected candidate for the group.  */
 387   struct iv_cand *selected;
 388   /* Uses in the group.  */
 389   vec<struct iv_use *> vuses;
 390 };
 391
 392 /* The position where the iv is computed.  */
 393 enum iv_position
 394 {
 395   IP_NORMAL,            /* At the end, just before the exit condition.  */
 396   IP_END,               /* At the end of the latch block.  */
 397   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 398   IP_AFTER_USE,         /* Immediately after a specific use.  */
 399   IP_ORIGINAL           /* The original biv.  */
 400 };
 401
 402 /* The induction variable candidate.  */
 403 struct iv_cand
 404 {
 405   unsigned id;          /* The number of the candidate.  */
 406   bool important;       /* Whether this is an "important" candidate, i.e. such
 407                            that it should be considered by all uses.  */
 408   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 409   gimple *incremented_at;/* For original biv, the statement where it is
 410                            incremented.  */
 411   tree var_before;      /* The variable used for it before increment.  */
 412   tree var_after;       /* The variable used for it after increment.  */
 413   struct iv *iv;        /* The value of the candidate.  NULL for
 414                            "pseudocandidate" used to indicate the possibility
 415                            to replace the final value of an iv by direct
 416                            computation of the value.  */
 417   unsigned cost;        /* Cost of the candidate.  */
 418   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 419   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 420                               where it is incremented.  */
 421   bitmap depends_on;    /* The list of invariants that are used in step of the
 422                            biv.  */
 423   struct iv *orig_iv;   /* The original iv if this cand is added from biv with
 424                            smaller type.  */
 425 };
 426
 427 /* Hashtable entry for common candidate derived from iv uses.  */
 428 struct iv_common_cand
 429 {
 430   tree base;
 431   tree step;
 432   /* IV uses from which this common candidate is derived.  */
 433   auto_vec<struct iv_use *> uses;
 434   hashval_t hash;
 435 };
 436
 437 /* Hashtable helpers.  */
 438
 439 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
 440 {
 441   static inline hashval_t hash (const iv_common_cand *);
 442   static inline bool equal (const iv_common_cand *, const iv_common_cand *);
 443 };
 444
 445 /* Hash function for possible common candidates.  */
 446
 447 inline hashval_t
 448 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
 449 {
 450   return ccand->hash;
 451 }
 452
 453 /* Hash table equality function for common candidates.  */
 454
 455 inline bool
 456 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
 457                               const iv_common_cand *ccand2)
 458 {
 459   return (ccand1->hash == ccand2->hash
 460           && operand_equal_p (ccand1->base, ccand2->base, 0)
 461           && operand_equal_p (ccand1->step, ccand2->step, 0)
 462           && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
 463               == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
 464 }
 465
 466 /* Loop invariant expression hashtable entry.  */
 467
 468 struct iv_inv_expr_ent
 469 {
 470   /* Tree expression of the entry.  */
 471   tree expr;
 472   /* Unique indentifier.  */
 473   int id;
 474   /* Hash value.  */
 475   hashval_t hash;
 476 };
 477
 478 /* Sort iv_inv_expr_ent pair A and B by id field.  */
 479
 480 static int
 481 sort_iv_inv_expr_ent (const void *a, const void *b)
 482 {
 483   const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
 484   const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
 485
 486   unsigned id1 = (*e1)->id;
 487   unsigned id2 = (*e2)->id;
 488
 489   if (id1 < id2)
 490     return -1;
 491   else if (id1 > id2)
 492     return 1;
 493   else
 494     return 0;
 495 }
 496
 497 /* Hashtable helpers.  */
 498
 499 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
 500 {
 501   static inline hashval_t hash (const iv_inv_expr_ent *);
 502   static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
 503 };
 504
 505 /* Hash function for loop invariant expressions.  */
 506
 507 inline hashval_t
 508 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
 509 {
 510   return expr->hash;
 511 }
 512
 513 /* Hash table equality function for expressions.  */
 514
 515 inline bool
 516 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
 517                            const iv_inv_expr_ent *expr2)
 518 {
 519   return expr1->hash == expr2->hash
 520          && operand_equal_p (expr1->expr, expr2->expr, 0);
 521 }
 522
 523 struct ivopts_data
 524 {
 525   /* The currently optimized loop.  */
 526   struct loop *current_loop;
 527   source_location loop_loc;
 528
 529   /* Numbers of iterations for all exits of the current loop.  */
 530   hash_map<edge, tree_niter_desc *> *niters;
 531
 532   /* Number of registers used in it.  */
 533   unsigned regs_used;
 534
 535   /* The size of version_info array allocated.  */
 536   unsigned version_info_size;
 537
 538   /* The array of information for the ssa names.  */
 539   struct version_info *version_info;
 540
 541   /* The hashtable of loop invariant expressions created
 542      by ivopt.  */
 543   hash_table<iv_inv_expr_hasher> *inv_expr_tab;
 544
 545   /* Loop invariant expression id.  */
 546   int max_inv_expr_id;
 547
 548   /* The bitmap of indices in version_info whose value was changed.  */
 549   bitmap relevant;
 550
 551   /* The uses of induction variables.  */
 552   vec<iv_group *> vgroups;
 553
 554   /* The candidates.  */
 555   vec<iv_cand *> vcands;
 556
 557   /* A bitmap of important candidates.  */
 558   bitmap important_candidates;
 559
 560   /* Cache used by tree_to_aff_combination_expand.  */
 561   hash_map<tree, name_expansion *> *name_expansion_cache;
 562
 563   /* The hashtable of common candidates derived from iv uses.  */
 564   hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
 565
 566   /* The common candidates.  */
 567   vec<iv_common_cand *> iv_common_cands;
 568
 569   /* The maximum invariant id.  */
 570   unsigned max_inv_id;
 571
 572   /* Number of no_overflow BIVs which are not used in memory address.  */
 573   unsigned bivs_not_used_in_addr;
 574
 575   /* Obstack for iv structure.  */
 576   struct obstack iv_obstack;
 577
 578   /* Whether to consider just related and important candidates when replacing a
 579      use.  */
 580   bool consider_all_candidates;
 581
 582   /* Are we optimizing for speed?  */
 583   bool speed;
 584
 585   /* Whether the loop body includes any function calls.  */
 586   bool body_includes_call;
 587
 588   /* Whether the loop body can only be exited via single exit.  */
 589   bool loop_single_exit_p;
 590 };
 591
 592 /* An assignment of iv candidates to uses.  */
 593
 594 struct iv_ca
 595 {
 596   /* The number of uses covered by the assignment.  */
 597   unsigned upto;
 598
 599   /* Number of uses that cannot be expressed by the candidates in the set.  */
 600   unsigned bad_groups;
 601
 602   /* Candidate assigned to a use, together with the related costs.  */
 603   struct cost_pair **cand_for_group;
 604
 605   /* Number of times each candidate is used.  */
 606   unsigned *n_cand_uses;
 607
 608   /* The candidates used.  */
 609   bitmap cands;
 610
 611   /* The number of candidates in the set.  */
 612   unsigned n_cands;
 613
 614   /* Total number of registers needed.  */
 615   unsigned n_regs;
 616
 617   /* Total cost of expressing uses.  */
 618   comp_cost cand_use_cost;
 619
 620   /* Total cost of candidates.  */
 621   unsigned cand_cost;
 622
 623   /* Number of times each invariant is used.  */
 624   unsigned *n_invariant_uses;
 625
 626   /* Hash set with used invariant expression.  */
 627   hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
 628
 629   /* Total cost of the assignment.  */
 630   comp_cost cost;
 631 };
 632
 633 /* Difference of two iv candidate assignments.  */
 634
 635 struct iv_ca_delta
 636 {
 637   /* Changed group.  */
 638   struct iv_group *group;
 639
 640   /* An old assignment (for rollback purposes).  */
 641   struct cost_pair *old_cp;
 642
 643   /* A new assignment.  */
 644   struct cost_pair *new_cp;
 645
 646   /* Next change in the list.  */
 647   struct iv_ca_delta *next;
 648 };
 649
 650 /* Bound on number of candidates below that all candidates are considered.  */
 651
 652 #define CONSIDER_ALL_CANDIDATES_BOUND \
 653   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 654
 655 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 656    optimizing such a loop would help, and it would take ages).  */
 657
 658 #define MAX_CONSIDERED_GROUPS \
 659   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 660
 661 /* If there are at most this number of ivs in the set, try removing unnecessary
 662    ivs from the set always.  */
 663
 664 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 665   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 666
 667 /* The list of trees for that the decl_rtl field must be reset is stored
 668    here.  */
 669
 670 static vec<tree> decl_rtl_to_reset;
 671
 672 static comp_cost force_expr_to_var_cost (tree, bool);
 673
 674 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 675
 676 edge
 677 single_dom_exit (struct loop *loop)
 678 {
 679   edge exit = single_exit (loop);
 680
 681   if (!exit)
 682     return NULL;
 683
 684   if (!just_once_each_iteration_p (loop, exit->src))
 685     return NULL;
 686
 687   return exit;
 688 }
 689
 690 /* Dumps information about the induction variable IV to FILE.  Don't dump
 691    variable's name if DUMP_NAME is FALSE.  The information is dumped with
 692    preceding spaces indicated by INDENT_LEVEL.  */
 693
 694 void
 695 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
 696 {
 697   const char *p;
 698   const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
 699
 700   if (indent_level > 4)
 701     indent_level = 4;
 702   p = spaces + 8 - (indent_level << 1);
 703
 704   fprintf (file, "%sIV struct:\n", p);
 705   if (iv->ssa_name && dump_name)
 706     {
 707       fprintf (file, "%s  SSA_NAME:\t", p);
 708       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 709       fprintf (file, "\n");
 710     }
 711
 712   fprintf (file, "%s  Type:\t", p);
 713   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 714   fprintf (file, "\n");
 715
 716   fprintf (file, "%s  Base:\t", p);
 717   print_generic_expr (file, iv->base, TDF_SLIM);
 718   fprintf (file, "\n");
 719
 720   fprintf (file, "%s  Step:\t", p);
 721   print_generic_expr (file, iv->step, TDF_SLIM);
 722   fprintf (file, "\n");
 723
 724   if (iv->base_object)
 725     {
 726       fprintf (file, "%s  Object:\t", p);
 727       print_generic_expr (file, iv->base_object, TDF_SLIM);
 728       fprintf (file, "\n");
 729     }
 730
 731   fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
 732
 733   fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
 734            p, iv->no_overflow ? "No-overflow" : "Overflow");
 735 }
 736
 737 /* Dumps information about the USE to FILE.  */
 738
 739 void
 740 dump_use (FILE *file, struct iv_use *use)
 741 {
 742   fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
 743   fprintf (file, "    At stmt:\t");
 744   print_gimple_stmt (file, use->stmt, 0, 0);
 745   fprintf (file, "    At pos:\t");
 746   if (use->op_p)
 747     print_generic_expr (file, *use->op_p, TDF_SLIM);
 748   fprintf (file, "\n");
 749   dump_iv (file, use->iv, false, 2);
 750 }
 751
 752 /* Dumps information about the uses to FILE.  */
 753
 754 void
 755 dump_groups (FILE *file, struct ivopts_data *data)
 756 {
 757   unsigned i, j;
 758   struct iv_group *group;
 759
 760   for (i = 0; i < data->vgroups.length (); i++)
 761     {
 762       group = data->vgroups[i];
 763       fprintf (file, "Group %d:\n", group->id);
 764       if (group->type == USE_NONLINEAR_EXPR)
 765         fprintf (file, "  Type:\tGENERIC\n");
 766       else if (group->type == USE_ADDRESS)
 767         fprintf (file, "  Type:\tADDRESS\n");
 768       else
 769         {
 770           gcc_assert (group->type == USE_COMPARE);
 771           fprintf (file, "  Type:\tCOMPARE\n");
 772         }
 773       for (j = 0; j < group->vuses.length (); j++)
 774         dump_use (file, group->vuses[j]);
 775     }
 776 }
 777
 778 /* Dumps information about induction variable candidate CAND to FILE.  */
 779
 780 void
 781 dump_cand (FILE *file, struct iv_cand *cand)
 782 {
 783   struct iv *iv = cand->iv;
 784
 785   fprintf (file, "Candidate %d:\n", cand->id);
 786   if (cand->depends_on)
 787     {
 788       fprintf (file, "  Depend on: ");
 789       dump_bitmap (file, cand->depends_on);
 790     }
 791
 792   if (cand->var_before)
 793     {
 794       fprintf (file, "  Var befor: ");
 795       print_generic_expr (file, cand->var_before, TDF_SLIM);
 796       fprintf (file, "\n");
 797     }
 798   if (cand->var_after)
 799     {
 800       fprintf (file, "  Var after: ");
 801       print_generic_expr (file, cand->var_after, TDF_SLIM);
 802       fprintf (file, "\n");
 803     }
 804
 805   switch (cand->pos)
 806     {
 807     case IP_NORMAL:
 808       fprintf (file, "  Incr POS: before exit test\n");
 809       break;
 810
 811     case IP_BEFORE_USE:
 812       fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
 813       break;
 814
 815     case IP_AFTER_USE:
 816       fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
 817       break;
 818
 819     case IP_END:
 820       fprintf (file, "  Incr POS: at end\n");
 821       break;
 822
 823     case IP_ORIGINAL:
 824       fprintf (file, "  Incr POS: orig biv\n");
 825       break;
 826     }
 827
 828   dump_iv (file, iv, false, 1);
 829 }
 830
 831 /* Returns the info for ssa version VER.  */
 832
 833 static inline struct version_info *
 834 ver_info (struct ivopts_data *data, unsigned ver)
 835 {
 836   return data->version_info + ver;
 837 }
 838
 839 /* Returns the info for ssa name NAME.  */
 840
 841 static inline struct version_info *
 842 name_info (struct ivopts_data *data, tree name)
 843 {
 844   return ver_info (data, SSA_NAME_VERSION (name));
 845 }
 846
 847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 848    emitted in LOOP.  */
 849
 850 static bool
 851 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
 852 {
 853   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 854
 855   gcc_assert (bb);
 856
 857   if (sbb == loop->latch)
 858     return true;
 859
 860   if (sbb != bb)
 861     return false;
 862
 863   return stmt == last_stmt (bb);
 864 }
 865
 866 /* Returns true if STMT if after the place where the original induction
 867    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 868    if the positions are identical.  */
 869
 870 static bool
 871 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
 872 {
 873   basic_block cand_bb = gimple_bb (cand->incremented_at);
 874   basic_block stmt_bb = gimple_bb (stmt);
 875
 876   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 877     return false;
 878
 879   if (stmt_bb != cand_bb)
 880     return true;
 881
 882   if (true_if_equal
 883       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 884     return true;
 885   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 886 }
 887
 888 /* Returns true if STMT if after the place where the induction variable
 889    CAND is incremented in LOOP.  */
 890
 891 static bool
 892 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
 893 {
 894   switch (cand->pos)
 895     {
 896     case IP_END:
 897       return false;
 898
 899     case IP_NORMAL:
 900       return stmt_after_ip_normal_pos (loop, stmt);
 901
 902     case IP_ORIGINAL:
 903     case IP_AFTER_USE:
 904       return stmt_after_inc_pos (cand, stmt, false);
 905
 906     case IP_BEFORE_USE:
 907       return stmt_after_inc_pos (cand, stmt, true);
 908
 909     default:
 910       gcc_unreachable ();
 911     }
 912 }
 913
 914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 915
 916 static bool
 917 abnormal_ssa_name_p (tree exp)
 918 {
 919   if (!exp)
 920     return false;
 921
 922   if (TREE_CODE (exp) != SSA_NAME)
 923     return false;
 924
 925   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 926 }
 927
 928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 929    abnormal phi node.  Callback for for_each_index.  */
 930
 931 static bool
 932 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 933                                   void *data ATTRIBUTE_UNUSED)
 934 {
 935   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 936     {
 937       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 938         return false;
 939       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 940         return false;
 941     }
 942
 943   return !abnormal_ssa_name_p (*index);
 944 }
 945
 946 /* Returns true if EXPR contains a ssa name that occurs in an
 947    abnormal phi node.  */
 948
 949 bool
 950 contains_abnormal_ssa_name_p (tree expr)
 951 {
 952   enum tree_code code;
 953   enum tree_code_class codeclass;
 954
 955   if (!expr)
 956     return false;
 957
 958   code = TREE_CODE (expr);
 959   codeclass = TREE_CODE_CLASS (code);
 960
 961   if (code == SSA_NAME)
 962     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 963
 964   if (code == INTEGER_CST
 965       || is_gimple_min_invariant (expr))
 966     return false;
 967
 968   if (code == ADDR_EXPR)
 969     return !for_each_index (&TREE_OPERAND (expr, 0),
 970                             idx_contains_abnormal_ssa_name_p,
 971                             NULL);
 972
 973   if (code == COND_EXPR)
 974     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 975       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 976       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 977
 978   switch (codeclass)
 979     {
 980     case tcc_binary:
 981     case tcc_comparison:
 982       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 983         return true;
 984
 985       /* Fallthru.  */
 986     case tcc_unary:
 987       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 988         return true;
 989
 990       break;
 991
 992     default:
 993       gcc_unreachable ();
 994     }
 995
 996   return false;
 997 }
 998
 999 /*  Returns the structure describing number of iterations determined from
1000     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
1001
1002 static struct tree_niter_desc *
1003 niter_for_exit (struct ivopts_data *data, edge exit)
1004 {
1005   struct tree_niter_desc *desc;
1006   tree_niter_desc **slot;
1007
1008   if (!data->niters)
1009     {
1010       data->niters = new hash_map<edge, tree_niter_desc *>;
1011       slot = NULL;
1012     }
1013   else
1014     slot = data->niters->get (exit);
1015
1016   if (!slot)
1017     {
1018       /* Try to determine number of iterations.  We cannot safely work with ssa
1019          names that appear in phi nodes on abnormal edges, so that we do not
1020          create overlapping life ranges for them (PR 27283).  */
1021       desc = XNEW (struct tree_niter_desc);
1022       if (!number_of_iterations_exit (data->current_loop,
1023                                       exit, desc, true)
1024           || contains_abnormal_ssa_name_p (desc->niter))
1025         {
1026           XDELETE (desc);
1027           desc = NULL;
1028         }
1029       data->niters->put (exit, desc);
1030     }
1031   else
1032     desc = *slot;
1033
1034   return desc;
1035 }
1036
1037 /* Returns the structure describing number of iterations determined from
1038    single dominating exit of DATA->current_loop, or NULL if something
1039    goes wrong.  */
1040
1041 static struct tree_niter_desc *
1042 niter_for_single_dom_exit (struct ivopts_data *data)
1043 {
1044   edge exit = single_dom_exit (data->current_loop);
1045
1046   if (!exit)
1047     return NULL;
1048
1049   return niter_for_exit (data, exit);
1050 }
1051
1052 /* Initializes data structures used by the iv optimization pass, stored
1053    in DATA.  */
1054
1055 static void
1056 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1057 {
1058   data->version_info_size = 2 * num_ssa_names;
1059   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1060   data->relevant = BITMAP_ALLOC (NULL);
1061   data->important_candidates = BITMAP_ALLOC (NULL);
1062   data->max_inv_id = 0;
1063   data->niters = NULL;
1064   data->vgroups.create (20);
1065   data->vcands.create (20);
1066   data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1067   data->max_inv_expr_id = 0;
1068   data->name_expansion_cache = NULL;
1069   data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1070   data->iv_common_cands.create (20);
1071   decl_rtl_to_reset.create (20);
1072   gcc_obstack_init (&data->iv_obstack);
1073 }
1074
1075 /* Returns a memory object to that EXPR points.  In case we are able to
1076    determine that it does not point to any such object, NULL is returned.  */
1077
1078 static tree
1079 determine_base_object (tree expr)
1080 {
1081   enum tree_code code = TREE_CODE (expr);
1082   tree base, obj;
1083
1084   /* If this is a pointer casted to any type, we need to determine
1085      the base object for the pointer; so handle conversions before
1086      throwing away non-pointer expressions.  */
1087   if (CONVERT_EXPR_P (expr))
1088     return determine_base_object (TREE_OPERAND (expr, 0));
1089
1090   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1091     return NULL_TREE;
1092
1093   switch (code)
1094     {
1095     case INTEGER_CST:
1096       return NULL_TREE;
1097
1098     case ADDR_EXPR:
1099       obj = TREE_OPERAND (expr, 0);
1100       base = get_base_address (obj);
1101
1102       if (!base)
1103         return expr;
1104
1105       if (TREE_CODE (base) == MEM_REF)
1106         return determine_base_object (TREE_OPERAND (base, 0));
1107
1108       return fold_convert (ptr_type_node,
1109                            build_fold_addr_expr (base));
1110
1111     case POINTER_PLUS_EXPR:
1112       return determine_base_object (TREE_OPERAND (expr, 0));
1113
1114     case PLUS_EXPR:
1115     case MINUS_EXPR:
1116       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
1117       gcc_unreachable ();
1118
1119     default:
1120       return fold_convert (ptr_type_node, expr);
1121     }
1122 }
1123
1124 /* Return true if address expression with non-DECL_P operand appears
1125    in EXPR.  */
1126
1127 static bool
1128 contain_complex_addr_expr (tree expr)
1129 {
1130   bool res = false;
1131
1132   STRIP_NOPS (expr);
1133   switch (TREE_CODE (expr))
1134     {
1135     case POINTER_PLUS_EXPR:
1136     case PLUS_EXPR:
1137     case MINUS_EXPR:
1138       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1139       res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1140       break;
1141
1142     case ADDR_EXPR:
1143       return (!DECL_P (TREE_OPERAND (expr, 0)));
1144
1145     default:
1146       return false;
1147     }
1148
1149   return res;
1150 }
1151
1152 /* Allocates an induction variable with given initial value BASE and step STEP
1153    for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */
1154
1155 static struct iv *
1156 alloc_iv (struct ivopts_data *data, tree base, tree step,
1157           bool no_overflow = false)
1158 {
1159   tree expr = base;
1160   struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1161                                               sizeof (struct iv));
1162   gcc_assert (step != NULL_TREE);
1163
1164   /* Lower address expression in base except ones with DECL_P as operand.
1165      By doing this:
1166        1) More accurate cost can be computed for address expressions;
1167        2) Duplicate candidates won't be created for bases in different
1168           forms, like &a[0] and &a.  */
1169   STRIP_NOPS (expr);
1170   if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1171       || contain_complex_addr_expr (expr))
1172     {
1173       aff_tree comb;
1174       tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1175       base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1176     }
1177
1178   iv->base = base;
1179   iv->base_object = determine_base_object (base);
1180   iv->step = step;
1181   iv->biv_p = false;
1182   iv->nonlin_use = NULL;
1183   iv->ssa_name = NULL_TREE;
1184   iv->no_overflow = no_overflow;
1185   iv->have_address_use = false;
1186
1187   return iv;
1188 }
1189
1190 /* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
1191    doesn't overflow.  */
1192
1193 static void
1194 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1195         bool no_overflow)
1196 {
1197   struct version_info *info = name_info (data, iv);
1198
1199   gcc_assert (!info->iv);
1200
1201   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1202   info->iv = alloc_iv (data, base, step, no_overflow);
1203   info->iv->ssa_name = iv;
1204 }
1205
1206 /* Finds induction variable declaration for VAR.  */
1207
1208 static struct iv *
1209 get_iv (struct ivopts_data *data, tree var)
1210 {
1211   basic_block bb;
1212   tree type = TREE_TYPE (var);
1213
1214   if (!POINTER_TYPE_P (type)
1215       && !INTEGRAL_TYPE_P (type))
1216     return NULL;
1217
1218   if (!name_info (data, var)->iv)
1219     {
1220       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1221
1222       if (!bb
1223           || !flow_bb_inside_loop_p (data->current_loop, bb))
1224         set_iv (data, var, var, build_int_cst (type, 0), true);
1225     }
1226
1227   return name_info (data, var)->iv;
1228 }
1229
1230 /* Return the first non-invariant ssa var found in EXPR.  */
1231
1232 static tree
1233 extract_single_var_from_expr (tree expr)
1234 {
1235   int i, n;
1236   tree tmp;
1237   enum tree_code code;
1238
1239   if (!expr || is_gimple_min_invariant (expr))
1240     return NULL;
1241
1242   code = TREE_CODE (expr);
1243   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1244     {
1245       n = TREE_OPERAND_LENGTH (expr);
1246       for (i = 0; i < n; i++)
1247         {
1248           tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1249
1250           if (tmp)
1251             return tmp;
1252         }
1253     }
1254   return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1255 }
1256
1257 /* Finds basic ivs.  */
1258
1259 static bool
1260 find_bivs (struct ivopts_data *data)
1261 {
1262   gphi *phi;
1263   affine_iv iv;
1264   tree step, type, base, stop;
1265   bool found = false;
1266   struct loop *loop = data->current_loop;
1267   gphi_iterator psi;
1268
1269   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1270     {
1271       phi = psi.phi ();
1272
1273       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1274         continue;
1275
1276       if (virtual_operand_p (PHI_RESULT (phi)))
1277         continue;
1278
1279       if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1280         continue;
1281
1282       if (integer_zerop (iv.step))
1283         continue;
1284
1285       step = iv.step;
1286       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1287       /* Stop expanding iv base at the first ssa var referred by iv step.
1288          Ideally we should stop at any ssa var, because that's expensive
1289          and unusual to happen, we just do it on the first one.
1290
1291          See PR64705 for the rationale.  */
1292       stop = extract_single_var_from_expr (step);
1293       base = expand_simple_operations (base, stop);
1294       if (contains_abnormal_ssa_name_p (base)
1295           || contains_abnormal_ssa_name_p (step))
1296         continue;
1297
1298       type = TREE_TYPE (PHI_RESULT (phi));
1299       base = fold_convert (type, base);
1300       if (step)
1301         {
1302           if (POINTER_TYPE_P (type))
1303             step = convert_to_ptrofftype (step);
1304           else
1305             step = fold_convert (type, step);
1306         }
1307
1308       set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1309       found = true;
1310     }
1311
1312   return found;
1313 }
1314
1315 /* Marks basic ivs.  */
1316
1317 static void
1318 mark_bivs (struct ivopts_data *data)
1319 {
1320   gphi *phi;
1321   gimple *def;
1322   tree var;
1323   struct iv *iv, *incr_iv;
1324   struct loop *loop = data->current_loop;
1325   basic_block incr_bb;
1326   gphi_iterator psi;
1327
1328   data->bivs_not_used_in_addr = 0;
1329   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1330     {
1331       phi = psi.phi ();
1332
1333       iv = get_iv (data, PHI_RESULT (phi));
1334       if (!iv)
1335         continue;
1336
1337       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1338       def = SSA_NAME_DEF_STMT (var);
1339       /* Don't mark iv peeled from other one as biv.  */
1340       if (def
1341           && gimple_code (def) == GIMPLE_PHI
1342           && gimple_bb (def) == loop->header)
1343         continue;
1344
1345       incr_iv = get_iv (data, var);
1346       if (!incr_iv)
1347         continue;
1348
1349       /* If the increment is in the subloop, ignore it.  */
1350       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1351       if (incr_bb->loop_father != data->current_loop
1352           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1353         continue;
1354
1355       iv->biv_p = true;
1356       incr_iv->biv_p = true;
1357       if (iv->no_overflow)
1358         data->bivs_not_used_in_addr++;
1359       if (incr_iv->no_overflow)
1360         data->bivs_not_used_in_addr++;
1361     }
1362 }
1363
1364 /* Checks whether STMT defines a linear induction variable and stores its
1365    parameters to IV.  */
1366
1367 static bool
1368 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1369 {
1370   tree lhs, stop;
1371   struct loop *loop = data->current_loop;
1372
1373   iv->base = NULL_TREE;
1374   iv->step = NULL_TREE;
1375
1376   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1377     return false;
1378
1379   lhs = gimple_assign_lhs (stmt);
1380   if (TREE_CODE (lhs) != SSA_NAME)
1381     return false;
1382
1383   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1384     return false;
1385
1386   /* Stop expanding iv base at the first ssa var referred by iv step.
1387      Ideally we should stop at any ssa var, because that's expensive
1388      and unusual to happen, we just do it on the first one.
1389
1390      See PR64705 for the rationale.  */
1391   stop = extract_single_var_from_expr (iv->step);
1392   iv->base = expand_simple_operations (iv->base, stop);
1393   if (contains_abnormal_ssa_name_p (iv->base)
1394       || contains_abnormal_ssa_name_p (iv->step))
1395     return false;
1396
1397   /* If STMT could throw, then do not consider STMT as defining a GIV.
1398      While this will suppress optimizations, we can not safely delete this
1399      GIV and associated statements, even if it appears it is not used.  */
1400   if (stmt_could_throw_p (stmt))
1401     return false;
1402
1403   return true;
1404 }
1405
1406 /* Finds general ivs in statement STMT.  */
1407
1408 static void
1409 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1410 {
1411   affine_iv iv;
1412
1413   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1414     return;
1415
1416   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1417 }
1418
1419 /* Finds general ivs in basic block BB.  */
1420
1421 static void
1422 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1423 {
1424   gimple_stmt_iterator bsi;
1425
1426   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1427     find_givs_in_stmt (data, gsi_stmt (bsi));
1428 }
1429
1430 /* Finds general ivs.  */
1431
1432 static void
1433 find_givs (struct ivopts_data *data)
1434 {
1435   struct loop *loop = data->current_loop;
1436   basic_block *body = get_loop_body_in_dom_order (loop);
1437   unsigned i;
1438
1439   for (i = 0; i < loop->num_nodes; i++)
1440     find_givs_in_bb (data, body[i]);
1441   free (body);
1442 }
1443
1444 /* For each ssa name defined in LOOP determines whether it is an induction
1445    variable and if so, its initial value and step.  */
1446
1447 static bool
1448 find_induction_variables (struct ivopts_data *data)
1449 {
1450   unsigned i;
1451   bitmap_iterator bi;
1452
1453   if (!find_bivs (data))
1454     return false;
1455
1456   find_givs (data);
1457   mark_bivs (data);
1458
1459   if (dump_file && (dump_flags & TDF_DETAILS))
1460     {
1461       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1462
1463       if (niter)
1464         {
1465           fprintf (dump_file, "  number of iterations ");
1466           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1467           if (!integer_zerop (niter->may_be_zero))
1468             {
1469               fprintf (dump_file, "; zero if ");
1470               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1471             }
1472           fprintf (dump_file, "\n");
1473         };
1474
1475       fprintf (dump_file, "\n<Induction Vars>:\n");
1476       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1477         {
1478           struct version_info *info = ver_info (data, i);
1479           if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1480             dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1481         }
1482     }
1483
1484   return true;
1485 }
1486
1487 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1488    For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1489    is the const offset stripped from IV base; for other types use, both
1490    are zero by default.  */
1491
1492 static struct iv_use *
1493 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1494             gimple *stmt, enum use_type type, tree addr_base,
1495             unsigned HOST_WIDE_INT addr_offset)
1496 {
1497   struct iv_use *use = XCNEW (struct iv_use);
1498
1499   use->id = group->vuses.length ();
1500   use->group_id = group->id;
1501   use->type = type;
1502   use->iv = iv;
1503   use->stmt = stmt;
1504   use->op_p = use_p;
1505   use->addr_base = addr_base;
1506   use->addr_offset = addr_offset;
1507
1508   group->vuses.safe_push (use);
1509   return use;
1510 }
1511
1512 /* Checks whether OP is a loop-level invariant and if so, records it.
1513    NONLINEAR_USE is true if the invariant is used in a way we do not
1514    handle specially.  */
1515
1516 static void
1517 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1518 {
1519   basic_block bb;
1520   struct version_info *info;
1521
1522   if (TREE_CODE (op) != SSA_NAME
1523       || virtual_operand_p (op))
1524     return;
1525
1526   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1527   if (bb
1528       && flow_bb_inside_loop_p (data->current_loop, bb))
1529     return;
1530
1531   info = name_info (data, op);
1532   info->name = op;
1533   info->has_nonlin_use |= nonlinear_use;
1534   if (!info->inv_id)
1535     info->inv_id = ++data->max_inv_id;
1536   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1537 }
1538
1539 static tree
1540 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1541
1542 /* Record a group of TYPE.  */
1543
1544 static struct iv_group *
1545 record_group (struct ivopts_data *data, enum use_type type)
1546 {
1547   struct iv_group *group = XCNEW (struct iv_group);
1548
1549   group->id = data->vgroups.length ();
1550   group->type = type;
1551   group->related_cands = BITMAP_ALLOC (NULL);
1552   group->vuses.create (1);
1553
1554   data->vgroups.safe_push (group);
1555   return group;
1556 }
1557
1558 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1559    New group will be created if there is no existing group for the use.  */
1560
1561 static struct iv_use *
1562 record_group_use (struct ivopts_data *data, tree *use_p,
1563                   struct iv *iv, gimple *stmt, enum use_type type)
1564 {
1565   tree addr_base = NULL;
1566   struct iv_group *group = NULL;
1567   unsigned HOST_WIDE_INT addr_offset = 0;
1568
1569   /* Record non address type use in a new group.  */
1570   if (type == USE_ADDRESS && iv->base_object)
1571     {
1572       unsigned int i;
1573
1574       addr_base = strip_offset (iv->base, &addr_offset);
1575       for (i = 0; i < data->vgroups.length (); i++)
1576         {
1577           struct iv_use *use;
1578
1579           group = data->vgroups[i];
1580           use = group->vuses[0];
1581           if (use->type != USE_ADDRESS || !use->iv->base_object)
1582             continue;
1583
1584           /* Check if it has the same stripped base and step.  */
1585           if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1586               && operand_equal_p (iv->step, use->iv->step, 0)
1587               && operand_equal_p (addr_base, use->addr_base, 0))
1588             break;
1589         }
1590       if (i == data->vgroups.length ())
1591         group = NULL;
1592     }
1593
1594   if (!group)
1595     group = record_group (data, type);
1596
1597   return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1598 }
1599
1600 /* Checks whether the use OP is interesting and if so, records it.  */
1601
1602 static struct iv_use *
1603 find_interesting_uses_op (struct ivopts_data *data, tree op)
1604 {
1605   struct iv *iv;
1606   gimple *stmt;
1607   struct iv_use *use;
1608
1609   if (TREE_CODE (op) != SSA_NAME)
1610     return NULL;
1611
1612   iv = get_iv (data, op);
1613   if (!iv)
1614     return NULL;
1615
1616   if (iv->nonlin_use)
1617     {
1618       gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1619       return iv->nonlin_use;
1620     }
1621
1622   if (integer_zerop (iv->step))
1623     {
1624       record_invariant (data, op, true);
1625       return NULL;
1626     }
1627
1628   stmt = SSA_NAME_DEF_STMT (op);
1629   gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1630
1631   use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1632   iv->nonlin_use = use;
1633   return use;
1634 }
1635
1636 /* Given a condition in statement STMT, checks whether it is a compare
1637    of an induction variable and an invariant.  If this is the case,
1638    CONTROL_VAR is set to location of the iv, BOUND to the location of
1639    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1640    induction variable descriptions, and true is returned.  If this is not
1641    the case, CONTROL_VAR and BOUND are set to the arguments of the
1642    condition and false is returned.  */
1643
1644 static bool
1645 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1646                        tree **control_var, tree **bound,
1647                        struct iv **iv_var, struct iv **iv_bound)
1648 {
1649   /* The objects returned when COND has constant operands.  */
1650   static struct iv const_iv;
1651   static tree zero;
1652   tree *op0 = &zero, *op1 = &zero;
1653   struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1654   bool ret = false;
1655
1656   if (gimple_code (stmt) == GIMPLE_COND)
1657     {
1658       gcond *cond_stmt = as_a <gcond *> (stmt);
1659       op0 = gimple_cond_lhs_ptr (cond_stmt);
1660       op1 = gimple_cond_rhs_ptr (cond_stmt);
1661     }
1662   else
1663     {
1664       op0 = gimple_assign_rhs1_ptr (stmt);
1665       op1 = gimple_assign_rhs2_ptr (stmt);
1666     }
1667
1668   zero = integer_zero_node;
1669   const_iv.step = integer_zero_node;
1670
1671   if (TREE_CODE (*op0) == SSA_NAME)
1672     iv0 = get_iv (data, *op0);
1673   if (TREE_CODE (*op1) == SSA_NAME)
1674     iv1 = get_iv (data, *op1);
1675
1676   /* Exactly one of the compared values must be an iv, and the other one must
1677      be an invariant.  */
1678   if (!iv0 || !iv1)
1679     goto end;
1680
1681   if (integer_zerop (iv0->step))
1682     {
1683       /* Control variable may be on the other side.  */
1684       std::swap (op0, op1);
1685       std::swap (iv0, iv1);
1686     }
1687   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1688
1689 end:
1690   if (control_var)
1691     *control_var = op0;
1692   if (iv_var)
1693     *iv_var = iv0;
1694   if (bound)
1695     *bound = op1;
1696   if (iv_bound)
1697     *iv_bound = iv1;
1698
1699   return ret;
1700 }
1701
1702 /* Checks whether the condition in STMT is interesting and if so,
1703    records it.  */
1704
1705 static void
1706 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1707 {
1708   tree *var_p, *bound_p;
1709   struct iv *var_iv;
1710
1711   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1712     {
1713       find_interesting_uses_op (data, *var_p);
1714       find_interesting_uses_op (data, *bound_p);
1715       return;
1716     }
1717
1718   record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1719 }
1720
1721 /* Returns the outermost loop EXPR is obviously invariant in
1722    relative to the loop LOOP, i.e. if all its operands are defined
1723    outside of the returned loop.  Returns NULL if EXPR is not
1724    even obviously invariant in LOOP.  */
1725
1726 struct loop *
1727 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1728 {
1729   basic_block def_bb;
1730   unsigned i, len;
1731
1732   if (is_gimple_min_invariant (expr))
1733     return current_loops->tree_root;
1734
1735   if (TREE_CODE (expr) == SSA_NAME)
1736     {
1737       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1738       if (def_bb)
1739         {
1740           if (flow_bb_inside_loop_p (loop, def_bb))
1741             return NULL;
1742           return superloop_at_depth (loop,
1743                                      loop_depth (def_bb->loop_father) + 1);
1744         }
1745
1746       return current_loops->tree_root;
1747     }
1748
1749   if (!EXPR_P (expr))
1750     return NULL;
1751
1752   unsigned maxdepth = 0;
1753   len = TREE_OPERAND_LENGTH (expr);
1754   for (i = 0; i < len; i++)
1755     {
1756       struct loop *ivloop;
1757       if (!TREE_OPERAND (expr, i))
1758         continue;
1759
1760       ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1761       if (!ivloop)
1762         return NULL;
1763       maxdepth = MAX (maxdepth, loop_depth (ivloop));
1764     }
1765
1766   return superloop_at_depth (loop, maxdepth);
1767 }
1768
1769 /* Returns true if expression EXPR is obviously invariant in LOOP,
1770    i.e. if all its operands are defined outside of the LOOP.  LOOP
1771    should not be the function body.  */
1772
1773 bool
1774 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1775 {
1776   basic_block def_bb;
1777   unsigned i, len;
1778
1779   gcc_assert (loop_depth (loop) > 0);
1780
1781   if (is_gimple_min_invariant (expr))
1782     return true;
1783
1784   if (TREE_CODE (expr) == SSA_NAME)
1785     {
1786       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1787       if (def_bb
1788           && flow_bb_inside_loop_p (loop, def_bb))
1789         return false;
1790
1791       return true;
1792     }
1793
1794   if (!EXPR_P (expr))
1795     return false;
1796
1797   len = TREE_OPERAND_LENGTH (expr);
1798   for (i = 0; i < len; i++)
1799     if (TREE_OPERAND (expr, i)
1800         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1801       return false;
1802
1803   return true;
1804 }
1805
1806 /* Given expression EXPR which computes inductive values with respect
1807    to loop recorded in DATA, this function returns biv from which EXPR
1808    is derived by tracing definition chains of ssa variables in EXPR.  */
1809
1810 static struct iv*
1811 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1812 {
1813   struct iv *iv;
1814   unsigned i, n;
1815   tree e2, e1;
1816   enum tree_code code;
1817   gimple *stmt;
1818
1819   if (expr == NULL_TREE)
1820     return NULL;
1821
1822   if (is_gimple_min_invariant (expr))
1823     return NULL;
1824
1825   code = TREE_CODE (expr);
1826   if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1827     {
1828       n = TREE_OPERAND_LENGTH (expr);
1829       for (i = 0; i < n; i++)
1830         {
1831           iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1832           if (iv)
1833             return iv;
1834         }
1835     }
1836
1837   /* Stop if it's not ssa name.  */
1838   if (code != SSA_NAME)
1839     return NULL;
1840
1841   iv = get_iv (data, expr);
1842   if (!iv || integer_zerop (iv->step))
1843     return NULL;
1844   else if (iv->biv_p)
1845     return iv;
1846
1847   stmt = SSA_NAME_DEF_STMT (expr);
1848   if (gphi *phi = dyn_cast <gphi *> (stmt))
1849     {
1850       ssa_op_iter iter;
1851       use_operand_p use_p;
1852
1853       if (virtual_operand_p (gimple_phi_result (phi)))
1854         return NULL;
1855
1856       FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1857         {
1858           tree use = USE_FROM_PTR (use_p);
1859           iv = find_deriving_biv_for_expr (data, use);
1860           if (iv)
1861             return iv;
1862         }
1863       return NULL;
1864     }
1865   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1866     return NULL;
1867
1868   e1 = gimple_assign_rhs1 (stmt);
1869   code = gimple_assign_rhs_code (stmt);
1870   if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1871     return find_deriving_biv_for_expr (data, e1);
1872
1873   switch (code)
1874     {
1875     case MULT_EXPR:
1876     case PLUS_EXPR:
1877     case MINUS_EXPR:
1878     case POINTER_PLUS_EXPR:
1879       /* Increments, decrements and multiplications by a constant
1880          are simple.  */
1881       e2 = gimple_assign_rhs2 (stmt);
1882       iv = find_deriving_biv_for_expr (data, e2);
1883       if (iv)
1884         return iv;
1885
1886       /* Fallthru.  */
1887     CASE_CONVERT:
1888       /* Casts are simple.  */
1889       return find_deriving_biv_for_expr (data, e1);
1890
1891     default:
1892       break;
1893     }
1894
1895   return NULL;
1896 }
1897
1898 /* Record BIV, its predecessor and successor that they are used in
1899    address type uses.  */
1900
1901 static void
1902 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1903 {
1904   unsigned i;
1905   tree type, base_1, base_2;
1906   bitmap_iterator bi;
1907
1908   if (!biv || !biv->biv_p || integer_zerop (biv->step)
1909       || biv->have_address_use || !biv->no_overflow)
1910     return;
1911
1912   type = TREE_TYPE (biv->base);
1913   if (!INTEGRAL_TYPE_P (type))
1914     return;
1915
1916   biv->have_address_use = true;
1917   data->bivs_not_used_in_addr--;
1918   base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1919   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1920     {
1921       struct iv *iv = ver_info (data, i)->iv;
1922
1923       if (!iv || !iv->biv_p || integer_zerop (iv->step)
1924           || iv->have_address_use || !iv->no_overflow)
1925         continue;
1926
1927       if (type != TREE_TYPE (iv->base)
1928           || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1929         continue;
1930
1931       if (!operand_equal_p (biv->step, iv->step, 0))
1932         continue;
1933
1934       base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1935       if (operand_equal_p (base_1, iv->base, 0)
1936           || operand_equal_p (base_2, biv->base, 0))
1937         {
1938           iv->have_address_use = true;
1939           data->bivs_not_used_in_addr--;
1940         }
1941     }
1942 }
1943
1944 /* Cumulates the steps of indices into DATA and replaces their values with the
1945    initial ones.  Returns false when the value of the index cannot be determined.
1946    Callback for for_each_index.  */
1947
1948 struct ifs_ivopts_data
1949 {
1950   struct ivopts_data *ivopts_data;
1951   gimple *stmt;
1952   tree step;
1953 };
1954
1955 static bool
1956 idx_find_step (tree base, tree *idx, void *data)
1957 {
1958   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1959   struct iv *iv;
1960   bool use_overflow_semantics = false;
1961   tree step, iv_base, iv_step, lbound, off;
1962   struct loop *loop = dta->ivopts_data->current_loop;
1963
1964   /* If base is a component ref, require that the offset of the reference
1965      be invariant.  */
1966   if (TREE_CODE (base) == COMPONENT_REF)
1967     {
1968       off = component_ref_field_offset (base);
1969       return expr_invariant_in_loop_p (loop, off);
1970     }
1971
1972   /* If base is array, first check whether we will be able to move the
1973      reference out of the loop (in order to take its address in strength
1974      reduction).  In order for this to work we need both lower bound
1975      and step to be loop invariants.  */
1976   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1977     {
1978       /* Moreover, for a range, the size needs to be invariant as well.  */
1979       if (TREE_CODE (base) == ARRAY_RANGE_REF
1980           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1981         return false;
1982
1983       step = array_ref_element_size (base);
1984       lbound = array_ref_low_bound (base);
1985
1986       if (!expr_invariant_in_loop_p (loop, step)
1987           || !expr_invariant_in_loop_p (loop, lbound))
1988         return false;
1989     }
1990
1991   if (TREE_CODE (*idx) != SSA_NAME)
1992     return true;
1993
1994   iv = get_iv (dta->ivopts_data, *idx);
1995   if (!iv)
1996     return false;
1997
1998   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1999           *&x[0], which is not folded and does not trigger the
2000           ARRAY_REF path below.  */
2001   *idx = iv->base;
2002
2003   if (integer_zerop (iv->step))
2004     return true;
2005
2006   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2007     {
2008       step = array_ref_element_size (base);
2009
2010       /* We only handle addresses whose step is an integer constant.  */
2011       if (TREE_CODE (step) != INTEGER_CST)
2012         return false;
2013     }
2014   else
2015     /* The step for pointer arithmetics already is 1 byte.  */
2016     step = size_one_node;
2017
2018   iv_base = iv->base;
2019   iv_step = iv->step;
2020   if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2021     use_overflow_semantics = true;
2022
2023   if (!convert_affine_scev (dta->ivopts_data->current_loop,
2024                             sizetype, &iv_base, &iv_step, dta->stmt,
2025                             use_overflow_semantics))
2026     {
2027       /* The index might wrap.  */
2028       return false;
2029     }
2030
2031   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2032   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2033
2034   if (dta->ivopts_data->bivs_not_used_in_addr)
2035     {
2036       if (!iv->biv_p)
2037         iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2038
2039       record_biv_for_address_use (dta->ivopts_data, iv);
2040     }
2041   return true;
2042 }
2043
2044 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
2045    object is passed to it in DATA.  */
2046
2047 static bool
2048 idx_record_use (tree base, tree *idx,
2049                 void *vdata)
2050 {
2051   struct ivopts_data *data = (struct ivopts_data *) vdata;
2052   find_interesting_uses_op (data, *idx);
2053   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2054     {
2055       find_interesting_uses_op (data, array_ref_element_size (base));
2056       find_interesting_uses_op (data, array_ref_low_bound (base));
2057     }
2058   return true;
2059 }
2060
2061 /* If we can prove that TOP = cst * BOT for some constant cst,
2062    store cst to MUL and return true.  Otherwise return false.
2063    The returned value is always sign-extended, regardless of the
2064    signedness of TOP and BOT.  */
2065
2066 static bool
2067 constant_multiple_of (tree top, tree bot, widest_int *mul)
2068 {
2069   tree mby;
2070   enum tree_code code;
2071   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2072   widest_int res, p0, p1;
2073
2074   STRIP_NOPS (top);
2075   STRIP_NOPS (bot);
2076
2077   if (operand_equal_p (top, bot, 0))
2078     {
2079       *mul = 1;
2080       return true;
2081     }
2082
2083   code = TREE_CODE (top);
2084   switch (code)
2085     {
2086     case MULT_EXPR:
2087       mby = TREE_OPERAND (top, 1);
2088       if (TREE_CODE (mby) != INTEGER_CST)
2089         return false;
2090
2091       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2092         return false;
2093
2094       *mul = wi::sext (res * wi::to_widest (mby), precision);
2095       return true;
2096
2097     case PLUS_EXPR:
2098     case MINUS_EXPR:
2099       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2100           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2101         return false;
2102
2103       if (code == MINUS_EXPR)
2104         p1 = -p1;
2105       *mul = wi::sext (p0 + p1, precision);
2106       return true;
2107
2108     case INTEGER_CST:
2109       if (TREE_CODE (bot) != INTEGER_CST)
2110         return false;
2111
2112       p0 = widest_int::from (top, SIGNED);
2113       p1 = widest_int::from (bot, SIGNED);
2114       if (p1 == 0)
2115         return false;
2116       *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2117       return res == 0;
2118
2119     default:
2120       return false;
2121     }
2122 }
2123
2124 /* Return true if memory reference REF with step STEP may be unaligned.  */
2125
2126 static bool
2127 may_be_unaligned_p (tree ref, tree step)
2128 {
2129   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2130      thus they are not misaligned.  */
2131   if (TREE_CODE (ref) == TARGET_MEM_REF)
2132     return false;
2133
2134   unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2135   if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2136     align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2137
2138   unsigned HOST_WIDE_INT bitpos;
2139   unsigned int ref_align;
2140   get_object_alignment_1 (ref, &ref_align, &bitpos);
2141   if (ref_align < align
2142       || (bitpos % align) != 0
2143       || (bitpos % BITS_PER_UNIT) != 0)
2144     return true;
2145
2146   unsigned int trailing_zeros = tree_ctz (step);
2147   if (trailing_zeros < HOST_BITS_PER_INT
2148       && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2149     return true;
2150
2151   return false;
2152 }
2153
2154 /* Return true if EXPR may be non-addressable.   */
2155
2156 bool
2157 may_be_nonaddressable_p (tree expr)
2158 {
2159   switch (TREE_CODE (expr))
2160     {
2161     case TARGET_MEM_REF:
2162       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2163          target, thus they are always addressable.  */
2164       return false;
2165
2166     case MEM_REF:
2167       /* Likewise for MEM_REFs, modulo the storage order.  */
2168       return REF_REVERSE_STORAGE_ORDER (expr);
2169
2170     case BIT_FIELD_REF:
2171       if (REF_REVERSE_STORAGE_ORDER (expr))
2172         return true;
2173       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2174
2175     case COMPONENT_REF:
2176       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2177         return true;
2178       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2179              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2180
2181     case ARRAY_REF:
2182     case ARRAY_RANGE_REF:
2183       if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2184         return true;
2185       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2186
2187     case VIEW_CONVERT_EXPR:
2188       /* This kind of view-conversions may wrap non-addressable objects
2189          and make them look addressable.  After some processing the
2190          non-addressability may be uncovered again, causing ADDR_EXPRs
2191          of inappropriate objects to be built.  */
2192       if (is_gimple_reg (TREE_OPERAND (expr, 0))
2193           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2194         return true;
2195       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2196
2197     CASE_CONVERT:
2198       return true;
2199
2200     default:
2201       break;
2202     }
2203
2204   return false;
2205 }
2206
2207 /* Finds addresses in *OP_P inside STMT.  */
2208
2209 static void
2210 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2211                                tree *op_p)
2212 {
2213   tree base = *op_p, step = size_zero_node;
2214   struct iv *civ;
2215   struct ifs_ivopts_data ifs_ivopts_data;
2216
2217   /* Do not play with volatile memory references.  A bit too conservative,
2218      perhaps, but safe.  */
2219   if (gimple_has_volatile_ops (stmt))
2220     goto fail;
2221
2222   /* Ignore bitfields for now.  Not really something terribly complicated
2223      to handle.  TODO.  */
2224   if (TREE_CODE (base) == BIT_FIELD_REF)
2225     goto fail;
2226
2227   base = unshare_expr (base);
2228
2229   if (TREE_CODE (base) == TARGET_MEM_REF)
2230     {
2231       tree type = build_pointer_type (TREE_TYPE (base));
2232       tree astep;
2233
2234       if (TMR_BASE (base)
2235           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2236         {
2237           civ = get_iv (data, TMR_BASE (base));
2238           if (!civ)
2239             goto fail;
2240
2241           TMR_BASE (base) = civ->base;
2242           step = civ->step;
2243         }
2244       if (TMR_INDEX2 (base)
2245           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2246         {
2247           civ = get_iv (data, TMR_INDEX2 (base));
2248           if (!civ)
2249             goto fail;
2250
2251           TMR_INDEX2 (base) = civ->base;
2252           step = civ->step;
2253         }
2254       if (TMR_INDEX (base)
2255           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2256         {
2257           civ = get_iv (data, TMR_INDEX (base));
2258           if (!civ)
2259             goto fail;
2260
2261           TMR_INDEX (base) = civ->base;
2262           astep = civ->step;
2263
2264           if (astep)
2265             {
2266               if (TMR_STEP (base))
2267                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2268
2269               step = fold_build2 (PLUS_EXPR, type, step, astep);
2270             }
2271         }
2272
2273       if (integer_zerop (step))
2274         goto fail;
2275       base = tree_mem_ref_addr (type, base);
2276     }
2277   else
2278     {
2279       ifs_ivopts_data.ivopts_data = data;
2280       ifs_ivopts_data.stmt = stmt;
2281       ifs_ivopts_data.step = size_zero_node;
2282       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2283           || integer_zerop (ifs_ivopts_data.step))
2284         goto fail;
2285       step = ifs_ivopts_data.step;
2286
2287       /* Check that the base expression is addressable.  This needs
2288          to be done after substituting bases of IVs into it.  */
2289       if (may_be_nonaddressable_p (base))
2290         goto fail;
2291
2292       /* Moreover, on strict alignment platforms, check that it is
2293          sufficiently aligned.  */
2294       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2295         goto fail;
2296
2297       base = build_fold_addr_expr (base);
2298
2299       /* Substituting bases of IVs into the base expression might
2300          have caused folding opportunities.  */
2301       if (TREE_CODE (base) == ADDR_EXPR)
2302         {
2303           tree *ref = &TREE_OPERAND (base, 0);
2304           while (handled_component_p (*ref))
2305             ref = &TREE_OPERAND (*ref, 0);
2306           if (TREE_CODE (*ref) == MEM_REF)
2307             {
2308               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2309                                       TREE_OPERAND (*ref, 0),
2310                                       TREE_OPERAND (*ref, 1));
2311               if (tem)
2312                 *ref = tem;
2313             }
2314         }
2315     }
2316
2317   civ = alloc_iv (data, base, step);
2318   record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2319   return;
2320
2321 fail:
2322   for_each_index (op_p, idx_record_use, data);
2323 }
2324
2325 /* Finds and records invariants used in STMT.  */
2326
2327 static void
2328 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2329 {
2330   ssa_op_iter iter;
2331   use_operand_p use_p;
2332   tree op;
2333
2334   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2335     {
2336       op = USE_FROM_PTR (use_p);
2337       record_invariant (data, op, false);
2338     }
2339 }
2340
2341 /* Finds interesting uses of induction variables in the statement STMT.  */
2342
2343 static void
2344 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2345 {
2346   struct iv *iv;
2347   tree op, *lhs, *rhs;
2348   ssa_op_iter iter;
2349   use_operand_p use_p;
2350   enum tree_code code;
2351
2352   find_invariants_stmt (data, stmt);
2353
2354   if (gimple_code (stmt) == GIMPLE_COND)
2355     {
2356       find_interesting_uses_cond (data, stmt);
2357       return;
2358     }
2359
2360   if (is_gimple_assign (stmt))
2361     {
2362       lhs = gimple_assign_lhs_ptr (stmt);
2363       rhs = gimple_assign_rhs1_ptr (stmt);
2364
2365       if (TREE_CODE (*lhs) == SSA_NAME)
2366         {
2367           /* If the statement defines an induction variable, the uses are not
2368              interesting by themselves.  */
2369
2370           iv = get_iv (data, *lhs);
2371
2372           if (iv && !integer_zerop (iv->step))
2373             return;
2374         }
2375
2376       code = gimple_assign_rhs_code (stmt);
2377       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2378           && (REFERENCE_CLASS_P (*rhs)
2379               || is_gimple_val (*rhs)))
2380         {
2381           if (REFERENCE_CLASS_P (*rhs))
2382             find_interesting_uses_address (data, stmt, rhs);
2383           else
2384             find_interesting_uses_op (data, *rhs);
2385
2386           if (REFERENCE_CLASS_P (*lhs))
2387             find_interesting_uses_address (data, stmt, lhs);
2388           return;
2389         }
2390       else if (TREE_CODE_CLASS (code) == tcc_comparison)
2391         {
2392           find_interesting_uses_cond (data, stmt);
2393           return;
2394         }
2395
2396       /* TODO -- we should also handle address uses of type
2397
2398          memory = call (whatever);
2399
2400          and
2401
2402          call (memory).  */
2403     }
2404
2405   if (gimple_code (stmt) == GIMPLE_PHI
2406       && gimple_bb (stmt) == data->current_loop->header)
2407     {
2408       iv = get_iv (data, PHI_RESULT (stmt));
2409
2410       if (iv && !integer_zerop (iv->step))
2411         return;
2412     }
2413
2414   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2415     {
2416       op = USE_FROM_PTR (use_p);
2417
2418       if (TREE_CODE (op) != SSA_NAME)
2419         continue;
2420
2421       iv = get_iv (data, op);
2422       if (!iv)
2423         continue;
2424
2425       find_interesting_uses_op (data, op);
2426     }
2427 }
2428
2429 /* Finds interesting uses of induction variables outside of loops
2430    on loop exit edge EXIT.  */
2431
2432 static void
2433 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2434 {
2435   gphi *phi;
2436   gphi_iterator psi;
2437   tree def;
2438
2439   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2440     {
2441       phi = psi.phi ();
2442       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2443       if (!virtual_operand_p (def))
2444         find_interesting_uses_op (data, def);
2445     }
2446 }
2447
2448 /* Compute maximum offset of [base + offset] addressing mode
2449    for memory reference represented by USE.  */
2450
2451 static HOST_WIDE_INT
2452 compute_max_addr_offset (struct iv_use *use)
2453 {
2454   int width;
2455   rtx reg, addr;
2456   HOST_WIDE_INT i, off;
2457   unsigned list_index, num;
2458   addr_space_t as;
2459   machine_mode mem_mode, addr_mode;
2460   static vec<HOST_WIDE_INT> max_offset_list;
2461
2462   as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2463   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2464
2465   num = max_offset_list.length ();
2466   list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2467   if (list_index >= num)
2468     {
2469       max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2470       for (; num < max_offset_list.length (); num++)
2471         max_offset_list[num] = -1;
2472     }
2473
2474   off = max_offset_list[list_index];
2475   if (off != -1)
2476     return off;
2477
2478   addr_mode = targetm.addr_space.address_mode (as);
2479   reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2480   addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2481
2482   width = GET_MODE_BITSIZE (addr_mode) - 1;
2483   if (width > (HOST_BITS_PER_WIDE_INT - 1))
2484     width = HOST_BITS_PER_WIDE_INT - 1;
2485
2486   for (i = width; i > 0; i--)
2487     {
2488       off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2489       XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2490       if (memory_address_addr_space_p (mem_mode, addr, as))
2491         break;
2492
2493       /* For some strict-alignment targets, the offset must be naturally
2494          aligned.  Try an aligned offset if mem_mode is not QImode.  */
2495       off = ((unsigned HOST_WIDE_INT) 1 << i);
2496       if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2497         {
2498           off -= GET_MODE_SIZE (mem_mode);
2499           XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2500           if (memory_address_addr_space_p (mem_mode, addr, as))
2501             break;
2502         }
2503     }
2504   if (i == 0)
2505     off = 0;
2506
2507   max_offset_list[list_index] = off;
2508   return off;
2509 }
2510
2511 /* Comparison function to sort group in ascending order of addr_offset.  */
2512
2513 static int
2514 group_compare_offset (const void *a, const void *b)
2515 {
2516   const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2517   const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2518
2519   if ((*u1)->addr_offset != (*u2)->addr_offset)
2520     return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2521   else
2522     return 0;
2523 }
2524
2525 /* Check if small groups should be split.  Return true if no group
2526    contains more than two uses with distinct addr_offsets.  Return
2527    false otherwise.  We want to split such groups because:
2528
2529      1) Small groups don't have much benefit and may interfer with
2530         general candidate selection.
2531      2) Size for problem with only small groups is usually small and
2532         general algorithm can handle it well.
2533
2534    TODO -- Above claim may not hold when we want to merge memory
2535    accesses with conseuctive addresses.  */
2536
2537 static bool
2538 split_small_address_groups_p (struct ivopts_data *data)
2539 {
2540   unsigned int i, j, distinct = 1;
2541   struct iv_use *pre;
2542   struct iv_group *group;
2543
2544   for (i = 0; i < data->vgroups.length (); i++)
2545     {
2546       group = data->vgroups[i];
2547       if (group->vuses.length () == 1)
2548         continue;
2549
2550       gcc_assert (group->type == USE_ADDRESS);
2551       if (group->vuses.length () == 2)
2552         {
2553           if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2554             std::swap (group->vuses[0], group->vuses[1]);
2555         }
2556       else
2557         group->vuses.qsort (group_compare_offset);
2558
2559       if (distinct > 2)
2560         continue;
2561
2562       distinct = 1;
2563       for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2564         {
2565           if (group->vuses[j]->addr_offset != pre->addr_offset)
2566             {
2567               pre = group->vuses[j];
2568               distinct++;
2569             }
2570
2571           if (distinct > 2)
2572             break;
2573         }
2574     }
2575
2576   return (distinct <= 2);
2577 }
2578
2579 /* For each group of address type uses, this function further groups
2580    these uses according to the maximum offset supported by target's
2581    [base + offset] addressing mode.  */
2582
2583 static void
2584 split_address_groups (struct ivopts_data *data)
2585 {
2586   unsigned int i, j;
2587   HOST_WIDE_INT max_offset = -1;
2588
2589   /* Reset max offset to split all small groups.  */
2590   if (split_small_address_groups_p (data))
2591     max_offset = 0;
2592
2593   for (i = 0; i < data->vgroups.length (); i++)
2594     {
2595       struct iv_group *group = data->vgroups[i];
2596       struct iv_use *use = group->vuses[0];
2597
2598       use->id = 0;
2599       use->group_id = group->id;
2600       if (group->vuses.length () == 1)
2601         continue;
2602
2603       if (max_offset != 0)
2604         max_offset = compute_max_addr_offset (use);
2605
2606       for (j = 1; j < group->vuses.length (); j++)
2607         {
2608           struct iv_use *next = group->vuses[j];
2609
2610           /* Only uses with offset that can fit in offset part against
2611              the first use can be grouped together.  */
2612           if (next->addr_offset - use->addr_offset
2613               > (unsigned HOST_WIDE_INT) max_offset)
2614             break;
2615
2616           next->id = j;
2617           next->group_id = group->id;
2618         }
2619       /* Split group.  */
2620       if (j < group->vuses.length ())
2621         {
2622           struct iv_group *new_group = record_group (data, group->type);
2623           new_group->vuses.safe_splice (group->vuses);
2624           new_group->vuses.block_remove (0, j);
2625           group->vuses.truncate (j);
2626         }
2627     }
2628 }
2629
2630 /* Finds uses of the induction variables that are interesting.  */
2631
2632 static void
2633 find_interesting_uses (struct ivopts_data *data)
2634 {
2635   basic_block bb;
2636   gimple_stmt_iterator bsi;
2637   basic_block *body = get_loop_body (data->current_loop);
2638   unsigned i;
2639   edge e;
2640
2641   for (i = 0; i < data->current_loop->num_nodes; i++)
2642     {
2643       edge_iterator ei;
2644       bb = body[i];
2645
2646       FOR_EACH_EDGE (e, ei, bb->succs)
2647         if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2648             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2649           find_interesting_uses_outside (data, e);
2650
2651       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2652         find_interesting_uses_stmt (data, gsi_stmt (bsi));
2653       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2654         if (!is_gimple_debug (gsi_stmt (bsi)))
2655           find_interesting_uses_stmt (data, gsi_stmt (bsi));
2656     }
2657
2658   split_address_groups (data);
2659
2660   if (dump_file && (dump_flags & TDF_DETAILS))
2661     {
2662       bitmap_iterator bi;
2663
2664       fprintf (dump_file, "\n<Invariant Vars>:\n");
2665       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2666         {
2667           struct version_info *info = ver_info (data, i);
2668           if (info->inv_id)
2669             {
2670               fprintf (dump_file, "Inv %d:\t", info->inv_id);
2671               print_generic_expr (dump_file, info->name, TDF_SLIM);
2672               fprintf (dump_file, "%s\n",
2673                        info->has_nonlin_use ? "" : "\t(eliminable)");
2674             }
2675         }
2676
2677       fprintf (dump_file, "\n<IV Groups>:\n");
2678       dump_groups (dump_file, data);
2679       fprintf (dump_file, "\n");
2680     }
2681
2682   free (body);
2683 }
2684
2685 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2686    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2687    we are at the top-level of the processed address.  */
2688
2689 static tree
2690 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2691                 HOST_WIDE_INT *offset)
2692 {
2693   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2694   enum tree_code code;
2695   tree type, orig_type = TREE_TYPE (expr);
2696   HOST_WIDE_INT off0, off1, st;
2697   tree orig_expr = expr;
2698
2699   STRIP_NOPS (expr);
2700
2701   type = TREE_TYPE (expr);
2702   code = TREE_CODE (expr);
2703   *offset = 0;
2704
2705   switch (code)
2706     {
2707     case INTEGER_CST:
2708       if (!cst_and_fits_in_hwi (expr)
2709           || integer_zerop (expr))
2710         return orig_expr;
2711
2712       *offset = int_cst_value (expr);
2713       return build_int_cst (orig_type, 0);
2714
2715     case POINTER_PLUS_EXPR:
2716     case PLUS_EXPR:
2717     case MINUS_EXPR:
2718       op0 = TREE_OPERAND (expr, 0);
2719       op1 = TREE_OPERAND (expr, 1);
2720
2721       op0 = strip_offset_1 (op0, false, false, &off0);
2722       op1 = strip_offset_1 (op1, false, false, &off1);
2723
2724       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2725       if (op0 == TREE_OPERAND (expr, 0)
2726           && op1 == TREE_OPERAND (expr, 1))
2727         return orig_expr;
2728
2729       if (integer_zerop (op1))
2730         expr = op0;
2731       else if (integer_zerop (op0))
2732         {
2733           if (code == MINUS_EXPR)
2734             expr = fold_build1 (NEGATE_EXPR, type, op1);
2735           else
2736             expr = op1;
2737         }
2738       else
2739         expr = fold_build2 (code, type, op0, op1);
2740
2741       return fold_convert (orig_type, expr);
2742
2743     case MULT_EXPR:
2744       op1 = TREE_OPERAND (expr, 1);
2745       if (!cst_and_fits_in_hwi (op1))
2746         return orig_expr;
2747
2748       op0 = TREE_OPERAND (expr, 0);
2749       op0 = strip_offset_1 (op0, false, false, &off0);
2750       if (op0 == TREE_OPERAND (expr, 0))
2751         return orig_expr;
2752
2753       *offset = off0 * int_cst_value (op1);
2754       if (integer_zerop (op0))
2755         expr = op0;
2756       else
2757         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2758
2759       return fold_convert (orig_type, expr);
2760
2761     case ARRAY_REF:
2762     case ARRAY_RANGE_REF:
2763       if (!inside_addr)
2764         return orig_expr;
2765
2766       step = array_ref_element_size (expr);
2767       if (!cst_and_fits_in_hwi (step))
2768         break;
2769
2770       st = int_cst_value (step);
2771       op1 = TREE_OPERAND (expr, 1);
2772       op1 = strip_offset_1 (op1, false, false, &off1);
2773       *offset = off1 * st;
2774
2775       if (top_compref
2776           && integer_zerop (op1))
2777         {
2778           /* Strip the component reference completely.  */
2779           op0 = TREE_OPERAND (expr, 0);
2780           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2781           *offset += off0;
2782           return op0;
2783         }
2784       break;
2785
2786     case COMPONENT_REF:
2787       {
2788         tree field;
2789
2790         if (!inside_addr)
2791           return orig_expr;
2792
2793         tmp = component_ref_field_offset (expr);
2794         field = TREE_OPERAND (expr, 1);
2795         if (top_compref
2796             && cst_and_fits_in_hwi (tmp)
2797             && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2798           {
2799             HOST_WIDE_INT boffset, abs_off;
2800
2801             /* Strip the component reference completely.  */
2802             op0 = TREE_OPERAND (expr, 0);
2803             op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2804             boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2805             abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2806             if (boffset < 0)
2807               abs_off = -abs_off;
2808
2809             *offset = off0 + int_cst_value (tmp) + abs_off;
2810             return op0;
2811           }
2812       }
2813       break;
2814
2815     case ADDR_EXPR:
2816       op0 = TREE_OPERAND (expr, 0);
2817       op0 = strip_offset_1 (op0, true, true, &off0);
2818       *offset += off0;
2819
2820       if (op0 == TREE_OPERAND (expr, 0))
2821         return orig_expr;
2822
2823       expr = build_fold_addr_expr (op0);
2824       return fold_convert (orig_type, expr);
2825
2826     case MEM_REF:
2827       /* ???  Offset operand?  */
2828       inside_addr = false;
2829       break;
2830
2831     default:
2832       return orig_expr;
2833     }
2834
2835   /* Default handling of expressions for that we want to recurse into
2836      the first operand.  */
2837   op0 = TREE_OPERAND (expr, 0);
2838   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2839   *offset += off0;
2840
2841   if (op0 == TREE_OPERAND (expr, 0)
2842       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2843     return orig_expr;
2844
2845   expr = copy_node (expr);
2846   TREE_OPERAND (expr, 0) = op0;
2847   if (op1)
2848     TREE_OPERAND (expr, 1) = op1;
2849
2850   /* Inside address, we might strip the top level component references,
2851      thus changing type of the expression.  Handling of ADDR_EXPR
2852      will fix that.  */
2853   expr = fold_convert (orig_type, expr);
2854
2855   return expr;
2856 }
2857
2858 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2859
2860 static tree
2861 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2862 {
2863   HOST_WIDE_INT off;
2864   tree core = strip_offset_1 (expr, false, false, &off);
2865   *offset = off;
2866   return core;
2867 }
2868
2869 /* Returns variant of TYPE that can be used as base for different uses.
2870    We return unsigned type with the same precision, which avoids problems
2871    with overflows.  */
2872
2873 static tree
2874 generic_type_for (tree type)
2875 {
2876   if (POINTER_TYPE_P (type))
2877     return unsigned_type_for (type);
2878
2879   if (TYPE_UNSIGNED (type))
2880     return type;
2881
2882   return unsigned_type_for (type);
2883 }
2884
2885 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2886    the bitmap to that we should store it.  */
2887
2888 static struct ivopts_data *fd_ivopts_data;
2889 static tree
2890 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2891 {
2892   bitmap *depends_on = (bitmap *) data;
2893   struct version_info *info;
2894
2895   if (TREE_CODE (*expr_p) != SSA_NAME)
2896     return NULL_TREE;
2897   info = name_info (fd_ivopts_data, *expr_p);
2898
2899   if (!info->inv_id || info->has_nonlin_use)
2900     return NULL_TREE;
2901
2902   if (!*depends_on)
2903     *depends_on = BITMAP_ALLOC (NULL);
2904   bitmap_set_bit (*depends_on, info->inv_id);
2905
2906   return NULL_TREE;
2907 }
2908
2909 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2910    position to POS.  If USE is not NULL, the candidate is set as related to
2911    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2912    replacement of the final value of the iv by a direct computation.  */
2913
2914 static struct iv_cand *
2915 add_candidate_1 (struct ivopts_data *data,
2916                  tree base, tree step, bool important, enum iv_position pos,
2917                  struct iv_use *use, gimple *incremented_at,
2918                  struct iv *orig_iv = NULL)
2919 {
2920   unsigned i;
2921   struct iv_cand *cand = NULL;
2922   tree type, orig_type;
2923
2924   gcc_assert (base && step);
2925
2926   /* -fkeep-gc-roots-live means that we have to keep a real pointer
2927      live, but the ivopts code may replace a real pointer with one
2928      pointing before or after the memory block that is then adjusted
2929      into the memory block during the loop.  FIXME: It would likely be
2930      better to actually force the pointer live and still use ivopts;
2931      for example, it would be enough to write the pointer into memory
2932      and keep it there until after the loop.  */
2933   if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2934     return NULL;
2935
2936   /* For non-original variables, make sure their values are computed in a type
2937      that does not invoke undefined behavior on overflows (since in general,
2938      we cannot prove that these induction variables are non-wrapping).  */
2939   if (pos != IP_ORIGINAL)
2940     {
2941       orig_type = TREE_TYPE (base);
2942       type = generic_type_for (orig_type);
2943       if (type != orig_type)
2944         {
2945           base = fold_convert (type, base);
2946           step = fold_convert (type, step);
2947         }
2948     }
2949
2950   for (i = 0; i < data->vcands.length (); i++)
2951     {
2952       cand = data->vcands[i];
2953
2954       if (cand->pos != pos)
2955         continue;
2956
2957       if (cand->incremented_at != incremented_at
2958           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2959               && cand->ainc_use != use))
2960         continue;
2961
2962       if (operand_equal_p (base, cand->iv->base, 0)
2963           && operand_equal_p (step, cand->iv->step, 0)
2964           && (TYPE_PRECISION (TREE_TYPE (base))
2965               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2966         break;
2967     }
2968
2969   if (i == data->vcands.length ())
2970     {
2971       cand = XCNEW (struct iv_cand);
2972       cand->id = i;
2973       cand->iv = alloc_iv (data, base, step);
2974       cand->pos = pos;
2975       if (pos != IP_ORIGINAL)
2976         {
2977           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2978           cand->var_after = cand->var_before;
2979         }
2980       cand->important = important;
2981       cand->incremented_at = incremented_at;
2982       data->vcands.safe_push (cand);
2983
2984       if (TREE_CODE (step) != INTEGER_CST)
2985         {
2986           fd_ivopts_data = data;
2987           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2988         }
2989
2990       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2991         cand->ainc_use = use;
2992       else
2993         cand->ainc_use = NULL;
2994
2995       cand->orig_iv = orig_iv;
2996       if (dump_file && (dump_flags & TDF_DETAILS))
2997         dump_cand (dump_file, cand);
2998     }
2999
3000   cand->important |= important;
3001
3002   /* Relate candidate to the group for which it is added.  */
3003   if (use)
3004     bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3005
3006   return cand;
3007 }
3008
3009 /* Returns true if incrementing the induction variable at the end of the LOOP
3010    is allowed.
3011
3012    The purpose is to avoid splitting latch edge with a biv increment, thus
3013    creating a jump, possibly confusing other optimization passes and leaving
3014    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
3015    is not available (so we do not have a better alternative), or if the latch
3016    edge is already nonempty.  */
3017
3018 static bool
3019 allow_ip_end_pos_p (struct loop *loop)
3020 {
3021   if (!ip_normal_pos (loop))
3022     return true;
3023
3024   if (!empty_block_p (ip_end_pos (loop)))
3025     return true;
3026
3027   return false;
3028 }
3029
3030 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3031    Important field is set to IMPORTANT.  */
3032
3033 static void
3034 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3035                         bool important, struct iv_use *use)
3036 {
3037   basic_block use_bb = gimple_bb (use->stmt);
3038   machine_mode mem_mode;
3039   unsigned HOST_WIDE_INT cstepi;
3040
3041   /* If we insert the increment in any position other than the standard
3042      ones, we must ensure that it is incremented once per iteration.
3043      It must not be in an inner nested loop, or one side of an if
3044      statement.  */
3045   if (use_bb->loop_father != data->current_loop
3046       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3047       || stmt_could_throw_p (use->stmt)
3048       || !cst_and_fits_in_hwi (step))
3049     return;
3050
3051   cstepi = int_cst_value (step);
3052
3053   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3054   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3055         || USE_STORE_PRE_INCREMENT (mem_mode))
3056        && GET_MODE_SIZE (mem_mode) == cstepi)
3057       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3058            || USE_STORE_PRE_DECREMENT (mem_mode))
3059           && GET_MODE_SIZE (mem_mode) == -cstepi))
3060     {
3061       enum tree_code code = MINUS_EXPR;
3062       tree new_base;
3063       tree new_step = step;
3064
3065       if (POINTER_TYPE_P (TREE_TYPE (base)))
3066         {
3067           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3068           code = POINTER_PLUS_EXPR;
3069         }
3070       else
3071         new_step = fold_convert (TREE_TYPE (base), new_step);
3072       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3073       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3074                        use->stmt);
3075     }
3076   if (((USE_LOAD_POST_INCREMENT (mem_mode)
3077         || USE_STORE_POST_INCREMENT (mem_mode))
3078        && GET_MODE_SIZE (mem_mode) == cstepi)
3079       || ((USE_LOAD_POST_DECREMENT (mem_mode)
3080            || USE_STORE_POST_DECREMENT (mem_mode))
3081           && GET_MODE_SIZE (mem_mode) == -cstepi))
3082     {
3083       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3084                        use->stmt);
3085     }
3086 }
3087
3088 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3089    position to POS.  If USE is not NULL, the candidate is set as related to
3090    it.  The candidate computation is scheduled before exit condition and at
3091    the end of loop.  */
3092
3093 static void
3094 add_candidate (struct ivopts_data *data,
3095                tree base, tree step, bool important, struct iv_use *use,
3096                struct iv *orig_iv = NULL)
3097 {
3098   if (ip_normal_pos (data->current_loop))
3099     add_candidate_1 (data, base, step, important,
3100                      IP_NORMAL, use, NULL, orig_iv);
3101   if (ip_end_pos (data->current_loop)
3102       && allow_ip_end_pos_p (data->current_loop))
3103     add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3104 }
3105
3106 /* Adds standard iv candidates.  */
3107
3108 static void
3109 add_standard_iv_candidates (struct ivopts_data *data)
3110 {
3111   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3112
3113   /* The same for a double-integer type if it is still fast enough.  */
3114   if (TYPE_PRECISION
3115         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3116       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3117     add_candidate (data, build_int_cst (long_integer_type_node, 0),
3118                    build_int_cst (long_integer_type_node, 1), true, NULL);
3119
3120   /* The same for a double-integer type if it is still fast enough.  */
3121   if (TYPE_PRECISION
3122         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3123       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3124     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3125                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
3126 }
3127
3128
3129 /* Adds candidates bases on the old induction variable IV.  */
3130
3131 static void
3132 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3133 {
3134   gimple *phi;
3135   tree def;
3136   struct iv_cand *cand;
3137
3138   /* Check if this biv is used in address type use.  */
3139   if (iv->no_overflow  && iv->have_address_use
3140       && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3141       && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3142     {
3143       tree base = fold_convert (sizetype, iv->base);
3144       tree step = fold_convert (sizetype, iv->step);
3145
3146       /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3147       add_candidate (data, base, step, true, NULL, iv);
3148       /* Add iv cand of the original type only if it has nonlinear use.  */
3149       if (iv->nonlin_use)
3150         add_candidate (data, iv->base, iv->step, true, NULL);
3151     }
3152   else
3153     add_candidate (data, iv->base, iv->step, true, NULL);
3154
3155   /* The same, but with initial value zero.  */
3156   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3157     add_candidate (data, size_int (0), iv->step, true, NULL);
3158   else
3159     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3160                    iv->step, true, NULL);
3161
3162   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3163   if (gimple_code (phi) == GIMPLE_PHI)
3164     {
3165       /* Additionally record the possibility of leaving the original iv
3166          untouched.  */
3167       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3168       /* Don't add candidate if it's from another PHI node because
3169          it's an affine iv appearing in the form of PEELED_CHREC.  */
3170       phi = SSA_NAME_DEF_STMT (def);
3171       if (gimple_code (phi) != GIMPLE_PHI)
3172         {
3173           cand = add_candidate_1 (data,
3174                                   iv->base, iv->step, true, IP_ORIGINAL, NULL,
3175                                   SSA_NAME_DEF_STMT (def));
3176           if (cand)
3177             {
3178               cand->var_before = iv->ssa_name;
3179               cand->var_after = def;
3180             }
3181         }
3182       else
3183         gcc_assert (gimple_bb (phi) == data->current_loop->header);
3184     }
3185 }
3186
3187 /* Adds candidates based on the old induction variables.  */
3188
3189 static void
3190 add_iv_candidate_for_bivs (struct ivopts_data *data)
3191 {
3192   unsigned i;
3193   struct iv *iv;
3194   bitmap_iterator bi;
3195
3196   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3197     {
3198       iv = ver_info (data, i)->iv;
3199       if (iv && iv->biv_p && !integer_zerop (iv->step))
3200         add_iv_candidate_for_biv (data, iv);
3201     }
3202 }
3203
3204 /* Record common candidate {BASE, STEP} derived from USE in hashtable.  */
3205
3206 static void
3207 record_common_cand (struct ivopts_data *data, tree base,
3208                     tree step, struct iv_use *use)
3209 {
3210   struct iv_common_cand ent;
3211   struct iv_common_cand **slot;
3212
3213   ent.base = base;
3214   ent.step = step;
3215   ent.hash = iterative_hash_expr (base, 0);
3216   ent.hash = iterative_hash_expr (step, ent.hash);
3217
3218   slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3219   if (*slot == NULL)
3220     {
3221       *slot = new iv_common_cand ();
3222       (*slot)->base = base;
3223       (*slot)->step = step;
3224       (*slot)->uses.create (8);
3225       (*slot)->hash = ent.hash;
3226       data->iv_common_cands.safe_push ((*slot));
3227     }
3228
3229   gcc_assert (use != NULL);
3230   (*slot)->uses.safe_push (use);
3231   return;
3232 }
3233
3234 /* Comparison function used to sort common candidates.  */
3235
3236 static int
3237 common_cand_cmp (const void *p1, const void *p2)
3238 {
3239   unsigned n1, n2;
3240   const struct iv_common_cand *const *const ccand1
3241     = (const struct iv_common_cand *const *)p1;
3242   const struct iv_common_cand *const *const ccand2
3243     = (const struct iv_common_cand *const *)p2;
3244
3245   n1 = (*ccand1)->uses.length ();
3246   n2 = (*ccand2)->uses.length ();
3247   return n2 - n1;
3248 }
3249
3250 /* Adds IV candidates based on common candidated recorded.  */
3251
3252 static void
3253 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3254 {
3255   unsigned i, j;
3256   struct iv_cand *cand_1, *cand_2;
3257
3258   data->iv_common_cands.qsort (common_cand_cmp);
3259   for (i = 0; i < data->iv_common_cands.length (); i++)
3260     {
3261       struct iv_common_cand *ptr = data->iv_common_cands[i];
3262
3263       /* Only add IV candidate if it's derived from multiple uses.  */
3264       if (ptr->uses.length () <= 1)
3265         break;
3266
3267       cand_1 = NULL;
3268       cand_2 = NULL;
3269       if (ip_normal_pos (data->current_loop))
3270         cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3271                                   false, IP_NORMAL, NULL, NULL);
3272
3273       if (ip_end_pos (data->current_loop)
3274           && allow_ip_end_pos_p (data->current_loop))
3275         cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3276                                   false, IP_END, NULL, NULL);
3277
3278       /* Bind deriving uses and the new candidates.  */
3279       for (j = 0; j < ptr->uses.length (); j++)
3280         {
3281           struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3282           if (cand_1)
3283             bitmap_set_bit (group->related_cands, cand_1->id);
3284           if (cand_2)
3285             bitmap_set_bit (group->related_cands, cand_2->id);
3286         }
3287     }
3288
3289   /* Release data since it is useless from this point.  */
3290   data->iv_common_cand_tab->empty ();
3291   data->iv_common_cands.truncate (0);
3292 }
3293
3294 /* Adds candidates based on the value of USE's iv.  */
3295
3296 static void
3297 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3298 {
3299   unsigned HOST_WIDE_INT offset;
3300   tree base;
3301   tree basetype;
3302   struct iv *iv = use->iv;
3303
3304   add_candidate (data, iv->base, iv->step, false, use);
3305
3306   /* Record common candidate for use in case it can be shared by others.  */
3307   record_common_cand (data, iv->base, iv->step, use);
3308
3309   /* Record common candidate with initial value zero.  */
3310   basetype = TREE_TYPE (iv->base);
3311   if (POINTER_TYPE_P (basetype))
3312     basetype = sizetype;
3313   record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3314
3315   /* Record common candidate with constant offset stripped in base.
3316      Like the use itself, we also add candidate directly for it.  */
3317   base = strip_offset (iv->base, &offset);
3318   if (offset || base != iv->base)
3319     {
3320       record_common_cand (data, base, iv->step, use);
3321       add_candidate (data, base, iv->step, false, use);
3322     }
3323
3324   /* Record common candidate with base_object removed in base.  */
3325   if (iv->base_object != NULL)
3326     {
3327       unsigned i;
3328       aff_tree aff_base;
3329       tree step, base_object = iv->base_object;
3330
3331       base = iv->base;
3332       step = iv->step;
3333       STRIP_NOPS (base);
3334       STRIP_NOPS (step);
3335       STRIP_NOPS (base_object);
3336       tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3337       for (i = 0; i < aff_base.n; i++)
3338         {
3339           if (aff_base.elts[i].coef != 1)
3340             continue;
3341
3342           if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3343             break;
3344         }
3345       if (i < aff_base.n)
3346         {
3347           aff_combination_remove_elt (&aff_base, i);
3348           base = aff_combination_to_tree (&aff_base);
3349           basetype = TREE_TYPE (base);
3350           if (POINTER_TYPE_P (basetype))
3351             basetype = sizetype;
3352
3353           step = fold_convert (basetype, step);
3354           record_common_cand (data, base, step, use);
3355           /* Also record common candidate with offset stripped.  */
3356           base = strip_offset (base, &offset);
3357           if (offset)
3358             record_common_cand (data, base, step, use);
3359         }
3360     }
3361
3362   /* At last, add auto-incremental candidates.  Make such variables
3363      important since other iv uses with same base object may be based
3364      on it.  */
3365   if (use != NULL && use->type == USE_ADDRESS)
3366     add_autoinc_candidates (data, iv->base, iv->step, true, use);
3367 }
3368
3369 /* Adds candidates based on the uses.  */
3370
3371 static void
3372 add_iv_candidate_for_groups (struct ivopts_data *data)
3373 {
3374   unsigned i;
3375
3376   /* Only add candidate for the first use in group.  */
3377   for (i = 0; i < data->vgroups.length (); i++)
3378     {
3379       struct iv_group *group = data->vgroups[i];
3380
3381       gcc_assert (group->vuses[0] != NULL);
3382       add_iv_candidate_for_use (data, group->vuses[0]);
3383     }
3384   add_iv_candidate_derived_from_uses (data);
3385 }
3386
3387 /* Record important candidates and add them to related_cands bitmaps.  */
3388
3389 static void
3390 record_important_candidates (struct ivopts_data *data)
3391 {
3392   unsigned i;
3393   struct iv_group *group;
3394
3395   for (i = 0; i < data->vcands.length (); i++)
3396     {
3397       struct iv_cand *cand = data->vcands[i];
3398
3399       if (cand->important)
3400         bitmap_set_bit (data->important_candidates, i);
3401     }
3402
3403   data->consider_all_candidates = (data->vcands.length ()
3404                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
3405
3406   /* Add important candidates to groups' related_cands bitmaps.  */
3407   for (i = 0; i < data->vgroups.length (); i++)
3408     {
3409       group = data->vgroups[i];
3410       bitmap_ior_into (group->related_cands, data->important_candidates);
3411     }
3412 }
3413
3414 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3415    If consider_all_candidates is true, we use a two-dimensional array, otherwise
3416    we allocate a simple list to every use.  */
3417
3418 static void
3419 alloc_use_cost_map (struct ivopts_data *data)
3420 {
3421   unsigned i, size, s;
3422
3423   for (i = 0; i < data->vgroups.length (); i++)
3424     {
3425       struct iv_group *group = data->vgroups[i];
3426
3427       if (data->consider_all_candidates)
3428         size = data->vcands.length ();
3429       else
3430         {
3431           s = bitmap_count_bits (group->related_cands);
3432
3433           /* Round up to the power of two, so that moduling by it is fast.  */
3434           size = s ? (1 << ceil_log2 (s)) : 1;
3435         }
3436
3437       group->n_map_members = size;
3438       group->cost_map = XCNEWVEC (struct cost_pair, size);
3439     }
3440 }
3441
3442 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3443    on invariants DEPENDS_ON and that the value used in expressing it
3444    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
3445
3446 static void
3447 set_group_iv_cost (struct ivopts_data *data,
3448                    struct iv_group *group, struct iv_cand *cand,
3449                    comp_cost cost, bitmap depends_on, tree value,
3450                    enum tree_code comp, iv_inv_expr_ent *inv_expr)
3451 {
3452   unsigned i, s;
3453
3454   if (cost.infinite_cost_p ())
3455     {
3456       BITMAP_FREE (depends_on);
3457       return;
3458     }
3459
3460   if (data->consider_all_candidates)
3461     {
3462       group->cost_map[cand->id].cand = cand;
3463       group->cost_map[cand->id].cost = cost;
3464       group->cost_map[cand->id].depends_on = depends_on;
3465       group->cost_map[cand->id].value = value;
3466       group->cost_map[cand->id].comp = comp;
3467       group->cost_map[cand->id].inv_expr = inv_expr;
3468       return;
3469     }
3470
3471   /* n_map_members is a power of two, so this computes modulo.  */
3472   s = cand->id & (group->n_map_members - 1);
3473   for (i = s; i < group->n_map_members; i++)
3474     if (!group->cost_map[i].cand)
3475       goto found;
3476   for (i = 0; i < s; i++)
3477     if (!group->cost_map[i].cand)
3478       goto found;
3479
3480   gcc_unreachable ();
3481
3482 found:
3483   group->cost_map[i].cand = cand;
3484   group->cost_map[i].cost = cost;
3485   group->cost_map[i].depends_on = depends_on;
3486   group->cost_map[i].value = value;
3487   group->cost_map[i].comp = comp;
3488   group->cost_map[i].inv_expr = inv_expr;
3489 }
3490
3491 /* Gets cost of (GROUP, CAND) pair.  */
3492
3493 static struct cost_pair *
3494 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3495                    struct iv_cand *cand)
3496 {
3497   unsigned i, s;
3498   struct cost_pair *ret;
3499
3500   if (!cand)
3501     return NULL;
3502
3503   if (data->consider_all_candidates)
3504     {
3505       ret = group->cost_map + cand->id;
3506       if (!ret->cand)
3507         return NULL;
3508
3509       return ret;
3510     }
3511
3512   /* n_map_members is a power of two, so this computes modulo.  */
3513   s = cand->id & (group->n_map_members - 1);
3514   for (i = s; i < group->n_map_members; i++)
3515     if (group->cost_map[i].cand == cand)
3516       return group->cost_map + i;
3517     else if (group->cost_map[i].cand == NULL)
3518       return NULL;
3519   for (i = 0; i < s; i++)
3520     if (group->cost_map[i].cand == cand)
3521       return group->cost_map + i;
3522     else if (group->cost_map[i].cand == NULL)
3523       return NULL;
3524
3525   return NULL;
3526 }
3527
3528 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
3529 static rtx
3530 produce_memory_decl_rtl (tree obj, int *regno)
3531 {
3532   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3533   machine_mode address_mode = targetm.addr_space.address_mode (as);
3534   rtx x;
3535
3536   gcc_assert (obj);
3537   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3538     {
3539       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3540       x = gen_rtx_SYMBOL_REF (address_mode, name);
3541       SET_SYMBOL_REF_DECL (x, obj);
3542       x = gen_rtx_MEM (DECL_MODE (obj), x);
3543       set_mem_addr_space (x, as);
3544       targetm.encode_section_info (obj, x, true);
3545     }
3546   else
3547     {
3548       x = gen_raw_REG (address_mode, (*regno)++);
3549       x = gen_rtx_MEM (DECL_MODE (obj), x);
3550       set_mem_addr_space (x, as);
3551     }
3552
3553   return x;
3554 }
3555
3556 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
3557    walk_tree.  DATA contains the actual fake register number.  */
3558
3559 static tree
3560 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3561 {
3562   tree obj = NULL_TREE;
3563   rtx x = NULL_RTX;
3564   int *regno = (int *) data;
3565
3566   switch (TREE_CODE (*expr_p))
3567     {
3568     case ADDR_EXPR:
3569       for (expr_p = &TREE_OPERAND (*expr_p, 0);
3570            handled_component_p (*expr_p);
3571            expr_p = &TREE_OPERAND (*expr_p, 0))
3572         continue;
3573       obj = *expr_p;
3574       if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3575         x = produce_memory_decl_rtl (obj, regno);
3576       break;
3577
3578     case SSA_NAME:
3579       *ws = 0;
3580       obj = SSA_NAME_VAR (*expr_p);
3581       /* Defer handling of anonymous SSA_NAMEs to the expander.  */
3582       if (!obj)
3583         return NULL_TREE;
3584       if (!DECL_RTL_SET_P (obj))
3585         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3586       break;
3587
3588     case VAR_DECL:
3589     case PARM_DECL:
3590     case RESULT_DECL:
3591       *ws = 0;
3592       obj = *expr_p;
3593
3594       if (DECL_RTL_SET_P (obj))
3595         break;
3596
3597       if (DECL_MODE (obj) == BLKmode)
3598         x = produce_memory_decl_rtl (obj, regno);
3599       else
3600         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3601
3602       break;
3603
3604     default:
3605       break;
3606     }
3607
3608   if (x)
3609     {
3610       decl_rtl_to_reset.safe_push (obj);
3611       SET_DECL_RTL (obj, x);
3612     }
3613
3614   return NULL_TREE;
3615 }
3616
3617 /* Determines cost of the computation of EXPR.  */
3618
3619 static unsigned
3620 computation_cost (tree expr, bool speed)
3621 {
3622   rtx_insn *seq;
3623   rtx rslt;
3624   tree type = TREE_TYPE (expr);
3625   unsigned cost;
3626   /* Avoid using hard regs in ways which may be unsupported.  */
3627   int regno = LAST_VIRTUAL_REGISTER + 1;
3628   struct cgraph_node *node = cgraph_node::get (current_function_decl);
3629   enum node_frequency real_frequency = node->frequency;
3630
3631   node->frequency = NODE_FREQUENCY_NORMAL;
3632   crtl->maybe_hot_insn_p = speed;
3633   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3634   start_sequence ();
3635   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3636   seq = get_insns ();
3637   end_sequence ();
3638   default_rtl_profile ();
3639   node->frequency = real_frequency;
3640
3641   cost = seq_cost (seq, speed);
3642   if (MEM_P (rslt))
3643     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3644                           TYPE_ADDR_SPACE (type), speed);
3645   else if (!REG_P (rslt))
3646     cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3647
3648   return cost;
3649 }
3650
3651 /* Returns variable containing the value of candidate CAND at statement AT.  */
3652
3653 static tree
3654 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3655 {
3656   if (stmt_after_increment (loop, cand, stmt))
3657     return cand->var_after;
3658   else
3659     return cand->var_before;
3660 }
3661
3662 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3663    same precision that is at least as wide as the precision of TYPE, stores
3664    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
3665    type of A and B.  */
3666
3667 static tree
3668 determine_common_wider_type (tree *a, tree *b)
3669 {
3670   tree wider_type = NULL;
3671   tree suba, subb;
3672   tree atype = TREE_TYPE (*a);
3673
3674   if (CONVERT_EXPR_P (*a))
3675     {
3676       suba = TREE_OPERAND (*a, 0);
3677       wider_type = TREE_TYPE (suba);
3678       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3679         return atype;
3680     }
3681   else
3682     return atype;
3683
3684   if (CONVERT_EXPR_P (*b))
3685     {
3686       subb = TREE_OPERAND (*b, 0);
3687       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3688         return atype;
3689     }
3690   else
3691     return atype;
3692
3693   *a = suba;
3694   *b = subb;
3695   return wider_type;
3696 }
3697
3698 /* Determines the expression by that USE is expressed from induction variable
3699    CAND at statement AT in LOOP.  The expression is stored in a decomposed
3700    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
3701
3702 static bool
3703 get_computation_aff (struct loop *loop,
3704                      struct iv_use *use, struct iv_cand *cand, gimple *at,
3705                      struct aff_tree *aff)
3706 {
3707   tree ubase = use->iv->base;
3708   tree ustep = use->iv->step;
3709   tree cbase = cand->iv->base;
3710   tree cstep = cand->iv->step, cstep_common;
3711   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3712   tree common_type, var;
3713   tree uutype;
3714   aff_tree cbase_aff, var_aff;
3715   widest_int rat;
3716
3717   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3718     {
3719       /* We do not have a precision to express the values of use.  */
3720       return false;
3721     }
3722
3723   var = var_at_stmt (loop, cand, at);
3724   uutype = unsigned_type_for (utype);
3725
3726   /* If the conversion is not noop, perform it.  */
3727   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3728     {
3729       if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3730           && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3731         {
3732           tree inner_base, inner_step, inner_type;
3733           inner_base = TREE_OPERAND (cbase, 0);
3734           if (CONVERT_EXPR_P (cstep))
3735             inner_step = TREE_OPERAND (cstep, 0);
3736           else
3737             inner_step = cstep;
3738
3739           inner_type = TREE_TYPE (inner_base);
3740           /* If candidate is added from a biv whose type is smaller than
3741              ctype, we know both candidate and the biv won't overflow.
3742              In this case, it's safe to skip the convertion in candidate.
3743              As an example, (unsigned short)((unsigned long)A) equals to
3744              (unsigned short)A, if A has a type no larger than short.  */
3745           if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3746             {
3747               cbase = inner_base;
3748               cstep = inner_step;
3749             }
3750         }
3751       cstep = fold_convert (uutype, cstep);
3752       cbase = fold_convert (uutype, cbase);
3753       var = fold_convert (uutype, var);
3754     }
3755
3756   /* Ratio is 1 when computing the value of biv cand by itself.
3757      We can't rely on constant_multiple_of in this case because the
3758      use is created after the original biv is selected.  The call
3759      could fail because of inconsistent fold behavior.  See PR68021
3760      for more information.  */
3761   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3762     {
3763       gcc_assert (is_gimple_assign (use->stmt));
3764       gcc_assert (use->iv->ssa_name == cand->var_after);
3765       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3766       rat = 1;
3767     }
3768   else if (!constant_multiple_of (ustep, cstep, &rat))
3769     return false;
3770
3771   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3772      type, we achieve better folding by computing their difference in this
3773      wider type, and cast the result to UUTYPE.  We do not need to worry about
3774      overflows, as all the arithmetics will in the end be performed in UUTYPE
3775      anyway.  */
3776   common_type = determine_common_wider_type (&ubase, &cbase);
3777
3778   /* use = ubase - ratio * cbase + ratio * var.  */
3779   tree_to_aff_combination (ubase, common_type, aff);
3780   tree_to_aff_combination (cbase, common_type, &cbase_aff);
3781   tree_to_aff_combination (var, uutype, &var_aff);
3782
3783   /* We need to shift the value if we are after the increment.  */
3784   if (stmt_after_increment (loop, cand, at))
3785     {
3786       aff_tree cstep_aff;
3787
3788       if (common_type != uutype)
3789         cstep_common = fold_convert (common_type, cstep);
3790       else
3791         cstep_common = cstep;
3792
3793       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3794       aff_combination_add (&cbase_aff, &cstep_aff);
3795     }
3796
3797   aff_combination_scale (&cbase_aff, -rat);
3798   aff_combination_add (aff, &cbase_aff);
3799   if (common_type != uutype)
3800     aff_combination_convert (aff, uutype);
3801
3802   aff_combination_scale (&var_aff, rat);
3803   aff_combination_add (aff, &var_aff);
3804
3805   return true;
3806 }
3807
3808 /* Return the type of USE.  */
3809
3810 static tree
3811 get_use_type (struct iv_use *use)
3812 {
3813   tree base_type = TREE_TYPE (use->iv->base);
3814   tree type;
3815
3816   if (use->type == USE_ADDRESS)
3817     {
3818       /* The base_type may be a void pointer.  Create a pointer type based on
3819          the mem_ref instead.  */
3820       type = build_pointer_type (TREE_TYPE (*use->op_p));
3821       gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3822                   == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3823     }
3824   else
3825     type = base_type;
3826
3827   return type;
3828 }
3829
3830 /* Determines the expression by that USE is expressed from induction variable
3831    CAND at statement AT in LOOP.  The computation is unshared.  */
3832
3833 static tree
3834 get_computation_at (struct loop *loop,
3835                     struct iv_use *use, struct iv_cand *cand, gimple *at)
3836 {
3837   aff_tree aff;
3838   tree type = get_use_type (use);
3839
3840   if (!get_computation_aff (loop, use, cand, at, &aff))
3841     return NULL_TREE;
3842   unshare_aff_combination (&aff);
3843   return fold_convert (type, aff_combination_to_tree (&aff));
3844 }
3845
3846 /* Determines the expression by that USE is expressed from induction variable
3847    CAND in LOOP.  The computation is unshared.  */
3848
3849 static tree
3850 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3851 {
3852   return get_computation_at (loop, use, cand, use->stmt);
3853 }
3854
3855 /* Adjust the cost COST for being in loop setup rather than loop body.
3856    If we're optimizing for space, the loop setup overhead is constant;
3857    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3858 static unsigned
3859 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3860 {
3861   if (cost == INFTY)
3862     return cost;
3863   else if (optimize_loop_for_speed_p (data->current_loop))
3864     return cost / avg_loop_niter (data->current_loop);
3865   else
3866     return cost;
3867 }
3868
3869 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3870    validity for a memory reference accessing memory of mode MODE in
3871    address space AS.  */
3872
3873
3874 bool
3875 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3876                                  addr_space_t as)
3877 {
3878 #define MAX_RATIO 128
3879   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3880   static vec<sbitmap> valid_mult_list;
3881   sbitmap valid_mult;
3882
3883   if (data_index >= valid_mult_list.length ())
3884     valid_mult_list.safe_grow_cleared (data_index + 1);
3885
3886   valid_mult = valid_mult_list[data_index];
3887   if (!valid_mult)
3888     {
3889       machine_mode address_mode = targetm.addr_space.address_mode (as);
3890       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3891       rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3892       rtx addr, scaled;
3893       HOST_WIDE_INT i;
3894
3895       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3896       bitmap_clear (valid_mult);
3897       scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3898       addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3899       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3900         {
3901           XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3902           if (memory_address_addr_space_p (mode, addr, as)
3903               || memory_address_addr_space_p (mode, scaled, as))
3904             bitmap_set_bit (valid_mult, i + MAX_RATIO);
3905         }
3906
3907       if (dump_file && (dump_flags & TDF_DETAILS))
3908         {
3909           fprintf (dump_file, "  allowed multipliers:");
3910           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3911             if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3912               fprintf (dump_file, " %d", (int) i);
3913           fprintf (dump_file, "\n");
3914           fprintf (dump_file, "\n");
3915         }
3916
3917       valid_mult_list[data_index] = valid_mult;
3918     }
3919
3920   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3921     return false;
3922
3923   return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3924 }
3925
3926 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3927    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3928    variable is omitted.  Compute the cost for a memory reference that accesses
3929    a memory location of mode MEM_MODE in address space AS.
3930
3931    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3932    size of MEM_MODE / RATIO) is available.  To make this determination, we
3933    look at the size of the increment to be made, which is given in CSTEP.
3934    CSTEP may be zero if the step is unknown.
3935    STMT_AFTER_INC is true iff the statement we're looking at is after the
3936    increment of the original biv.
3937
3938    TODO -- there must be some better way.  This all is quite crude.  */
3939
3940 enum ainc_type
3941 {
3942   AINC_PRE_INC,         /* Pre increment.  */
3943   AINC_PRE_DEC,         /* Pre decrement.  */
3944   AINC_POST_INC,        /* Post increment.  */
3945   AINC_POST_DEC,        /* Post decrement.  */
3946   AINC_NONE             /* Also the number of auto increment types.  */
3947 };
3948
3949 struct address_cost_data
3950 {
3951   HOST_WIDE_INT min_offset, max_offset;
3952   unsigned costs[2][2][2][2];
3953   unsigned ainc_costs[AINC_NONE];
3954 };
3955
3956
3957 static comp_cost
3958 get_address_cost (bool symbol_present, bool var_present,
3959                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3960                   HOST_WIDE_INT cstep, machine_mode mem_mode,
3961                   addr_space_t as, bool speed,
3962                   bool stmt_after_inc, bool *may_autoinc)
3963 {
3964   machine_mode address_mode = targetm.addr_space.address_mode (as);
3965   static vec<address_cost_data *> address_cost_data_list;
3966   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3967   address_cost_data *data;
3968   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3969   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3970   unsigned cost, acost, complexity;
3971   enum ainc_type autoinc_type;
3972   bool offset_p, ratio_p, autoinc;
3973   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3974   unsigned HOST_WIDE_INT mask;
3975   unsigned bits;
3976
3977   if (data_index >= address_cost_data_list.length ())
3978     address_cost_data_list.safe_grow_cleared (data_index + 1);
3979
3980   data = address_cost_data_list[data_index];
3981   if (!data)
3982     {
3983       HOST_WIDE_INT i;
3984       HOST_WIDE_INT rat, off = 0;
3985       int old_cse_not_expected, width;
3986       unsigned sym_p, var_p, off_p, rat_p, add_c;
3987       rtx_insn *seq;
3988       rtx addr, base;
3989       rtx reg0, reg1;
3990
3991       data = (address_cost_data *) xcalloc (1, sizeof (*data));
3992
3993       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3994
3995       width = GET_MODE_BITSIZE (address_mode) - 1;
3996       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3997         width = HOST_BITS_PER_WIDE_INT - 1;
3998       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3999
4000       for (i = width; i >= 0; i--)
4001         {
4002           off = -((unsigned HOST_WIDE_INT) 1 << i);
4003           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004           if (memory_address_addr_space_p (mem_mode, addr, as))
4005             break;
4006         }
4007       data->min_offset = (i == -1? 0 : off);
4008
4009       for (i = width; i >= 0; i--)
4010         {
4011           off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4012           XEXP (addr, 1) = gen_int_mode (off, address_mode);
4013           if (memory_address_addr_space_p (mem_mode, addr, as))
4014             break;
4015           /* For some strict-alignment targets, the offset must be naturally
4016              aligned.  Try an aligned offset if mem_mode is not QImode.  */
4017           off = mem_mode != QImode
4018                 ? ((unsigned HOST_WIDE_INT) 1 << i)
4019                     - GET_MODE_SIZE (mem_mode)
4020                 : 0;
4021           if (off > 0)
4022             {
4023               XEXP (addr, 1) = gen_int_mode (off, address_mode);
4024               if (memory_address_addr_space_p (mem_mode, addr, as))
4025                 break;
4026             }
4027         }
4028       if (i == -1)
4029         off = 0;
4030       data->max_offset = off;
4031
4032       if (dump_file && (dump_flags & TDF_DETAILS))
4033         {
4034           fprintf (dump_file, "get_address_cost:\n");
4035           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4036                    GET_MODE_NAME (mem_mode),
4037                    data->min_offset);
4038           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4039                    GET_MODE_NAME (mem_mode),
4040                    data->max_offset);
4041         }
4042
4043       rat = 1;
4044       for (i = 2; i <= MAX_RATIO; i++)
4045         if (multiplier_allowed_in_address_p (i, mem_mode, as))
4046           {
4047             rat = i;
4048             break;
4049           }
4050
4051       /* Compute the cost of various addressing modes.  */
4052       acost = 0;
4053       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4054       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4055
4056       if (USE_LOAD_PRE_DECREMENT (mem_mode)
4057           || USE_STORE_PRE_DECREMENT (mem_mode))
4058         {
4059           addr = gen_rtx_PRE_DEC (address_mode, reg0);
4060           has_predec[mem_mode]
4061             = memory_address_addr_space_p (mem_mode, addr, as);
4062
4063           if (has_predec[mem_mode])
4064             data->ainc_costs[AINC_PRE_DEC]
4065               = address_cost (addr, mem_mode, as, speed);
4066         }
4067       if (USE_LOAD_POST_DECREMENT (mem_mode)
4068           || USE_STORE_POST_DECREMENT (mem_mode))
4069         {
4070           addr = gen_rtx_POST_DEC (address_mode, reg0);
4071           has_postdec[mem_mode]
4072             = memory_address_addr_space_p (mem_mode, addr, as);
4073
4074           if (has_postdec[mem_mode])
4075             data->ainc_costs[AINC_POST_DEC]
4076               = address_cost (addr, mem_mode, as, speed);
4077         }
4078       if (USE_LOAD_PRE_INCREMENT (mem_mode)
4079           || USE_STORE_PRE_DECREMENT (mem_mode))
4080         {
4081           addr = gen_rtx_PRE_INC (address_mode, reg0);
4082           has_preinc[mem_mode]
4083             = memory_address_addr_space_p (mem_mode, addr, as);
4084
4085           if (has_preinc[mem_mode])
4086             data->ainc_costs[AINC_PRE_INC]
4087               = address_cost (addr, mem_mode, as, speed);
4088         }
4089       if (USE_LOAD_POST_INCREMENT (mem_mode)
4090           || USE_STORE_POST_INCREMENT (mem_mode))
4091         {
4092           addr = gen_rtx_POST_INC (address_mode, reg0);
4093           has_postinc[mem_mode]
4094             = memory_address_addr_space_p (mem_mode, addr, as);
4095
4096           if (has_postinc[mem_mode])
4097             data->ainc_costs[AINC_POST_INC]
4098               = address_cost (addr, mem_mode, as, speed);
4099         }
4100       for (i = 0; i < 16; i++)
4101         {
4102           sym_p = i & 1;
4103           var_p = (i >> 1) & 1;
4104           off_p = (i >> 2) & 1;
4105           rat_p = (i >> 3) & 1;
4106
4107           addr = reg0;
4108           if (rat_p)
4109             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4110                                    gen_int_mode (rat, address_mode));
4111
4112           if (var_p)
4113             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4114
4115           if (sym_p)
4116             {
4117               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4118               /* ??? We can run into trouble with some backends by presenting
4119                  it with symbols which haven't been properly passed through
4120                  targetm.encode_section_info.  By setting the local bit, we
4121                  enhance the probability of things working.  */
4122               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4123
4124               if (off_p)
4125                 base = gen_rtx_fmt_e (CONST, address_mode,
4126                                       gen_rtx_fmt_ee
4127                                         (PLUS, address_mode, base,
4128                                          gen_int_mode (off, address_mode)));
4129             }
4130           else if (off_p)
4131             base = gen_int_mode (off, address_mode);
4132           else
4133             base = NULL_RTX;
4134
4135           if (base)
4136             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4137
4138           start_sequence ();
4139           /* To avoid splitting addressing modes, pretend that no cse will
4140              follow.  */
4141           old_cse_not_expected = cse_not_expected;
4142           cse_not_expected = true;
4143           addr = memory_address_addr_space (mem_mode, addr, as);
4144           cse_not_expected = old_cse_not_expected;
4145           seq = get_insns ();
4146           end_sequence ();
4147
4148           acost = seq_cost (seq, speed);
4149           acost += address_cost (addr, mem_mode, as, speed);
4150
4151           if (!acost)
4152             acost = 1;
4153           data->costs[sym_p][var_p][off_p][rat_p] = acost;
4154         }
4155
4156       /* On some targets, it is quite expensive to load symbol to a register,
4157          which makes addresses that contain symbols look much more expensive.
4158          However, the symbol will have to be loaded in any case before the
4159          loop (and quite likely we have it in register already), so it does not
4160          make much sense to penalize them too heavily.  So make some final
4161          tweaks for the SYMBOL_PRESENT modes:
4162
4163          If VAR_PRESENT is false, and the mode obtained by changing symbol to
4164          var is cheaper, use this mode with small penalty.
4165          If VAR_PRESENT is true, try whether the mode with
4166          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4167          if this is the case, use it.  */
4168       add_c = add_cost (speed, address_mode);
4169       for (i = 0; i < 8; i++)
4170         {
4171           var_p = i & 1;
4172           off_p = (i >> 1) & 1;
4173           rat_p = (i >> 2) & 1;
4174
4175           acost = data->costs[0][1][off_p][rat_p] + 1;
4176           if (var_p)
4177             acost += add_c;
4178
4179           if (acost < data->costs[1][var_p][off_p][rat_p])
4180             data->costs[1][var_p][off_p][rat_p] = acost;
4181         }
4182
4183       if (dump_file && (dump_flags & TDF_DETAILS))
4184         {
4185           fprintf (dump_file, "<Address Costs>:\n");
4186
4187           for (i = 0; i < 16; i++)
4188             {
4189               sym_p = i & 1;
4190               var_p = (i >> 1) & 1;
4191               off_p = (i >> 2) & 1;
4192               rat_p = (i >> 3) & 1;
4193
4194               fprintf (dump_file, "  ");
4195               if (sym_p)
4196                 fprintf (dump_file, "sym + ");
4197               if (var_p)
4198                 fprintf (dump_file, "var + ");
4199               if (off_p)
4200                 fprintf (dump_file, "cst + ");
4201               if (rat_p)
4202                 fprintf (dump_file, "rat * ");
4203
4204               acost = data->costs[sym_p][var_p][off_p][rat_p];
4205               fprintf (dump_file, "index costs %d\n", acost);
4206             }
4207           if (has_predec[mem_mode] || has_postdec[mem_mode]
4208               || has_preinc[mem_mode] || has_postinc[mem_mode])
4209             fprintf (dump_file, "  May include autoinc/dec\n");
4210           fprintf (dump_file, "\n");
4211         }
4212
4213       address_cost_data_list[data_index] = data;
4214     }
4215
4216   bits = GET_MODE_BITSIZE (address_mode);
4217   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4218   offset &= mask;
4219   if ((offset >> (bits - 1) & 1))
4220     offset |= ~mask;
4221   s_offset = offset;
4222
4223   autoinc = false;
4224   autoinc_type = AINC_NONE;
4225   msize = GET_MODE_SIZE (mem_mode);
4226   autoinc_offset = offset;
4227   if (stmt_after_inc)
4228     autoinc_offset += ratio * cstep;
4229   if (symbol_present || var_present || ratio != 1)
4230     autoinc = false;
4231   else
4232     {
4233       if (has_postinc[mem_mode] && autoinc_offset == 0
4234           && msize == cstep)
4235         autoinc_type = AINC_POST_INC;
4236       else if (has_postdec[mem_mode] && autoinc_offset == 0
4237                && msize == -cstep)
4238         autoinc_type = AINC_POST_DEC;
4239       else if (has_preinc[mem_mode] && autoinc_offset == msize
4240                && msize == cstep)
4241         autoinc_type = AINC_PRE_INC;
4242       else if (has_predec[mem_mode] && autoinc_offset == -msize
4243                && msize == -cstep)
4244         autoinc_type = AINC_PRE_DEC;
4245
4246       if (autoinc_type != AINC_NONE)
4247         autoinc = true;
4248     }
4249
4250   cost = 0;
4251   offset_p = (s_offset != 0
4252               && data->min_offset <= s_offset
4253               && s_offset <= data->max_offset);
4254   ratio_p = (ratio != 1
4255              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4256
4257   if (ratio != 1 && !ratio_p)
4258     cost += mult_by_coeff_cost (ratio, address_mode, speed);
4259
4260   if (s_offset && !offset_p && !symbol_present)
4261     cost += add_cost (speed, address_mode);
4262
4263   if (may_autoinc)
4264     *may_autoinc = autoinc;
4265   if (autoinc)
4266     acost = data->ainc_costs[autoinc_type];
4267   else
4268     acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4269   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4270   return comp_cost (cost + acost, complexity);
4271 }
4272
4273  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
4274     EXPR operand holding the shift.  COST0 and COST1 are the costs for
4275     calculating the operands of EXPR.  Returns true if successful, and returns
4276     the cost in COST.  */
4277
4278 static bool
4279 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4280                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4281 {
4282   comp_cost res;
4283   tree op1 = TREE_OPERAND (expr, 1);
4284   tree cst = TREE_OPERAND (mult, 1);
4285   tree multop = TREE_OPERAND (mult, 0);
4286   int m = exact_log2 (int_cst_value (cst));
4287   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4288   int as_cost, sa_cost;
4289   bool mult_in_op1;
4290
4291   if (!(m >= 0 && m < maxm))
4292     return false;
4293
4294   STRIP_NOPS (op1);
4295   mult_in_op1 = operand_equal_p (op1, mult, 0);
4296
4297   as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4298
4299   /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4300      use that in preference to a shift insn followed by an add insn.  */
4301   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4302              ? shiftadd_cost (speed, mode, m)
4303              : (mult_in_op1
4304                 ? shiftsub1_cost (speed, mode, m)
4305                 : shiftsub0_cost (speed, mode, m)));
4306
4307   res = comp_cost (MIN (as_cost, sa_cost), 0);
4308   res += (mult_in_op1 ? cost0 : cost1);
4309
4310   STRIP_NOPS (multop);
4311   if (!is_gimple_val (multop))
4312     res += force_expr_to_var_cost (multop, speed);
4313
4314   *cost = res;
4315   return true;
4316 }
4317
4318 /* Estimates cost of forcing expression EXPR into a variable.  */
4319
4320 static comp_cost
4321 force_expr_to_var_cost (tree expr, bool speed)
4322 {
4323   static bool costs_initialized = false;
4324   static unsigned integer_cost [2];
4325   static unsigned symbol_cost [2];
4326   static unsigned address_cost [2];
4327   tree op0, op1;
4328   comp_cost cost0, cost1, cost;
4329   machine_mode mode;
4330
4331   if (!costs_initialized)
4332     {
4333       tree type = build_pointer_type (integer_type_node);
4334       tree var, addr;
4335       rtx x;
4336       int i;
4337
4338       var = create_tmp_var_raw (integer_type_node, "test_var");
4339       TREE_STATIC (var) = 1;
4340       x = produce_memory_decl_rtl (var, NULL);
4341       SET_DECL_RTL (var, x);
4342
4343       addr = build1 (ADDR_EXPR, type, var);
4344
4345
4346       for (i = 0; i < 2; i++)
4347         {
4348           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4349                                                              2000), i);
4350
4351           symbol_cost[i] = computation_cost (addr, i) + 1;
4352
4353           address_cost[i]
4354             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4355           if (dump_file && (dump_flags & TDF_DETAILS))
4356             {
4357               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4358               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
4359               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
4360               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
4361               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
4362               fprintf (dump_file, "\n");
4363             }
4364         }
4365
4366       costs_initialized = true;
4367     }
4368
4369   STRIP_NOPS (expr);
4370
4371   if (SSA_VAR_P (expr))
4372     return no_cost;
4373
4374   if (is_gimple_min_invariant (expr))
4375     {
4376       if (TREE_CODE (expr) == INTEGER_CST)
4377         return comp_cost (integer_cost [speed], 0);
4378
4379       if (TREE_CODE (expr) == ADDR_EXPR)
4380         {
4381           tree obj = TREE_OPERAND (expr, 0);
4382
4383           if (TREE_CODE (obj) == VAR_DECL
4384               || TREE_CODE (obj) == PARM_DECL
4385               || TREE_CODE (obj) == RESULT_DECL)
4386             return comp_cost (symbol_cost [speed], 0);
4387         }
4388
4389       return comp_cost (address_cost [speed], 0);
4390     }
4391
4392   switch (TREE_CODE (expr))
4393     {
4394     case POINTER_PLUS_EXPR:
4395     case PLUS_EXPR:
4396     case MINUS_EXPR:
4397     case MULT_EXPR:
4398       op0 = TREE_OPERAND (expr, 0);
4399       op1 = TREE_OPERAND (expr, 1);
4400       STRIP_NOPS (op0);
4401       STRIP_NOPS (op1);
4402       break;
4403
4404     CASE_CONVERT:
4405     case NEGATE_EXPR:
4406       op0 = TREE_OPERAND (expr, 0);
4407       STRIP_NOPS (op0);
4408       op1 = NULL_TREE;
4409       break;
4410
4411     default:
4412       /* Just an arbitrary value, FIXME.  */
4413       return comp_cost (target_spill_cost[speed], 0);
4414     }
4415
4416   if (op0 == NULL_TREE
4417       || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4418     cost0 = no_cost;
4419   else
4420     cost0 = force_expr_to_var_cost (op0, speed);
4421
4422   if (op1 == NULL_TREE
4423       || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4424     cost1 = no_cost;
4425   else
4426     cost1 = force_expr_to_var_cost (op1, speed);
4427
4428   mode = TYPE_MODE (TREE_TYPE (expr));
4429   switch (TREE_CODE (expr))
4430     {
4431     case POINTER_PLUS_EXPR:
4432     case PLUS_EXPR:
4433     case MINUS_EXPR:
4434     case NEGATE_EXPR:
4435       cost = comp_cost (add_cost (speed, mode), 0);
4436       if (TREE_CODE (expr) != NEGATE_EXPR)
4437         {
4438           tree mult = NULL_TREE;
4439           comp_cost sa_cost;
4440           if (TREE_CODE (op1) == MULT_EXPR)
4441             mult = op1;
4442           else if (TREE_CODE (op0) == MULT_EXPR)
4443             mult = op0;
4444
4445           if (mult != NULL_TREE
4446               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4447               && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4448                                     speed, &sa_cost))
4449             return sa_cost;
4450         }
4451       break;
4452
4453     CASE_CONVERT:
4454       {
4455         tree inner_mode, outer_mode;
4456         outer_mode = TREE_TYPE (expr);
4457         inner_mode = TREE_TYPE (op0);
4458         cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4459                                        TYPE_MODE (inner_mode), speed), 0);
4460       }
4461       break;
4462
4463     case MULT_EXPR:
4464       if (cst_and_fits_in_hwi (op0))
4465         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4466                                              mode, speed), 0);
4467       else if (cst_and_fits_in_hwi (op1))
4468         cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4469                                              mode, speed), 0);
4470       else
4471         return comp_cost (target_spill_cost [speed], 0);
4472       break;
4473
4474     default:
4475       gcc_unreachable ();
4476     }
4477
4478   cost += cost0;
4479   cost += cost1;
4480
4481   /* Bound the cost by target_spill_cost.  The parts of complicated
4482      computations often are either loop invariant or at least can
4483      be shared between several iv uses, so letting this grow without
4484      limits would not give reasonable results.  */
4485   if (cost.cost > (int) target_spill_cost [speed])
4486     cost.cost = target_spill_cost [speed];
4487
4488   return cost;
4489 }
4490
4491 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
4492    invariants the computation depends on.  */
4493
4494 static comp_cost
4495 force_var_cost (struct ivopts_data *data,
4496                 tree expr, bitmap *depends_on)
4497 {
4498   if (depends_on)
4499     {
4500       fd_ivopts_data = data;
4501       walk_tree (&expr, find_depends, depends_on, NULL);
4502     }
4503
4504   return force_expr_to_var_cost (expr, data->speed);
4505 }
4506
4507 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
4508    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4509    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
4510    invariants the computation depends on.  */
4511
4512 static comp_cost
4513 split_address_cost (struct ivopts_data *data,
4514                     tree addr, bool *symbol_present, bool *var_present,
4515                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4516 {
4517   tree core;
4518   HOST_WIDE_INT bitsize;
4519   HOST_WIDE_INT bitpos;
4520   tree toffset;
4521   machine_mode mode;
4522   int unsignedp, reversep, volatilep;
4523
4524   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4525                               &unsignedp, &reversep, &volatilep, false);
4526
4527   if (toffset != 0
4528       || bitpos % BITS_PER_UNIT != 0
4529       || reversep
4530       || TREE_CODE (core) != VAR_DECL)
4531     {
4532       *symbol_present = false;
4533       *var_present = true;
4534       fd_ivopts_data = data;
4535       if (depends_on)
4536         walk_tree (&addr, find_depends, depends_on, NULL);
4537
4538       return comp_cost (target_spill_cost[data->speed], 0);
4539     }
4540
4541   *offset += bitpos / BITS_PER_UNIT;
4542   if (TREE_STATIC (core)
4543       || DECL_EXTERNAL (core))
4544     {
4545       *symbol_present = true;
4546       *var_present = false;
4547       return no_cost;
4548     }
4549
4550   *symbol_present = false;
4551   *var_present = true;
4552   return no_cost;
4553 }
4554
4555 /* Estimates cost of expressing difference of addresses E1 - E2 as
4556    var + symbol + offset.  The value of offset is added to OFFSET,
4557    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4558    part is missing.  DEPENDS_ON is a set of the invariants the computation
4559    depends on.  */
4560
4561 static comp_cost
4562 ptr_difference_cost (struct ivopts_data *data,
4563                      tree e1, tree e2, bool *symbol_present, bool *var_present,
4564                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4565 {
4566   HOST_WIDE_INT diff = 0;
4567   aff_tree aff_e1, aff_e2;
4568   tree type;
4569
4570   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4571
4572   if (ptr_difference_const (e1, e2, &diff))
4573     {
4574       *offset += diff;
4575       *symbol_present = false;
4576       *var_present = false;
4577       return no_cost;
4578     }
4579
4580   if (integer_zerop (e2))
4581     return split_address_cost (data, TREE_OPERAND (e1, 0),
4582                                symbol_present, var_present, offset, depends_on);
4583
4584   *symbol_present = false;
4585   *var_present = true;
4586
4587   type = signed_type_for (TREE_TYPE (e1));
4588   tree_to_aff_combination (e1, type, &aff_e1);
4589   tree_to_aff_combination (e2, type, &aff_e2);
4590   aff_combination_scale (&aff_e2, -1);
4591   aff_combination_add (&aff_e1, &aff_e2);
4592
4593   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4594 }
4595
4596 /* Estimates cost of expressing difference E1 - E2 as
4597    var + symbol + offset.  The value of offset is added to OFFSET,
4598    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4599    part is missing.  DEPENDS_ON is a set of the invariants the computation
4600    depends on.  */
4601
4602 static comp_cost
4603 difference_cost (struct ivopts_data *data,
4604                  tree e1, tree e2, bool *symbol_present, bool *var_present,
4605                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4606 {
4607   machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4608   unsigned HOST_WIDE_INT off1, off2;
4609   aff_tree aff_e1, aff_e2;
4610   tree type;
4611
4612   e1 = strip_offset (e1, &off1);
4613   e2 = strip_offset (e2, &off2);
4614   *offset += off1 - off2;
4615
4616   STRIP_NOPS (e1);
4617   STRIP_NOPS (e2);
4618
4619   if (TREE_CODE (e1) == ADDR_EXPR)
4620     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4621                                 offset, depends_on);
4622   *symbol_present = false;
4623
4624   if (operand_equal_p (e1, e2, 0))
4625     {
4626       *var_present = false;
4627       return no_cost;
4628     }
4629
4630   *var_present = true;
4631
4632   if (integer_zerop (e2))
4633     return force_var_cost (data, e1, depends_on);
4634
4635   if (integer_zerop (e1))
4636     {
4637       comp_cost cost = force_var_cost (data, e2, depends_on);
4638       cost += mult_by_coeff_cost (-1, mode, data->speed);
4639       return cost;
4640     }
4641
4642   type = signed_type_for (TREE_TYPE (e1));
4643   tree_to_aff_combination (e1, type, &aff_e1);
4644   tree_to_aff_combination (e2, type, &aff_e2);
4645   aff_combination_scale (&aff_e2, -1);
4646   aff_combination_add (&aff_e1, &aff_e2);
4647
4648   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4649 }
4650
4651 /* Returns true if AFF1 and AFF2 are identical.  */
4652
4653 static bool
4654 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4655 {
4656   unsigned i;
4657
4658   if (aff1->n != aff2->n)
4659     return false;
4660
4661   for (i = 0; i < aff1->n; i++)
4662     {
4663       if (aff1->elts[i].coef != aff2->elts[i].coef)
4664         return false;
4665
4666       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4667         return false;
4668     }
4669   return true;
4670 }
4671
4672 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
4673
4674 static iv_inv_expr_ent *
4675 record_inv_expr (struct ivopts_data *data, tree expr)
4676 {
4677   struct iv_inv_expr_ent ent;
4678   struct iv_inv_expr_ent **slot;
4679
4680   ent.expr = expr;
4681   ent.hash = iterative_hash_expr (expr, 0);
4682   slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4683
4684   if (!*slot)
4685     {
4686       *slot = XNEW (struct iv_inv_expr_ent);
4687       (*slot)->expr = expr;
4688       (*slot)->hash = ent.hash;
4689       (*slot)->id = data->max_inv_expr_id++;
4690     }
4691
4692   return *slot;
4693 }
4694
4695 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4696    requires a new compiler generated temporary.  Returns -1 otherwise.
4697    ADDRESS_P is a flag indicating if the expression is for address
4698    computation.  */
4699
4700 static iv_inv_expr_ent *
4701 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4702                          tree cbase, HOST_WIDE_INT ratio,
4703                          bool address_p)
4704 {
4705   aff_tree ubase_aff, cbase_aff;
4706   tree expr, ub, cb;
4707
4708   STRIP_NOPS (ubase);
4709   STRIP_NOPS (cbase);
4710   ub = ubase;
4711   cb = cbase;
4712
4713   if ((TREE_CODE (ubase) == INTEGER_CST)
4714       && (TREE_CODE (cbase) == INTEGER_CST))
4715     return NULL;
4716
4717   /* Strips the constant part. */
4718   if (TREE_CODE (ubase) == PLUS_EXPR
4719       || TREE_CODE (ubase) == MINUS_EXPR
4720       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4721     {
4722       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4723         ubase = TREE_OPERAND (ubase, 0);
4724     }
4725
4726   /* Strips the constant part. */
4727   if (TREE_CODE (cbase) == PLUS_EXPR
4728       || TREE_CODE (cbase) == MINUS_EXPR
4729       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4730     {
4731       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4732         cbase = TREE_OPERAND (cbase, 0);
4733     }
4734
4735   if (address_p)
4736     {
4737       if (((TREE_CODE (ubase) == SSA_NAME)
4738            || (TREE_CODE (ubase) == ADDR_EXPR
4739                && is_gimple_min_invariant (ubase)))
4740           && (TREE_CODE (cbase) == INTEGER_CST))
4741         return NULL;
4742
4743       if (((TREE_CODE (cbase) == SSA_NAME)
4744            || (TREE_CODE (cbase) == ADDR_EXPR
4745                && is_gimple_min_invariant (cbase)))
4746           && (TREE_CODE (ubase) == INTEGER_CST))
4747         return NULL;
4748     }
4749
4750   if (ratio == 1)
4751     {
4752       if (operand_equal_p (ubase, cbase, 0))
4753         return NULL;
4754
4755       if (TREE_CODE (ubase) == ADDR_EXPR
4756           && TREE_CODE (cbase) == ADDR_EXPR)
4757         {
4758           tree usym, csym;
4759
4760           usym = TREE_OPERAND (ubase, 0);
4761           csym = TREE_OPERAND (cbase, 0);
4762           if (TREE_CODE (usym) == ARRAY_REF)
4763             {
4764               tree ind = TREE_OPERAND (usym, 1);
4765               if (TREE_CODE (ind) == INTEGER_CST
4766                   && tree_fits_shwi_p (ind)
4767                   && tree_to_shwi (ind) == 0)
4768                 usym = TREE_OPERAND (usym, 0);
4769             }
4770           if (TREE_CODE (csym) == ARRAY_REF)
4771             {
4772               tree ind = TREE_OPERAND (csym, 1);
4773               if (TREE_CODE (ind) == INTEGER_CST
4774                   && tree_fits_shwi_p (ind)
4775                   && tree_to_shwi (ind) == 0)
4776                 csym = TREE_OPERAND (csym, 0);
4777             }
4778           if (operand_equal_p (usym, csym, 0))
4779             return NULL;
4780         }
4781       /* Now do more complex comparison  */
4782       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4783       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4784       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4785         return NULL;
4786     }
4787
4788   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4789   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4790
4791   aff_combination_scale (&cbase_aff, -1 * ratio);
4792   aff_combination_add (&ubase_aff, &cbase_aff);
4793   expr = aff_combination_to_tree (&ubase_aff);
4794   return record_inv_expr (data, expr);
4795 }
4796
4797
4798
4799 /* Determines the cost of the computation by that USE is expressed
4800    from induction variable CAND.  If ADDRESS_P is true, we just need
4801    to create an address from it, otherwise we want to get it into
4802    register.  A set of invariants we depend on is stored in
4803    DEPENDS_ON.  AT is the statement at that the value is computed.
4804    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4805    addressing is likely.  */
4806
4807 static comp_cost
4808 get_computation_cost_at (struct ivopts_data *data,
4809                          struct iv_use *use, struct iv_cand *cand,
4810                          bool address_p, bitmap *depends_on, gimple *at,
4811                          bool *can_autoinc,
4812                          iv_inv_expr_ent **inv_expr)
4813 {
4814   tree ubase = use->iv->base, ustep = use->iv->step;
4815   tree cbase, cstep;
4816   tree utype = TREE_TYPE (ubase), ctype;
4817   unsigned HOST_WIDE_INT cstepi, offset = 0;
4818   HOST_WIDE_INT ratio, aratio;
4819   bool var_present, symbol_present, stmt_is_after_inc;
4820   comp_cost cost;
4821   widest_int rat;
4822   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4823   machine_mode mem_mode = (address_p
4824                                 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4825                                 : VOIDmode);
4826
4827   if (depends_on)
4828     *depends_on = NULL;
4829
4830   /* Only consider real candidates.  */
4831   if (!cand->iv)
4832     return infinite_cost;
4833
4834   cbase = cand->iv->base;
4835   cstep = cand->iv->step;
4836   ctype = TREE_TYPE (cbase);
4837
4838   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4839     {
4840       /* We do not have a precision to express the values of use.  */
4841       return infinite_cost;
4842     }
4843
4844   if (address_p
4845       || (use->iv->base_object
4846           && cand->iv->base_object
4847           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4848           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4849     {
4850       /* Do not try to express address of an object with computation based
4851          on address of a different object.  This may cause problems in rtl
4852          level alias analysis (that does not expect this to be happening,
4853          as this is illegal in C), and would be unlikely to be useful
4854          anyway.  */
4855       if (use->iv->base_object
4856           && cand->iv->base_object
4857           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4858         return infinite_cost;
4859     }
4860
4861   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4862     {
4863       /* TODO -- add direct handling of this case.  */
4864       goto fallback;
4865     }
4866
4867   /* CSTEPI is removed from the offset in case statement is after the
4868      increment.  If the step is not constant, we use zero instead.
4869      This is a bit imprecise (there is the extra addition), but
4870      redundancy elimination is likely to transform the code so that
4871      it uses value of the variable before increment anyway,
4872      so it is not that much unrealistic.  */
4873   if (cst_and_fits_in_hwi (cstep))
4874     cstepi = int_cst_value (cstep);
4875   else
4876     cstepi = 0;
4877
4878   if (!constant_multiple_of (ustep, cstep, &rat))
4879     return infinite_cost;
4880
4881   if (wi::fits_shwi_p (rat))
4882     ratio = rat.to_shwi ();
4883   else
4884     return infinite_cost;
4885
4886   STRIP_NOPS (cbase);
4887   ctype = TREE_TYPE (cbase);
4888
4889   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4890
4891   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4892      or ratio == 1, it is better to handle this like
4893
4894      ubase - ratio * cbase + ratio * var
4895
4896      (also holds in the case ratio == -1, TODO.  */
4897
4898   if (cst_and_fits_in_hwi (cbase))
4899     {
4900       offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4901       cost = difference_cost (data,
4902                               ubase, build_int_cst (utype, 0),
4903                               &symbol_present, &var_present, &offset,
4904                               depends_on);
4905       cost /= avg_loop_niter (data->current_loop);
4906     }
4907   else if (ratio == 1)
4908     {
4909       tree real_cbase = cbase;
4910
4911       /* Check to see if any adjustment is needed.  */
4912       if (cstepi == 0 && stmt_is_after_inc)
4913         {
4914           aff_tree real_cbase_aff;
4915           aff_tree cstep_aff;
4916
4917           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4918                                    &real_cbase_aff);
4919           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4920
4921           aff_combination_add (&real_cbase_aff, &cstep_aff);
4922           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4923         }
4924
4925       cost = difference_cost (data,
4926                               ubase, real_cbase,
4927                               &symbol_present, &var_present, &offset,
4928                               depends_on);
4929       cost /= avg_loop_niter (data->current_loop);
4930     }
4931   else if (address_p
4932            && !POINTER_TYPE_P (ctype)
4933            && multiplier_allowed_in_address_p
4934                 (ratio, mem_mode,
4935                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4936     {
4937       tree real_cbase = cbase;
4938
4939       if (cstepi == 0 && stmt_is_after_inc)
4940         {
4941           if (POINTER_TYPE_P (ctype))
4942             real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4943           else
4944             real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4945         }
4946       real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4947                                 build_int_cst (ctype, ratio));
4948       cost = difference_cost (data,
4949                               ubase, real_cbase,
4950                               &symbol_present, &var_present, &offset,
4951                               depends_on);
4952       cost /= avg_loop_niter (data->current_loop);
4953     }
4954   else
4955     {
4956       cost = force_var_cost (data, cbase, depends_on);
4957       cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4958                                &symbol_present, &var_present, &offset,
4959                                depends_on);
4960       cost /= avg_loop_niter (data->current_loop);
4961       cost += add_cost (data->speed, TYPE_MODE (ctype));
4962     }
4963
4964   /* Record setup cost in scratch field.  */
4965   cost.scratch = cost.cost;
4966
4967   if (inv_expr && depends_on && *depends_on)
4968     {
4969       *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4970                                            address_p);
4971       /* Clear depends on.  */
4972       if (*inv_expr != NULL)
4973         bitmap_clear (*depends_on);
4974     }
4975
4976   /* If we are after the increment, the value of the candidate is higher by
4977      one iteration.  */
4978   if (stmt_is_after_inc)
4979     offset -= ratio * cstepi;
4980
4981   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4982      (symbol/var1/const parts may be omitted).  If we are looking for an
4983      address, find the cost of addressing this.  */
4984   if (address_p)
4985     return cost + get_address_cost (symbol_present, var_present,
4986                                     offset, ratio, cstepi,
4987                                     mem_mode,
4988                                     TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4989                                     speed, stmt_is_after_inc, can_autoinc);
4990
4991   /* Otherwise estimate the costs for computing the expression.  */
4992   if (!symbol_present && !var_present && !offset)
4993     {
4994       if (ratio != 1)
4995         cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4996       return cost;
4997     }
4998
4999   /* Symbol + offset should be compile-time computable so consider that they
5000       are added once to the variable, if present.  */
5001   if (var_present && (symbol_present || offset))
5002     cost += adjust_setup_cost (data,
5003                                     add_cost (speed, TYPE_MODE (ctype)));
5004
5005   /* Having offset does not affect runtime cost in case it is added to
5006      symbol, but it increases complexity.  */
5007   if (offset)
5008     cost.complexity++;
5009
5010   cost += add_cost (speed, TYPE_MODE (ctype));
5011
5012   aratio = ratio > 0 ? ratio : -ratio;
5013   if (aratio != 1)
5014     cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5015   return cost;
5016
5017 fallback:
5018   if (can_autoinc)
5019     *can_autoinc = false;
5020
5021   {
5022     /* Just get the expression, expand it and measure the cost.  */
5023     tree comp = get_computation_at (data->current_loop, use, cand, at);
5024
5025     if (!comp)
5026       return infinite_cost;
5027
5028     if (address_p)
5029       comp = build_simple_mem_ref (comp);
5030
5031     return comp_cost (computation_cost (comp, speed), 0);
5032   }
5033 }
5034
5035 /* Determines the cost of the computation by that USE is expressed
5036    from induction variable CAND.  If ADDRESS_P is true, we just need
5037    to create an address from it, otherwise we want to get it into
5038    register.  A set of invariants we depend on is stored in
5039    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
5040    autoinc addressing is likely.  */
5041
5042 static comp_cost
5043 get_computation_cost (struct ivopts_data *data,
5044                       struct iv_use *use, struct iv_cand *cand,
5045                       bool address_p, bitmap *depends_on,
5046                       bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5047 {
5048   return get_computation_cost_at (data,
5049                                   use, cand, address_p, depends_on, use->stmt,
5050                                   can_autoinc, inv_expr);
5051 }
5052
5053 /* Determines cost of computing the use in GROUP with CAND in a generic
5054    expression.  */
5055
5056 static bool
5057 determine_group_iv_cost_generic (struct ivopts_data *data,
5058                                  struct iv_group *group, struct iv_cand *cand)
5059 {
5060   comp_cost cost;
5061   iv_inv_expr_ent *inv_expr = NULL;
5062   bitmap depends_on = NULL;
5063   struct iv_use *use = group->vuses[0];
5064
5065   /* The simple case first -- if we need to express value of the preserved
5066      original biv, the cost is 0.  This also prevents us from counting the
5067      cost of increment twice -- once at this use and once in the cost of
5068      the candidate.  */
5069   if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5070     cost = no_cost;
5071   else
5072     cost = get_computation_cost (data, use, cand, false,
5073                                  &depends_on, NULL, &inv_expr);
5074
5075   set_group_iv_cost (data, group, cand, cost, depends_on,
5076                      NULL_TREE, ERROR_MARK, inv_expr);
5077   return !cost.infinite_cost_p ();
5078 }
5079
5080 /* Determines cost of computing uses in GROUP with CAND in addresses.  */
5081
5082 static bool
5083 determine_group_iv_cost_address (struct ivopts_data *data,
5084                                  struct iv_group *group, struct iv_cand *cand)
5085 {
5086   unsigned i;
5087   bitmap depends_on;
5088   bool can_autoinc, first = true;
5089   iv_inv_expr_ent *inv_expr = NULL;
5090   struct iv_use *use = group->vuses[0];
5091   comp_cost sum_cost = no_cost, cost;
5092
5093   cost = get_computation_cost (data, use, cand, true,
5094                                &depends_on, &can_autoinc, &inv_expr);
5095
5096   sum_cost = cost;
5097   if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5098     {
5099       if (can_autoinc)
5100         sum_cost -= cand->cost_step;
5101       /* If we generated the candidate solely for exploiting autoincrement
5102          opportunities, and it turns out it can't be used, set the cost to
5103          infinity to make sure we ignore it.  */
5104       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5105         sum_cost = infinite_cost;
5106     }
5107
5108   /* Uses in a group can share setup code, so only add setup cost once.  */
5109   cost -= cost.scratch;
5110   /* Compute and add costs for rest uses of this group.  */
5111   for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5112     {
5113       struct iv_use *next = group->vuses[i];
5114
5115       /* Compute cost for the first use with different offset to the main
5116          use and add it afterwards.  Costs for these uses could be quite
5117          different.  Given below uses in a group:
5118            use 0  : {base + A + offset_0, step}
5119            use 0.1: {base + A + offset_0, step}
5120            use 0.2: {base + A + offset_1, step}
5121            use 0.3: {base + A + offset_2, step}
5122          when we need to compute costs with candidate:
5123            cand 1 : {base + B + offset_0, step}
5124
5125          The first use with different offset is use 0.2, its cost is larger
5126          than cost of use 0/0.1 because we need to compute:
5127            A - B + offset_1 - offset_0
5128            rather than:
5129            A - B.  */
5130       if (first && next->addr_offset != use->addr_offset)
5131         {
5132           first = false;
5133           cost = get_computation_cost (data, next, cand, true,
5134                                        NULL, &can_autoinc, NULL);
5135           /* Remove setup cost.  */
5136           if (!cost.infinite_cost_p ())
5137             cost -= cost.scratch;
5138         }
5139       sum_cost += cost;
5140     }
5141   set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5142                      NULL_TREE, ERROR_MARK, inv_expr);
5143
5144   return !sum_cost.infinite_cost_p ();
5145 }
5146
5147 /* Computes value of candidate CAND at position AT in iteration NITER, and
5148    stores it to VAL.  */
5149
5150 static void
5151 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5152                aff_tree *val)
5153 {
5154   aff_tree step, delta, nit;
5155   struct iv *iv = cand->iv;
5156   tree type = TREE_TYPE (iv->base);
5157   tree steptype = type;
5158   if (POINTER_TYPE_P (type))
5159     steptype = sizetype;
5160   steptype = unsigned_type_for (type);
5161
5162   tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5163   aff_combination_convert (&step, steptype);
5164   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5165   aff_combination_convert (&nit, steptype);
5166   aff_combination_mult (&nit, &step, &delta);
5167   if (stmt_after_increment (loop, cand, at))
5168     aff_combination_add (&delta, &step);
5169
5170   tree_to_aff_combination (iv->base, type, val);
5171   if (!POINTER_TYPE_P (type))
5172     aff_combination_convert (val, steptype);
5173   aff_combination_add (val, &delta);
5174 }
5175
5176 /* Returns period of induction variable iv.  */
5177
5178 static tree
5179 iv_period (struct iv *iv)
5180 {
5181   tree step = iv->step, period, type;
5182   tree pow2div;
5183
5184   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5185
5186   type = unsigned_type_for (TREE_TYPE (step));
5187   /* Period of the iv is lcm (step, type_range)/step -1,
5188      i.e., N*type_range/step - 1. Since type range is power
5189      of two, N == (step >> num_of_ending_zeros_binary (step),
5190      so the final result is
5191
5192        (type_range >> num_of_ending_zeros_binary (step)) - 1
5193
5194   */
5195   pow2div = num_ending_zeros (step);
5196
5197   period = build_low_bits_mask (type,
5198                                 (TYPE_PRECISION (type)
5199                                  - tree_to_uhwi (pow2div)));
5200
5201   return period;
5202 }
5203
5204 /* Returns the comparison operator used when eliminating the iv USE.  */
5205
5206 static enum tree_code
5207 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5208 {
5209   struct loop *loop = data->current_loop;
5210   basic_block ex_bb;
5211   edge exit;
5212
5213   ex_bb = gimple_bb (use->stmt);
5214   exit = EDGE_SUCC (ex_bb, 0);
5215   if (flow_bb_inside_loop_p (loop, exit->dest))
5216     exit = EDGE_SUCC (ex_bb, 1);
5217
5218   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5219 }
5220
5221 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
5222    we only detect the situation that BASE = SOMETHING + OFFSET, where the
5223    calculation is performed in non-wrapping type.
5224
5225    TODO: More generally, we could test for the situation that
5226          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5227          This would require knowing the sign of OFFSET.  */
5228
5229 static bool
5230 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5231 {
5232   enum tree_code code;
5233   tree e1, e2;
5234   aff_tree aff_e1, aff_e2, aff_offset;
5235
5236   if (!nowrap_type_p (TREE_TYPE (base)))
5237     return false;
5238
5239   base = expand_simple_operations (base);
5240
5241   if (TREE_CODE (base) == SSA_NAME)
5242     {
5243       gimple *stmt = SSA_NAME_DEF_STMT (base);
5244
5245       if (gimple_code (stmt) != GIMPLE_ASSIGN)
5246         return false;
5247
5248       code = gimple_assign_rhs_code (stmt);
5249       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5250         return false;
5251
5252       e1 = gimple_assign_rhs1 (stmt);
5253       e2 = gimple_assign_rhs2 (stmt);
5254     }
5255   else
5256     {
5257       code = TREE_CODE (base);
5258       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5259         return false;
5260       e1 = TREE_OPERAND (base, 0);
5261       e2 = TREE_OPERAND (base, 1);
5262     }
5263
5264   /* Use affine expansion as deeper inspection to prove the equality.  */
5265   tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5266                                   &aff_e2, &data->name_expansion_cache);
5267   tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5268                                   &aff_offset, &data->name_expansion_cache);
5269   aff_combination_scale (&aff_offset, -1);
5270   switch (code)
5271     {
5272     case PLUS_EXPR:
5273       aff_combination_add (&aff_e2, &aff_offset);
5274       if (aff_combination_zero_p (&aff_e2))
5275         return true;
5276
5277       tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5278                                       &aff_e1, &data->name_expansion_cache);
5279       aff_combination_add (&aff_e1, &aff_offset);
5280       return aff_combination_zero_p (&aff_e1);
5281
5282     case POINTER_PLUS_EXPR:
5283       aff_combination_add (&aff_e2, &aff_offset);
5284       return aff_combination_zero_p (&aff_e2);
5285
5286     default:
5287       return false;
5288     }
5289 }
5290
5291 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5292    comparison with CAND.  NITER describes the number of iterations of
5293    the loops.  If successful, the comparison in COMP_P is altered accordingly.
5294
5295    We aim to handle the following situation:
5296
5297    sometype *base, *p;
5298    int a, b, i;
5299
5300    i = a;
5301    p = p_0 = base + a;
5302
5303    do
5304      {
5305        bla (*p);
5306        p++;
5307        i++;
5308      }
5309    while (i < b);
5310
5311    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5312    We aim to optimize this to
5313
5314    p = p_0 = base + a;
5315    do
5316      {
5317        bla (*p);
5318        p++;
5319      }
5320    while (p < p_0 - a + b);
5321
5322    This preserves the correctness, since the pointer arithmetics does not
5323    overflow.  More precisely:
5324
5325    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5326       overflow in computing it or the values of p.
5327    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5328       overflow.  To prove this, we use the fact that p_0 = base + a.  */
5329
5330 static bool
5331 iv_elimination_compare_lt (struct ivopts_data *data,
5332                            struct iv_cand *cand, enum tree_code *comp_p,
5333                            struct tree_niter_desc *niter)
5334 {
5335   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5336   struct aff_tree nit, tmpa, tmpb;
5337   enum tree_code comp;
5338   HOST_WIDE_INT step;
5339
5340   /* We need to know that the candidate induction variable does not overflow.
5341      While more complex analysis may be used to prove this, for now just
5342      check that the variable appears in the original program and that it
5343      is computed in a type that guarantees no overflows.  */
5344   cand_type = TREE_TYPE (cand->iv->base);
5345   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5346     return false;
5347
5348   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5349      the calculation of the BOUND could overflow, making the comparison
5350      invalid.  */
5351   if (!data->loop_single_exit_p)
5352     return false;
5353
5354   /* We need to be able to decide whether candidate is increasing or decreasing
5355      in order to choose the right comparison operator.  */
5356   if (!cst_and_fits_in_hwi (cand->iv->step))
5357     return false;
5358   step = int_cst_value (cand->iv->step);
5359
5360   /* Check that the number of iterations matches the expected pattern:
5361      a + 1 > b ? 0 : b - a - 1.  */
5362   mbz = niter->may_be_zero;
5363   if (TREE_CODE (mbz) == GT_EXPR)
5364     {
5365       /* Handle a + 1 > b.  */
5366       tree op0 = TREE_OPERAND (mbz, 0);
5367       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5368         {
5369           a = TREE_OPERAND (op0, 0);
5370           b = TREE_OPERAND (mbz, 1);
5371         }
5372       else
5373         return false;
5374     }
5375   else if (TREE_CODE (mbz) == LT_EXPR)
5376     {
5377       tree op1 = TREE_OPERAND (mbz, 1);
5378
5379       /* Handle b < a + 1.  */
5380       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5381         {
5382           a = TREE_OPERAND (op1, 0);
5383           b = TREE_OPERAND (mbz, 0);
5384         }
5385       else
5386         return false;
5387     }
5388   else
5389     return false;
5390
5391   /* Expected number of iterations is B - A - 1.  Check that it matches
5392      the actual number, i.e., that B - A - NITER = 1.  */
5393   tree_to_aff_combination (niter->niter, nit_type, &nit);
5394   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5395   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5396   aff_combination_scale (&nit, -1);
5397   aff_combination_scale (&tmpa, -1);
5398   aff_combination_add (&tmpb, &tmpa);
5399   aff_combination_add (&tmpb, &nit);
5400   if (tmpb.n != 0 || tmpb.offset != 1)
5401     return false;
5402
5403   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5404      overflow.  */
5405   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5406                         cand->iv->step,
5407                         fold_convert (TREE_TYPE (cand->iv->step), a));
5408   if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5409     return false;
5410
5411   /* Determine the new comparison operator.  */
5412   comp = step < 0 ? GT_EXPR : LT_EXPR;
5413   if (*comp_p == NE_EXPR)
5414     *comp_p = comp;
5415   else if (*comp_p == EQ_EXPR)
5416     *comp_p = invert_tree_comparison (comp, false);
5417   else
5418     gcc_unreachable ();
5419
5420   return true;
5421 }
5422
5423 /* Check whether it is possible to express the condition in USE by comparison
5424    of candidate CAND.  If so, store the value compared with to BOUND, and the
5425    comparison operator to COMP.  */
5426
5427 static bool
5428 may_eliminate_iv (struct ivopts_data *data,
5429                   struct iv_use *use, struct iv_cand *cand, tree *bound,
5430                   enum tree_code *comp)
5431 {
5432   basic_block ex_bb;
5433   edge exit;
5434   tree period;
5435   struct loop *loop = data->current_loop;
5436   aff_tree bnd;
5437   struct tree_niter_desc *desc = NULL;
5438
5439   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5440     return false;
5441
5442   /* For now works only for exits that dominate the loop latch.
5443      TODO: extend to other conditions inside loop body.  */
5444   ex_bb = gimple_bb (use->stmt);
5445   if (use->stmt != last_stmt (ex_bb)
5446       || gimple_code (use->stmt) != GIMPLE_COND
5447       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5448     return false;
5449
5450   exit = EDGE_SUCC (ex_bb, 0);
5451   if (flow_bb_inside_loop_p (loop, exit->dest))
5452     exit = EDGE_SUCC (ex_bb, 1);
5453   if (flow_bb_inside_loop_p (loop, exit->dest))
5454     return false;
5455
5456   desc = niter_for_exit (data, exit);
5457   if (!desc)
5458     return false;
5459
5460   /* Determine whether we can use the variable to test the exit condition.
5461      This is the case iff the period of the induction variable is greater
5462      than the number of iterations for which the exit condition is true.  */
5463   period = iv_period (cand->iv);
5464
5465   /* If the number of iterations is constant, compare against it directly.  */
5466   if (TREE_CODE (desc->niter) == INTEGER_CST)
5467     {
5468       /* See cand_value_at.  */
5469       if (stmt_after_increment (loop, cand, use->stmt))
5470         {
5471           if (!tree_int_cst_lt (desc->niter, period))
5472             return false;
5473         }
5474       else
5475         {
5476           if (tree_int_cst_lt (period, desc->niter))
5477             return false;
5478         }
5479     }
5480
5481   /* If not, and if this is the only possible exit of the loop, see whether
5482      we can get a conservative estimate on the number of iterations of the
5483      entire loop and compare against that instead.  */
5484   else
5485     {
5486       widest_int period_value, max_niter;
5487
5488       max_niter = desc->max;
5489       if (stmt_after_increment (loop, cand, use->stmt))
5490         max_niter += 1;
5491       period_value = wi::to_widest (period);
5492       if (wi::gtu_p (max_niter, period_value))
5493         {
5494           /* See if we can take advantage of inferred loop bound
5495              information.  */
5496           if (data->loop_single_exit_p)
5497             {
5498               if (!max_loop_iterations (loop, &max_niter))
5499                 return false;
5500               /* The loop bound is already adjusted by adding 1.  */
5501               if (wi::gtu_p (max_niter, period_value))
5502                 return false;
5503             }
5504           else
5505             return false;
5506         }
5507     }
5508
5509   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5510
5511   *bound = fold_convert (TREE_TYPE (cand->iv->base),
5512                          aff_combination_to_tree (&bnd));
5513   *comp = iv_elimination_compare (data, use);
5514
5515   /* It is unlikely that computing the number of iterations using division
5516      would be more profitable than keeping the original induction variable.  */
5517   if (expression_expensive_p (*bound))
5518     return false;
5519
5520   /* Sometimes, it is possible to handle the situation that the number of
5521      iterations may be zero unless additional assumtions by using <
5522      instead of != in the exit condition.
5523
5524      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5525            base the exit condition on it.  However, that is often too
5526            expensive.  */
5527   if (!integer_zerop (desc->may_be_zero))
5528     return iv_elimination_compare_lt (data, cand, comp, desc);
5529
5530   return true;
5531 }
5532
5533  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
5534     be copied, if it is used in the loop body and DATA->body_includes_call.  */
5535
5536 static int
5537 parm_decl_cost (struct ivopts_data *data, tree bound)
5538 {
5539   tree sbound = bound;
5540   STRIP_NOPS (sbound);
5541
5542   if (TREE_CODE (sbound) == SSA_NAME
5543       && SSA_NAME_IS_DEFAULT_DEF (sbound)
5544       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5545       && data->body_includes_call)
5546     return COSTS_N_INSNS (1);
5547
5548   return 0;
5549 }
5550
5551 /* Determines cost of computing the use in GROUP with CAND in a condition.  */
5552
5553 static bool
5554 determine_group_iv_cost_cond (struct ivopts_data *data,
5555                               struct iv_group *group, struct iv_cand *cand)
5556 {
5557   tree bound = NULL_TREE;
5558   struct iv *cmp_iv;
5559   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5560   comp_cost elim_cost, express_cost, cost, bound_cost;
5561   bool ok;
5562   iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5563   tree *control_var, *bound_cst;
5564   enum tree_code comp = ERROR_MARK;
5565   struct iv_use *use = group->vuses[0];
5566
5567   gcc_assert (cand->iv);
5568
5569   /* Try iv elimination.  */
5570   if (may_eliminate_iv (data, use, cand, &bound, &comp))
5571     {
5572       elim_cost = force_var_cost (data, bound, &depends_on_elim);
5573       if (elim_cost.cost == 0)
5574         elim_cost.cost = parm_decl_cost (data, bound);
5575       else if (TREE_CODE (bound) == INTEGER_CST)
5576         elim_cost.cost = 0;
5577       /* If we replace a loop condition 'i < n' with 'p < base + n',
5578          depends_on_elim will have 'base' and 'n' set, which implies
5579          that both 'base' and 'n' will be live during the loop.  More likely,
5580          'base + n' will be loop invariant, resulting in only one live value
5581          during the loop.  So in that case we clear depends_on_elim and set
5582         elim_inv_expr_id instead.  */
5583       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5584         {
5585           elim_inv_expr = record_inv_expr (data, bound);
5586           bitmap_clear (depends_on_elim);
5587         }
5588       /* The bound is a loop invariant, so it will be only computed
5589          once.  */
5590       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5591     }
5592   else
5593     elim_cost = infinite_cost;
5594
5595   /* Try expressing the original giv.  If it is compared with an invariant,
5596      note that we cannot get rid of it.  */
5597   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5598                               NULL, &cmp_iv);
5599   gcc_assert (ok);
5600
5601   /* When the condition is a comparison of the candidate IV against
5602      zero, prefer this IV.
5603
5604      TODO: The constant that we're subtracting from the cost should
5605      be target-dependent.  This information should be added to the
5606      target costs for each backend.  */
5607   if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5608       && integer_zerop (*bound_cst)
5609       && (operand_equal_p (*control_var, cand->var_after, 0)
5610           || operand_equal_p (*control_var, cand->var_before, 0)))
5611     elim_cost -= 1;
5612
5613   express_cost = get_computation_cost (data, use, cand, false,
5614                                        &depends_on_express, NULL,
5615                                        &express_inv_expr);
5616   fd_ivopts_data = data;
5617   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5618
5619   /* Count the cost of the original bound as well.  */
5620   bound_cost = force_var_cost (data, *bound_cst, NULL);
5621   if (bound_cost.cost == 0)
5622     bound_cost.cost = parm_decl_cost (data, *bound_cst);
5623   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5624     bound_cost.cost = 0;
5625   express_cost += bound_cost;
5626
5627   /* Choose the better approach, preferring the eliminated IV. */
5628   if (elim_cost <= express_cost)
5629     {
5630       cost = elim_cost;
5631       depends_on = depends_on_elim;
5632       depends_on_elim = NULL;
5633       inv_expr = elim_inv_expr;
5634     }
5635   else
5636     {
5637       cost = express_cost;
5638       depends_on = depends_on_express;
5639       depends_on_express = NULL;
5640       bound = NULL_TREE;
5641       comp = ERROR_MARK;
5642       inv_expr = express_inv_expr;
5643     }
5644
5645   set_group_iv_cost (data, group, cand, cost,
5646                      depends_on, bound, comp, inv_expr);
5647
5648   if (depends_on_elim)
5649     BITMAP_FREE (depends_on_elim);
5650   if (depends_on_express)
5651     BITMAP_FREE (depends_on_express);
5652
5653   return !cost.infinite_cost_p ();
5654 }
5655
5656 /* Determines cost of computing uses in GROUP with CAND.  Returns false
5657    if USE cannot be represented with CAND.  */
5658
5659 static bool
5660 determine_group_iv_cost (struct ivopts_data *data,
5661                          struct iv_group *group, struct iv_cand *cand)
5662 {
5663   switch (group->type)
5664     {
5665     case USE_NONLINEAR_EXPR:
5666       return determine_group_iv_cost_generic (data, group, cand);
5667
5668     case USE_ADDRESS:
5669       return determine_group_iv_cost_address (data, group, cand);
5670
5671     case USE_COMPARE:
5672       return determine_group_iv_cost_cond (data, group, cand);
5673
5674     default:
5675       gcc_unreachable ();
5676     }
5677 }
5678
5679 /* Return true if get_computation_cost indicates that autoincrement is
5680    a possibility for the pair of USE and CAND, false otherwise.  */
5681
5682 static bool
5683 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5684                            struct iv_cand *cand)
5685 {
5686   bitmap depends_on;
5687   bool can_autoinc;
5688   comp_cost cost;
5689
5690   if (use->type != USE_ADDRESS)
5691     return false;
5692
5693   cost = get_computation_cost (data, use, cand, true, &depends_on,
5694                                &can_autoinc, NULL);
5695
5696   BITMAP_FREE (depends_on);
5697
5698   return !cost.infinite_cost_p () && can_autoinc;
5699 }
5700
5701 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5702    use that allows autoincrement, and set their AINC_USE if possible.  */
5703
5704 static void
5705 set_autoinc_for_original_candidates (struct ivopts_data *data)
5706 {
5707   unsigned i, j;
5708
5709   for (i = 0; i < data->vcands.length (); i++)
5710     {
5711       struct iv_cand *cand = data->vcands[i];
5712       struct iv_use *closest_before = NULL;
5713       struct iv_use *closest_after = NULL;
5714       if (cand->pos != IP_ORIGINAL)
5715         continue;
5716
5717       for (j = 0; j < data->vgroups.length (); j++)
5718         {
5719           struct iv_group *group = data->vgroups[j];
5720           struct iv_use *use = group->vuses[0];
5721           unsigned uid = gimple_uid (use->stmt);
5722
5723           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5724             continue;
5725
5726           if (uid < gimple_uid (cand->incremented_at)
5727               && (closest_before == NULL
5728                   || uid > gimple_uid (closest_before->stmt)))
5729             closest_before = use;
5730
5731           if (uid > gimple_uid (cand->incremented_at)
5732               && (closest_after == NULL
5733                   || uid < gimple_uid (closest_after->stmt)))
5734             closest_after = use;
5735         }
5736
5737       if (closest_before != NULL
5738           && autoinc_possible_for_pair (data, closest_before, cand))
5739         cand->ainc_use = closest_before;
5740       else if (closest_after != NULL
5741                && autoinc_possible_for_pair (data, closest_after, cand))
5742         cand->ainc_use = closest_after;
5743     }
5744 }
5745
5746 /* Finds the candidates for the induction variables.  */
5747
5748 static void
5749 find_iv_candidates (struct ivopts_data *data)
5750 {
5751   /* Add commonly used ivs.  */
5752   add_standard_iv_candidates (data);
5753
5754   /* Add old induction variables.  */
5755   add_iv_candidate_for_bivs (data);
5756
5757   /* Add induction variables derived from uses.  */
5758   add_iv_candidate_for_groups (data);
5759
5760   set_autoinc_for_original_candidates (data);
5761
5762   /* Record the important candidates.  */
5763   record_important_candidates (data);
5764
5765   if (dump_file && (dump_flags & TDF_DETAILS))
5766     {
5767       unsigned i;
5768
5769       fprintf (dump_file, "\n<Important Candidates>:\t");
5770       for (i = 0; i < data->vcands.length (); i++)
5771         if (data->vcands[i]->important)
5772           fprintf (dump_file, " %d,", data->vcands[i]->id);
5773       fprintf (dump_file, "\n");
5774
5775       fprintf (dump_file, "\n<Group, Cand> Related:\n");
5776       for (i = 0; i < data->vgroups.length (); i++)
5777         {
5778           struct iv_group *group = data->vgroups[i];
5779
5780           if (group->related_cands)
5781             {
5782               fprintf (dump_file, "  Group %d:\t", group->id);
5783               dump_bitmap (dump_file, group->related_cands);
5784             }
5785         }
5786       fprintf (dump_file, "\n");
5787     }
5788 }
5789
5790 /* Determines costs of computing use of iv with an iv candidate.  */
5791
5792 static void
5793 determine_group_iv_costs (struct ivopts_data *data)
5794 {
5795   unsigned i, j;
5796   struct iv_cand *cand;
5797   struct iv_group *group;
5798   bitmap to_clear = BITMAP_ALLOC (NULL);
5799
5800   alloc_use_cost_map (data);
5801
5802   for (i = 0; i < data->vgroups.length (); i++)
5803     {
5804       group = data->vgroups[i];
5805
5806       if (data->consider_all_candidates)
5807         {
5808           for (j = 0; j < data->vcands.length (); j++)
5809             {
5810               cand = data->vcands[j];
5811               determine_group_iv_cost (data, group, cand);
5812             }
5813         }
5814       else
5815         {
5816           bitmap_iterator bi;
5817
5818           EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5819             {
5820               cand = data->vcands[j];
5821               if (!determine_group_iv_cost (data, group, cand))
5822                 bitmap_set_bit (to_clear, j);
5823             }
5824
5825           /* Remove the candidates for that the cost is infinite from
5826              the list of related candidates.  */
5827           bitmap_and_compl_into (group->related_cands, to_clear);
5828           bitmap_clear (to_clear);
5829         }
5830     }
5831
5832   BITMAP_FREE (to_clear);
5833
5834   if (dump_file && (dump_flags & TDF_DETAILS))
5835     {
5836       fprintf (dump_file, "\n<Invariant Expressions>:\n");
5837       auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5838
5839       for (hash_table<iv_inv_expr_hasher>::iterator it
5840            = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5841            ++it)
5842         list.safe_push (*it);
5843
5844       list.qsort (sort_iv_inv_expr_ent);
5845
5846       for (i = 0; i < list.length (); ++i)
5847         {
5848           fprintf (dump_file, "inv_expr %d: \t", i);
5849           print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5850           fprintf (dump_file, "\n");
5851         }
5852
5853       fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5854
5855       for (i = 0; i < data->vgroups.length (); i++)
5856         {
5857           group = data->vgroups[i];
5858
5859           fprintf (dump_file, "Group %d:\n", i);
5860           fprintf (dump_file, "  cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5861           for (j = 0; j < group->n_map_members; j++)
5862             {
5863               if (!group->cost_map[j].cand
5864                   || group->cost_map[j].cost.infinite_cost_p ())
5865                 continue;
5866
5867               fprintf (dump_file, "  %d\t%d\t%d\t",
5868                        group->cost_map[j].cand->id,
5869                        group->cost_map[j].cost.cost,
5870                        group->cost_map[j].cost.complexity);
5871               if (group->cost_map[j].inv_expr != NULL)
5872                 fprintf (dump_file, "%d\t",
5873                          group->cost_map[j].inv_expr->id);
5874               else
5875                 fprintf (dump_file, "\t");
5876               if (group->cost_map[j].depends_on)
5877                 bitmap_print (dump_file,
5878                               group->cost_map[j].depends_on, "","");
5879               fprintf (dump_file, "\n");
5880             }
5881
5882           fprintf (dump_file, "\n");
5883         }
5884       fprintf (dump_file, "\n");
5885     }
5886 }
5887
5888 /* Determines cost of the candidate CAND.  */
5889
5890 static void
5891 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5892 {
5893   comp_cost cost_base;
5894   unsigned cost, cost_step;
5895   tree base;
5896
5897   if (!cand->iv)
5898     {
5899       cand->cost = 0;
5900       return;
5901     }
5902
5903   /* There are two costs associated with the candidate -- its increment
5904      and its initialization.  The second is almost negligible for any loop
5905      that rolls enough, so we take it just very little into account.  */
5906
5907   base = cand->iv->base;
5908   cost_base = force_var_cost (data, base, NULL);
5909   /* It will be exceptional that the iv register happens to be initialized with
5910      the proper value at no cost.  In general, there will at least be a regcopy
5911      or a const set.  */
5912   if (cost_base.cost == 0)
5913     cost_base.cost = COSTS_N_INSNS (1);
5914   cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5915
5916   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5917
5918   /* Prefer the original ivs unless we may gain something by replacing it.
5919      The reason is to make debugging simpler; so this is not relevant for
5920      artificial ivs created by other optimization passes.  */
5921   if (cand->pos != IP_ORIGINAL
5922       || !SSA_NAME_VAR (cand->var_before)
5923       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5924     cost++;
5925
5926   /* Prefer not to insert statements into latch unless there are some
5927      already (so that we do not create unnecessary jumps).  */
5928   if (cand->pos == IP_END
5929       && empty_block_p (ip_end_pos (data->current_loop)))
5930     cost++;
5931
5932   cand->cost = cost;
5933   cand->cost_step = cost_step;
5934 }
5935
5936 /* Determines costs of computation of the candidates.  */
5937
5938 static void
5939 determine_iv_costs (struct ivopts_data *data)
5940 {
5941   unsigned i;
5942
5943   if (dump_file && (dump_flags & TDF_DETAILS))
5944     {
5945       fprintf (dump_file, "<Candidate Costs>:\n");
5946       fprintf (dump_file, "  cand\tcost\n");
5947     }
5948
5949   for (i = 0; i < data->vcands.length (); i++)
5950     {
5951       struct iv_cand *cand = data->vcands[i];
5952
5953       determine_iv_cost (data, cand);
5954
5955       if (dump_file && (dump_flags & TDF_DETAILS))
5956         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5957     }
5958
5959   if (dump_file && (dump_flags & TDF_DETAILS))
5960     fprintf (dump_file, "\n");
5961 }
5962
5963 /* Calculates cost for having SIZE induction variables.  */
5964
5965 static unsigned
5966 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5967 {
5968   /* We add size to the cost, so that we prefer eliminating ivs
5969      if possible.  */
5970   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5971                                             data->body_includes_call);
5972 }
5973
5974 /* For each size of the induction variable set determine the penalty.  */
5975
5976 static void
5977 determine_set_costs (struct ivopts_data *data)
5978 {
5979   unsigned j, n;
5980   gphi *phi;
5981   gphi_iterator psi;
5982   tree op;
5983   struct loop *loop = data->current_loop;
5984   bitmap_iterator bi;
5985
5986   if (dump_file && (dump_flags & TDF_DETAILS))
5987     {
5988       fprintf (dump_file, "<Global Costs>:\n");
5989       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5990       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5991       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5992       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5993     }
5994
5995   n = 0;
5996   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5997     {
5998       phi = psi.phi ();
5999       op = PHI_RESULT (phi);
6000
6001       if (virtual_operand_p (op))
6002         continue;
6003
6004       if (get_iv (data, op))
6005         continue;
6006
6007       n++;
6008     }
6009
6010   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6011     {
6012       struct version_info *info = ver_info (data, j);
6013
6014       if (info->inv_id && info->has_nonlin_use)
6015         n++;
6016     }
6017
6018   data->regs_used = n;
6019   if (dump_file && (dump_flags & TDF_DETAILS))
6020     fprintf (dump_file, "  regs_used %d\n", n);
6021
6022   if (dump_file && (dump_flags & TDF_DETAILS))
6023     {
6024       fprintf (dump_file, "  cost for size:\n");
6025       fprintf (dump_file, "  ivs\tcost\n");
6026       for (j = 0; j <= 2 * target_avail_regs; j++)
6027         fprintf (dump_file, "  %d\t%d\n", j,
6028                  ivopts_global_cost_for_size (data, j));
6029       fprintf (dump_file, "\n");
6030     }
6031 }
6032
6033 /* Returns true if A is a cheaper cost pair than B.  */
6034
6035 static bool
6036 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6037 {
6038   if (!a)
6039     return false;
6040
6041   if (!b)
6042     return true;
6043
6044   if (a->cost < b->cost)
6045     return true;
6046
6047   if (b->cost < a->cost)
6048     return false;
6049
6050   /* In case the costs are the same, prefer the cheaper candidate.  */
6051   if (a->cand->cost < b->cand->cost)
6052     return true;
6053
6054   return false;
6055 }
6056
6057
6058 /* Returns candidate by that USE is expressed in IVS.  */
6059
6060 static struct cost_pair *
6061 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6062 {
6063   return ivs->cand_for_group[group->id];
6064 }
6065
6066 /* Computes the cost field of IVS structure.  */
6067
6068 static void
6069 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6070 {
6071   comp_cost cost = ivs->cand_use_cost;
6072
6073   cost += ivs->cand_cost;
6074
6075   cost += ivopts_global_cost_for_size (data,
6076                                        ivs->n_regs
6077                                        + ivs->used_inv_exprs->elements ());
6078
6079   ivs->cost = cost;
6080 }
6081
6082 /* Remove invariants in set INVS to set IVS.  */
6083
6084 static void
6085 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6086 {
6087   bitmap_iterator bi;
6088   unsigned iid;
6089
6090   if (!invs)
6091     return;
6092
6093   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6094     {
6095       ivs->n_invariant_uses[iid]--;
6096       if (ivs->n_invariant_uses[iid] == 0)
6097         ivs->n_regs--;
6098     }
6099 }
6100
6101 /* Set USE not to be expressed by any candidate in IVS.  */
6102
6103 static void
6104 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6105                  struct iv_group *group)
6106 {
6107   unsigned gid = group->id, cid;
6108   struct cost_pair *cp;
6109
6110   cp = ivs->cand_for_group[gid];
6111   if (!cp)
6112     return;
6113   cid = cp->cand->id;
6114
6115   ivs->bad_groups++;
6116   ivs->cand_for_group[gid] = NULL;
6117   ivs->n_cand_uses[cid]--;
6118
6119   if (ivs->n_cand_uses[cid] == 0)
6120     {
6121       bitmap_clear_bit (ivs->cands, cid);
6122       /* Do not count the pseudocandidates.  */
6123       if (cp->cand->iv)
6124         ivs->n_regs--;
6125       ivs->n_cands--;
6126       ivs->cand_cost -= cp->cand->cost;
6127
6128       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6129     }
6130
6131   ivs->cand_use_cost -= cp->cost;
6132
6133   iv_ca_set_remove_invariants (ivs, cp->depends_on);
6134
6135   if (cp->inv_expr != NULL)
6136     {
6137       unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6138       --(*slot);
6139       if (*slot == 0)
6140         ivs->used_inv_exprs->remove (cp->inv_expr);
6141     }
6142   iv_ca_recount_cost (data, ivs);
6143 }
6144
6145 /* Add invariants in set INVS to set IVS.  */
6146
6147 static void
6148 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6149 {
6150   bitmap_iterator bi;
6151   unsigned iid;
6152
6153   if (!invs)
6154     return;
6155
6156   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6157     {
6158       ivs->n_invariant_uses[iid]++;
6159       if (ivs->n_invariant_uses[iid] == 1)
6160         ivs->n_regs++;
6161     }
6162 }
6163
6164 /* Set cost pair for GROUP in set IVS to CP.  */
6165
6166 static void
6167 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6168               struct iv_group *group, struct cost_pair *cp)
6169 {
6170   unsigned gid = group->id, cid;
6171
6172   if (ivs->cand_for_group[gid] == cp)
6173     return;
6174
6175   if (ivs->cand_for_group[gid])
6176     iv_ca_set_no_cp (data, ivs, group);
6177
6178   if (cp)
6179     {
6180       cid = cp->cand->id;
6181
6182       ivs->bad_groups--;
6183       ivs->cand_for_group[gid] = cp;
6184       ivs->n_cand_uses[cid]++;
6185       if (ivs->n_cand_uses[cid] == 1)
6186         {
6187           bitmap_set_bit (ivs->cands, cid);
6188           /* Do not count the pseudocandidates.  */
6189           if (cp->cand->iv)
6190             ivs->n_regs++;
6191           ivs->n_cands++;
6192           ivs->cand_cost += cp->cand->cost;
6193
6194           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6195         }
6196
6197       ivs->cand_use_cost += cp->cost;
6198       iv_ca_set_add_invariants (ivs, cp->depends_on);
6199
6200       if (cp->inv_expr != NULL)
6201         {
6202           unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6203           ++(*slot);
6204         }
6205       iv_ca_recount_cost (data, ivs);
6206     }
6207 }
6208
6209 /* Extend set IVS by expressing USE by some of the candidates in it
6210    if possible.  Consider all important candidates if candidates in
6211    set IVS don't give any result.  */
6212
6213 static void
6214 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6215                struct iv_group *group)
6216 {
6217   struct cost_pair *best_cp = NULL, *cp;
6218   bitmap_iterator bi;
6219   unsigned i;
6220   struct iv_cand *cand;
6221
6222   gcc_assert (ivs->upto >= group->id);
6223   ivs->upto++;
6224   ivs->bad_groups++;
6225
6226   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6227     {
6228       cand = data->vcands[i];
6229       cp = get_group_iv_cost (data, group, cand);
6230       if (cheaper_cost_pair (cp, best_cp))
6231         best_cp = cp;
6232     }
6233
6234   if (best_cp == NULL)
6235     {
6236       EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6237         {
6238           cand = data->vcands[i];
6239           cp = get_group_iv_cost (data, group, cand);
6240           if (cheaper_cost_pair (cp, best_cp))
6241             best_cp = cp;
6242         }
6243     }
6244
6245   iv_ca_set_cp (data, ivs, group, best_cp);
6246 }
6247
6248 /* Get cost for assignment IVS.  */
6249
6250 static comp_cost
6251 iv_ca_cost (struct iv_ca *ivs)
6252 {
6253   /* This was a conditional expression but it triggered a bug in
6254      Sun C 5.5.  */
6255   if (ivs->bad_groups)
6256     return infinite_cost;
6257   else
6258     return ivs->cost;
6259 }
6260
6261 /* Returns true if all dependences of CP are among invariants in IVS.  */
6262
6263 static bool
6264 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6265 {
6266   unsigned i;
6267   bitmap_iterator bi;
6268
6269   if (!cp->depends_on)
6270     return true;
6271
6272   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6273     {
6274       if (ivs->n_invariant_uses[i] == 0)
6275         return false;
6276     }
6277
6278   return true;
6279 }
6280
6281 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6282    it before NEXT.  */
6283
6284 static struct iv_ca_delta *
6285 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6286                  struct cost_pair *new_cp, struct iv_ca_delta *next)
6287 {
6288   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6289
6290   change->group = group;
6291   change->old_cp = old_cp;
6292   change->new_cp = new_cp;
6293   change->next = next;
6294
6295   return change;
6296 }
6297
6298 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
6299    are rewritten.  */
6300
6301 static struct iv_ca_delta *
6302 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6303 {
6304   struct iv_ca_delta *last;
6305
6306   if (!l2)
6307     return l1;
6308
6309   if (!l1)
6310     return l2;
6311
6312   for (last = l1; last->next; last = last->next)
6313     continue;
6314   last->next = l2;
6315
6316   return l1;
6317 }
6318
6319 /* Reverse the list of changes DELTA, forming the inverse to it.  */
6320
6321 static struct iv_ca_delta *
6322 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6323 {
6324   struct iv_ca_delta *act, *next, *prev = NULL;
6325
6326   for (act = delta; act; act = next)
6327     {
6328       next = act->next;
6329       act->next = prev;
6330       prev = act;
6331
6332       std::swap (act->old_cp, act->new_cp);
6333     }
6334
6335   return prev;
6336 }
6337
6338 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
6339    reverted instead.  */
6340
6341 static void
6342 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6343                     struct iv_ca_delta *delta, bool forward)
6344 {
6345   struct cost_pair *from, *to;
6346   struct iv_ca_delta *act;
6347
6348   if (!forward)
6349     delta = iv_ca_delta_reverse (delta);
6350
6351   for (act = delta; act; act = act->next)
6352     {
6353       from = act->old_cp;
6354       to = act->new_cp;
6355       gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6356       iv_ca_set_cp (data, ivs, act->group, to);
6357     }
6358
6359   if (!forward)
6360     iv_ca_delta_reverse (delta);
6361 }
6362
6363 /* Returns true if CAND is used in IVS.  */
6364
6365 static bool
6366 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6367 {
6368   return ivs->n_cand_uses[cand->id] > 0;
6369 }
6370
6371 /* Returns number of induction variable candidates in the set IVS.  */
6372
6373 static unsigned
6374 iv_ca_n_cands (struct iv_ca *ivs)
6375 {
6376   return ivs->n_cands;
6377 }
6378
6379 /* Free the list of changes DELTA.  */
6380
6381 static void
6382 iv_ca_delta_free (struct iv_ca_delta **delta)
6383 {
6384   struct iv_ca_delta *act, *next;
6385
6386   for (act = *delta; act; act = next)
6387     {
6388       next = act->next;
6389       free (act);
6390     }
6391
6392   *delta = NULL;
6393 }
6394
6395 /* Allocates new iv candidates assignment.  */
6396
6397 static struct iv_ca *
6398 iv_ca_new (struct ivopts_data *data)
6399 {
6400   struct iv_ca *nw = XNEW (struct iv_ca);
6401
6402   nw->upto = 0;
6403   nw->bad_groups = 0;
6404   nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6405                                  data->vgroups.length ());
6406   nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6407   nw->cands = BITMAP_ALLOC (NULL);
6408   nw->n_cands = 0;
6409   nw->n_regs = 0;
6410   nw->cand_use_cost = no_cost;
6411   nw->cand_cost = 0;
6412   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6413   nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6414   nw->cost = no_cost;
6415
6416   return nw;
6417 }
6418
6419 /* Free memory occupied by the set IVS.  */
6420
6421 static void
6422 iv_ca_free (struct iv_ca **ivs)
6423 {
6424   free ((*ivs)->cand_for_group);
6425   free ((*ivs)->n_cand_uses);
6426   BITMAP_FREE ((*ivs)->cands);
6427   free ((*ivs)->n_invariant_uses);
6428   delete ((*ivs)->used_inv_exprs);
6429   free (*ivs);
6430   *ivs = NULL;
6431 }
6432
6433 /* Dumps IVS to FILE.  */
6434
6435 static void
6436 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6437 {
6438   unsigned i;
6439   comp_cost cost = iv_ca_cost (ivs);
6440
6441   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
6442            cost.complexity);
6443   fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
6444            ivs->cand_cost, ivs->cand_use_cost.cost,
6445            ivs->cand_use_cost.complexity);
6446   bitmap_print (file, ivs->cands, "  candidates: ","\n");
6447
6448   for (i = 0; i < ivs->upto; i++)
6449     {
6450       struct iv_group *group = data->vgroups[i];
6451       struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6452       if (cp)
6453         fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6454                  group->id, cp->cand->id, cp->cost.cost,
6455                  cp->cost.complexity);
6456       else
6457         fprintf (file, "   group:%d --> ??\n", group->id);
6458     }
6459
6460   const char *pref = "";
6461   fprintf (file, "  invariant variables: ");
6462   for (i = 1; i <= data->max_inv_id; i++)
6463     if (ivs->n_invariant_uses[i])
6464       {
6465         fprintf (file, "%s%d", pref, i);
6466         pref = ", ";
6467       }
6468
6469   pref = "";
6470   fprintf (file, "\n  invariant expressions: ");
6471   for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6472        = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6473     {
6474         fprintf (file, "%s%d", pref, (*it).first->id);
6475         pref = ", ";
6476     }
6477
6478   fprintf (file, "\n\n");
6479 }
6480
6481 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
6482    new set, and store differences in DELTA.  Number of induction variables
6483    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6484    the function will try to find a solution with mimimal iv candidates.  */
6485
6486 static comp_cost
6487 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6488               struct iv_cand *cand, struct iv_ca_delta **delta,
6489               unsigned *n_ivs, bool min_ncand)
6490 {
6491   unsigned i;
6492   comp_cost cost;
6493   struct iv_group *group;
6494   struct cost_pair *old_cp, *new_cp;
6495
6496   *delta = NULL;
6497   for (i = 0; i < ivs->upto; i++)
6498     {
6499       group = data->vgroups[i];
6500       old_cp = iv_ca_cand_for_group (ivs, group);
6501
6502       if (old_cp
6503           && old_cp->cand == cand)
6504         continue;
6505
6506       new_cp = get_group_iv_cost (data, group, cand);
6507       if (!new_cp)
6508         continue;
6509
6510       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6511         continue;
6512
6513       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6514         continue;
6515
6516       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6517     }
6518
6519   iv_ca_delta_commit (data, ivs, *delta, true);
6520   cost = iv_ca_cost (ivs);
6521   if (n_ivs)
6522     *n_ivs = iv_ca_n_cands (ivs);
6523   iv_ca_delta_commit (data, ivs, *delta, false);
6524
6525   return cost;
6526 }
6527
6528 /* Try narrowing set IVS by removing CAND.  Return the cost of
6529    the new set and store the differences in DELTA.  START is
6530    the candidate with which we start narrowing.  */
6531
6532 static comp_cost
6533 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6534               struct iv_cand *cand, struct iv_cand *start,
6535               struct iv_ca_delta **delta)
6536 {
6537   unsigned i, ci;
6538   struct iv_group *group;
6539   struct cost_pair *old_cp, *new_cp, *cp;
6540   bitmap_iterator bi;
6541   struct iv_cand *cnd;
6542   comp_cost cost, best_cost, acost;
6543
6544   *delta = NULL;
6545   for (i = 0; i < data->vgroups.length (); i++)
6546     {
6547       group = data->vgroups[i];
6548
6549       old_cp = iv_ca_cand_for_group (ivs, group);
6550       if (old_cp->cand != cand)
6551         continue;
6552
6553       best_cost = iv_ca_cost (ivs);
6554       /* Start narrowing with START.  */
6555       new_cp = get_group_iv_cost (data, group, start);
6556
6557       if (data->consider_all_candidates)
6558         {
6559           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6560             {
6561               if (ci == cand->id || (start && ci == start->id))
6562                 continue;
6563
6564               cnd = data->vcands[ci];
6565
6566               cp = get_group_iv_cost (data, group, cnd);
6567               if (!cp)
6568                 continue;
6569
6570               iv_ca_set_cp (data, ivs, group, cp);
6571               acost = iv_ca_cost (ivs);
6572
6573               if (acost < best_cost)
6574                 {
6575                   best_cost = acost;
6576                   new_cp = cp;
6577                 }
6578             }
6579         }
6580       else
6581         {
6582           EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6583             {
6584               if (ci == cand->id || (start && ci == start->id))
6585                 continue;
6586
6587               cnd = data->vcands[ci];
6588
6589               cp = get_group_iv_cost (data, group, cnd);
6590               if (!cp)
6591                 continue;
6592
6593               iv_ca_set_cp (data, ivs, group, cp);
6594               acost = iv_ca_cost (ivs);
6595
6596               if (acost < best_cost)
6597                 {
6598                   best_cost = acost;
6599                   new_cp = cp;
6600                 }
6601             }
6602         }
6603       /* Restore to old cp for use.  */
6604       iv_ca_set_cp (data, ivs, group, old_cp);
6605
6606       if (!new_cp)
6607         {
6608           iv_ca_delta_free (delta);
6609           return infinite_cost;
6610         }
6611
6612       *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6613     }
6614
6615   iv_ca_delta_commit (data, ivs, *delta, true);
6616   cost = iv_ca_cost (ivs);
6617   iv_ca_delta_commit (data, ivs, *delta, false);
6618
6619   return cost;
6620 }
6621
6622 /* Try optimizing the set of candidates IVS by removing candidates different
6623    from to EXCEPT_CAND from it.  Return cost of the new set, and store
6624    differences in DELTA.  */
6625
6626 static comp_cost
6627 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6628              struct iv_cand *except_cand, struct iv_ca_delta **delta)
6629 {
6630   bitmap_iterator bi;
6631   struct iv_ca_delta *act_delta, *best_delta;
6632   unsigned i;
6633   comp_cost best_cost, acost;
6634   struct iv_cand *cand;
6635
6636   best_delta = NULL;
6637   best_cost = iv_ca_cost (ivs);
6638
6639   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6640     {
6641       cand = data->vcands[i];
6642
6643       if (cand == except_cand)
6644         continue;
6645
6646       acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6647
6648       if (acost < best_cost)
6649         {
6650           best_cost = acost;
6651           iv_ca_delta_free (&best_delta);
6652           best_delta = act_delta;
6653         }
6654       else
6655         iv_ca_delta_free (&act_delta);
6656     }
6657
6658   if (!best_delta)
6659     {
6660       *delta = NULL;
6661       return best_cost;
6662     }
6663
6664   /* Recurse to possibly remove other unnecessary ivs.  */
6665   iv_ca_delta_commit (data, ivs, best_delta, true);
6666   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6667   iv_ca_delta_commit (data, ivs, best_delta, false);
6668   *delta = iv_ca_delta_join (best_delta, *delta);
6669   return best_cost;
6670 }
6671
6672 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6673    cheaper local cost for GROUP than BEST_CP.  Return pointer to
6674    the corresponding cost_pair, otherwise just return BEST_CP.  */
6675
6676 static struct cost_pair*
6677 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6678                         unsigned int cand_idx, struct iv_cand *old_cand,
6679                         struct cost_pair *best_cp)
6680 {
6681   struct iv_cand *cand;
6682   struct cost_pair *cp;
6683
6684   gcc_assert (old_cand != NULL && best_cp != NULL);
6685   if (cand_idx == old_cand->id)
6686     return best_cp;
6687
6688   cand = data->vcands[cand_idx];
6689   cp = get_group_iv_cost (data, group, cand);
6690   if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6691     return cp;
6692
6693   return best_cp;
6694 }
6695
6696 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6697    which are used by more than one iv uses.  For each of those candidates,
6698    this function tries to represent iv uses under that candidate using
6699    other ones with lower local cost, then tries to prune the new set.
6700    If the new set has lower cost, It returns the new cost after recording
6701    candidate replacement in list DELTA.  */
6702
6703 static comp_cost
6704 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6705                struct iv_ca_delta **delta)
6706 {
6707   bitmap_iterator bi, bj;
6708   unsigned int i, j, k;
6709   struct iv_cand *cand;
6710   comp_cost orig_cost, acost;
6711   struct iv_ca_delta *act_delta, *tmp_delta;
6712   struct cost_pair *old_cp, *best_cp = NULL;
6713
6714   *delta = NULL;
6715   orig_cost = iv_ca_cost (ivs);
6716
6717   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6718     {
6719       if (ivs->n_cand_uses[i] == 1
6720           || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6721         continue;
6722
6723       cand = data->vcands[i];
6724
6725       act_delta = NULL;
6726       /*  Represent uses under current candidate using other ones with
6727           lower local cost.  */
6728       for (j = 0; j < ivs->upto; j++)
6729         {
6730           struct iv_group *group = data->vgroups[j];
6731           old_cp = iv_ca_cand_for_group (ivs, group);
6732
6733           if (old_cp->cand != cand)
6734             continue;
6735
6736           best_cp = old_cp;
6737           if (data->consider_all_candidates)
6738             for (k = 0; k < data->vcands.length (); k++)
6739               best_cp = cheaper_cost_with_cand (data, group, k,
6740                                                 old_cp->cand, best_cp);
6741           else
6742             EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6743               best_cp = cheaper_cost_with_cand (data, group, k,
6744                                                 old_cp->cand, best_cp);
6745
6746           if (best_cp == old_cp)
6747             continue;
6748
6749           act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6750         }
6751       /* No need for further prune.  */
6752       if (!act_delta)
6753         continue;
6754
6755       /* Prune the new candidate set.  */
6756       iv_ca_delta_commit (data, ivs, act_delta, true);
6757       acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6758       iv_ca_delta_commit (data, ivs, act_delta, false);
6759       act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6760
6761       if (acost < orig_cost)
6762         {
6763           *delta = act_delta;
6764           return acost;
6765         }
6766       else
6767         iv_ca_delta_free (&act_delta);
6768     }
6769
6770   return orig_cost;
6771 }
6772
6773 /* Tries to extend the sets IVS in the best possible way in order to
6774    express the GROUP.  If ORIGINALP is true, prefer candidates from
6775    the original set of IVs, otherwise favor important candidates not
6776    based on any memory object.  */
6777
6778 static bool
6779 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6780                   struct iv_group *group, bool originalp)
6781 {
6782   comp_cost best_cost, act_cost;
6783   unsigned i;
6784   bitmap_iterator bi;
6785   struct iv_cand *cand;
6786   struct iv_ca_delta *best_delta = NULL, *act_delta;
6787   struct cost_pair *cp;
6788
6789   iv_ca_add_group (data, ivs, group);
6790   best_cost = iv_ca_cost (ivs);
6791   cp = iv_ca_cand_for_group (ivs, group);
6792   if (cp)
6793     {
6794       best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6795       iv_ca_set_no_cp (data, ivs, group);
6796     }
6797
6798   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6799      first try important candidates not based on any memory object.  Only if
6800      this fails, try the specific ones.  Rationale -- in loops with many
6801      variables the best choice often is to use just one generic biv.  If we
6802      added here many ivs specific to the uses, the optimization algorithm later
6803      would be likely to get stuck in a local minimum, thus causing us to create
6804      too many ivs.  The approach from few ivs to more seems more likely to be
6805      successful -- starting from few ivs, replacing an expensive use by a
6806      specific iv should always be a win.  */
6807   EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6808     {
6809       cand = data->vcands[i];
6810
6811       if (originalp && cand->pos !=IP_ORIGINAL)
6812         continue;
6813
6814       if (!originalp && cand->iv->base_object != NULL_TREE)
6815         continue;
6816
6817       if (iv_ca_cand_used_p (ivs, cand))
6818         continue;
6819
6820       cp = get_group_iv_cost (data, group, cand);
6821       if (!cp)
6822         continue;
6823
6824       iv_ca_set_cp (data, ivs, group, cp);
6825       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6826                                true);
6827       iv_ca_set_no_cp (data, ivs, group);
6828       act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6829
6830       if (act_cost < best_cost)
6831         {
6832           best_cost = act_cost;
6833
6834           iv_ca_delta_free (&best_delta);
6835           best_delta = act_delta;
6836         }
6837       else
6838         iv_ca_delta_free (&act_delta);
6839     }
6840
6841   if (best_cost.infinite_cost_p ())
6842     {
6843       for (i = 0; i < group->n_map_members; i++)
6844         {
6845           cp = group->cost_map + i;
6846           cand = cp->cand;
6847           if (!cand)
6848             continue;
6849
6850           /* Already tried this.  */
6851           if (cand->important)
6852             {
6853               if (originalp && cand->pos == IP_ORIGINAL)
6854                 continue;
6855               if (!originalp && cand->iv->base_object == NULL_TREE)
6856                 continue;
6857             }
6858
6859           if (iv_ca_cand_used_p (ivs, cand))
6860             continue;
6861
6862           act_delta = NULL;
6863           iv_ca_set_cp (data, ivs, group, cp);
6864           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6865           iv_ca_set_no_cp (data, ivs, group);
6866           act_delta = iv_ca_delta_add (group,
6867                                        iv_ca_cand_for_group (ivs, group),
6868                                        cp, act_delta);
6869
6870           if (act_cost < best_cost)
6871             {
6872               best_cost = act_cost;
6873
6874               if (best_delta)
6875                 iv_ca_delta_free (&best_delta);
6876               best_delta = act_delta;
6877             }
6878           else
6879             iv_ca_delta_free (&act_delta);
6880         }
6881     }
6882
6883   iv_ca_delta_commit (data, ivs, best_delta, true);
6884   iv_ca_delta_free (&best_delta);
6885
6886   return !best_cost.infinite_cost_p ();
6887 }
6888
6889 /* Finds an initial assignment of candidates to uses.  */
6890
6891 static struct iv_ca *
6892 get_initial_solution (struct ivopts_data *data, bool originalp)
6893 {
6894   unsigned i;
6895   struct iv_ca *ivs = iv_ca_new (data);
6896
6897   for (i = 0; i < data->vgroups.length (); i++)
6898     if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6899       {
6900         iv_ca_free (&ivs);
6901         return NULL;
6902       }
6903
6904   return ivs;
6905 }
6906
6907 /* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
6908    points to a bool variable, this function tries to break local
6909    optimal fixed-point by replacing candidates in IVS if it's true.  */
6910
6911 static bool
6912 try_improve_iv_set (struct ivopts_data *data,
6913                     struct iv_ca *ivs, bool *try_replace_p)
6914 {
6915   unsigned i, n_ivs;
6916   comp_cost acost, best_cost = iv_ca_cost (ivs);
6917   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6918   struct iv_cand *cand;
6919
6920   /* Try extending the set of induction variables by one.  */
6921   for (i = 0; i < data->vcands.length (); i++)
6922     {
6923       cand = data->vcands[i];
6924
6925       if (iv_ca_cand_used_p (ivs, cand))
6926         continue;
6927
6928       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6929       if (!act_delta)
6930         continue;
6931
6932       /* If we successfully added the candidate and the set is small enough,
6933          try optimizing it by removing other candidates.  */
6934       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6935         {
6936           iv_ca_delta_commit (data, ivs, act_delta, true);
6937           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6938           iv_ca_delta_commit (data, ivs, act_delta, false);
6939           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6940         }
6941
6942       if (acost < best_cost)
6943         {
6944           best_cost = acost;
6945           iv_ca_delta_free (&best_delta);
6946           best_delta = act_delta;
6947         }
6948       else
6949         iv_ca_delta_free (&act_delta);
6950     }
6951
6952   if (!best_delta)
6953     {
6954       /* Try removing the candidates from the set instead.  */
6955       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6956
6957       if (!best_delta && *try_replace_p)
6958         {
6959           *try_replace_p = false;
6960           /* So far candidate selecting algorithm tends to choose fewer IVs
6961              so that it can handle cases in which loops have many variables
6962              but the best choice is often to use only one general biv.  One
6963              weakness is it can't handle opposite cases, in which different
6964              candidates should be chosen with respect to each use.  To solve
6965              the problem, we replace candidates in a manner described by the
6966              comments of iv_ca_replace, thus give general algorithm a chance
6967              to break local optimal fixed-point in these cases.  */
6968           best_cost = iv_ca_replace (data, ivs, &best_delta);
6969         }
6970
6971       if (!best_delta)
6972         return false;
6973     }
6974
6975   iv_ca_delta_commit (data, ivs, best_delta, true);
6976   gcc_assert (best_cost == iv_ca_cost (ivs));
6977   iv_ca_delta_free (&best_delta);
6978   return true;
6979 }
6980
6981 /* Attempts to find the optimal set of induction variables.  We do simple
6982    greedy heuristic -- we try to replace at most one candidate in the selected
6983    solution and remove the unused ivs while this improves the cost.  */
6984
6985 static struct iv_ca *
6986 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6987 {
6988   struct iv_ca *set;
6989   bool try_replace_p = true;
6990
6991   /* Get the initial solution.  */
6992   set = get_initial_solution (data, originalp);
6993   if (!set)
6994     {
6995       if (dump_file && (dump_flags & TDF_DETAILS))
6996         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6997       return NULL;
6998     }
6999
7000   if (dump_file && (dump_flags & TDF_DETAILS))
7001     {
7002       fprintf (dump_file, "Initial set of candidates:\n");
7003       iv_ca_dump (data, dump_file, set);
7004     }
7005
7006   while (try_improve_iv_set (data, set, &try_replace_p))
7007     {
7008       if (dump_file && (dump_flags & TDF_DETAILS))
7009         {
7010           fprintf (dump_file, "Improved to:\n");
7011           iv_ca_dump (data, dump_file, set);
7012         }
7013     }
7014
7015   return set;
7016 }
7017
7018 static struct iv_ca *
7019 find_optimal_iv_set (struct ivopts_data *data)
7020 {
7021   unsigned i;
7022   comp_cost cost, origcost;
7023   struct iv_ca *set, *origset;
7024
7025   /* Determine the cost based on a strategy that starts with original IVs,
7026      and try again using a strategy that prefers candidates not based
7027      on any IVs.  */
7028   origset = find_optimal_iv_set_1 (data, true);
7029   set = find_optimal_iv_set_1 (data, false);
7030
7031   if (!origset && !set)
7032     return NULL;
7033
7034   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7035   cost = set ? iv_ca_cost (set) : infinite_cost;
7036
7037   if (dump_file && (dump_flags & TDF_DETAILS))
7038     {
7039       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7040                origcost.cost, origcost.complexity);
7041       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7042                cost.cost, cost.complexity);
7043     }
7044
7045   /* Choose the one with the best cost.  */
7046   if (origcost <= cost)
7047     {
7048       if (set)
7049         iv_ca_free (&set);
7050       set = origset;
7051     }
7052   else if (origset)
7053     iv_ca_free (&origset);
7054
7055   for (i = 0; i < data->vgroups.length (); i++)
7056     {
7057       struct iv_group *group = data->vgroups[i];
7058       group->selected = iv_ca_cand_for_group (set, group)->cand;
7059     }
7060
7061   return set;
7062 }
7063
7064 /* Creates a new induction variable corresponding to CAND.  */
7065
7066 static void
7067 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7068 {
7069   gimple_stmt_iterator incr_pos;
7070   tree base;
7071   struct iv_use *use;
7072   struct iv_group *group;
7073   bool after = false;
7074
7075   if (!cand->iv)
7076     return;
7077
7078   switch (cand->pos)
7079     {
7080     case IP_NORMAL:
7081       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7082       break;
7083
7084     case IP_END:
7085       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7086       after = true;
7087       break;
7088
7089     case IP_AFTER_USE:
7090       after = true;
7091       /* fall through */
7092     case IP_BEFORE_USE:
7093       incr_pos = gsi_for_stmt (cand->incremented_at);
7094       break;
7095
7096     case IP_ORIGINAL:
7097       /* Mark that the iv is preserved.  */
7098       name_info (data, cand->var_before)->preserve_biv = true;
7099       name_info (data, cand->var_after)->preserve_biv = true;
7100
7101       /* Rewrite the increment so that it uses var_before directly.  */
7102       use = find_interesting_uses_op (data, cand->var_after);
7103       group = data->vgroups[use->group_id];
7104       group->selected = cand;
7105       return;
7106     }
7107
7108   gimple_add_tmp_var (cand->var_before);
7109
7110   base = unshare_expr (cand->iv->base);
7111
7112   create_iv (base, unshare_expr (cand->iv->step),
7113              cand->var_before, data->current_loop,
7114              &incr_pos, after, &cand->var_before, &cand->var_after);
7115 }
7116
7117 /* Creates new induction variables described in SET.  */
7118
7119 static void
7120 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7121 {
7122   unsigned i;
7123   struct iv_cand *cand;
7124   bitmap_iterator bi;
7125
7126   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7127     {
7128       cand = data->vcands[i];
7129       create_new_iv (data, cand);
7130     }
7131
7132   if (dump_file && (dump_flags & TDF_DETAILS))
7133     {
7134       fprintf (dump_file, "Selected IV set for loop %d",
7135                data->current_loop->num);
7136       if (data->loop_loc != UNKNOWN_LOCATION)
7137         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7138                  LOCATION_LINE (data->loop_loc));
7139       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7140                avg_loop_niter (data->current_loop));
7141       fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7142                (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7143       fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7144       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7145         {
7146           cand = data->vcands[i];
7147           dump_cand (dump_file, cand);
7148         }
7149       fprintf (dump_file, "\n");
7150     }
7151 }
7152
7153 /* Rewrites USE (definition of iv used in a nonlinear expression)
7154    using candidate CAND.  */
7155
7156 static void
7157 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7158                             struct iv_use *use, struct iv_cand *cand)
7159 {
7160   tree comp;
7161   tree op, tgt;
7162   gassign *ass;
7163   gimple_stmt_iterator bsi;
7164
7165   /* An important special case -- if we are asked to express value of
7166      the original iv by itself, just exit; there is no need to
7167      introduce a new computation (that might also need casting the
7168      variable to unsigned and back).  */
7169   if (cand->pos == IP_ORIGINAL
7170       && cand->incremented_at == use->stmt)
7171     {
7172       enum tree_code stmt_code;
7173
7174       gcc_assert (is_gimple_assign (use->stmt));
7175       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7176
7177       /* Check whether we may leave the computation unchanged.
7178          This is the case only if it does not rely on other
7179          computations in the loop -- otherwise, the computation
7180          we rely upon may be removed in remove_unused_ivs,
7181          thus leading to ICE.  */
7182       stmt_code = gimple_assign_rhs_code (use->stmt);
7183       if (stmt_code == PLUS_EXPR
7184           || stmt_code == MINUS_EXPR
7185           || stmt_code == POINTER_PLUS_EXPR)
7186         {
7187           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7188             op = gimple_assign_rhs2 (use->stmt);
7189           else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7190             op = gimple_assign_rhs1 (use->stmt);
7191           else
7192             op = NULL_TREE;
7193         }
7194       else
7195         op = NULL_TREE;
7196
7197       if (op && expr_invariant_in_loop_p (data->current_loop, op))
7198         return;
7199     }
7200
7201   comp = get_computation (data->current_loop, use, cand);
7202   gcc_assert (comp != NULL_TREE);
7203
7204   switch (gimple_code (use->stmt))
7205     {
7206     case GIMPLE_PHI:
7207       tgt = PHI_RESULT (use->stmt);
7208
7209       /* If we should keep the biv, do not replace it.  */
7210       if (name_info (data, tgt)->preserve_biv)
7211         return;
7212
7213       bsi = gsi_after_labels (gimple_bb (use->stmt));
7214       break;
7215
7216     case GIMPLE_ASSIGN:
7217       tgt = gimple_assign_lhs (use->stmt);
7218       bsi = gsi_for_stmt (use->stmt);
7219       break;
7220
7221     default:
7222       gcc_unreachable ();
7223     }
7224
7225   if (!valid_gimple_rhs_p (comp)
7226       || (gimple_code (use->stmt) != GIMPLE_PHI
7227           /* We can't allow re-allocating the stmt as it might be pointed
7228              to still.  */
7229           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7230               >= gimple_num_ops (gsi_stmt (bsi)))))
7231     {
7232       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7233                                        true, GSI_SAME_STMT);
7234       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7235         {
7236           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7237           /* As this isn't a plain copy we have to reset alignment
7238              information.  */
7239           if (SSA_NAME_PTR_INFO (comp))
7240             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7241         }
7242     }
7243
7244   if (gimple_code (use->stmt) == GIMPLE_PHI)
7245     {
7246       ass = gimple_build_assign (tgt, comp);
7247       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7248
7249       bsi = gsi_for_stmt (use->stmt);
7250       remove_phi_node (&bsi, false);
7251     }
7252   else
7253     {
7254       gimple_assign_set_rhs_from_tree (&bsi, comp);
7255       use->stmt = gsi_stmt (bsi);
7256     }
7257 }
7258
7259 /* Performs a peephole optimization to reorder the iv update statement with
7260    a mem ref to enable instruction combining in later phases. The mem ref uses
7261    the iv value before the update, so the reordering transformation requires
7262    adjustment of the offset. CAND is the selected IV_CAND.
7263
7264    Example:
7265
7266    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
7267    iv2 = iv1 + 1;
7268
7269    if (t < val)      (1)
7270      goto L;
7271    goto Head;
7272
7273
7274    directly propagating t over to (1) will introduce overlapping live range
7275    thus increase register pressure. This peephole transform it into:
7276
7277
7278    iv2 = iv1 + 1;
7279    t = MEM_REF (base, iv2, 8, 8);
7280    if (t < val)
7281      goto L;
7282    goto Head;
7283 */
7284
7285 static void
7286 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7287 {
7288   tree var_after;
7289   gimple *iv_update, *stmt;
7290   basic_block bb;
7291   gimple_stmt_iterator gsi, gsi_iv;
7292
7293   if (cand->pos != IP_NORMAL)
7294     return;
7295
7296   var_after = cand->var_after;
7297   iv_update = SSA_NAME_DEF_STMT (var_after);
7298
7299   bb = gimple_bb (iv_update);
7300   gsi = gsi_last_nondebug_bb (bb);
7301   stmt = gsi_stmt (gsi);
7302
7303   /* Only handle conditional statement for now.  */
7304   if (gimple_code (stmt) != GIMPLE_COND)
7305     return;
7306
7307   gsi_prev_nondebug (&gsi);
7308   stmt = gsi_stmt (gsi);
7309   if (stmt != iv_update)
7310     return;
7311
7312   gsi_prev_nondebug (&gsi);
7313   if (gsi_end_p (gsi))
7314     return;
7315
7316   stmt = gsi_stmt (gsi);
7317   if (gimple_code (stmt) != GIMPLE_ASSIGN)
7318     return;
7319
7320   if (stmt != use->stmt)
7321     return;
7322
7323   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7324     return;
7325
7326   if (dump_file && (dump_flags & TDF_DETAILS))
7327     {
7328       fprintf (dump_file, "Reordering \n");
7329       print_gimple_stmt (dump_file, iv_update, 0, 0);
7330       print_gimple_stmt (dump_file, use->stmt, 0, 0);
7331       fprintf (dump_file, "\n");
7332     }
7333
7334   gsi = gsi_for_stmt (use->stmt);
7335   gsi_iv = gsi_for_stmt (iv_update);
7336   gsi_move_before (&gsi_iv, &gsi);
7337
7338   cand->pos = IP_BEFORE_USE;
7339   cand->incremented_at = use->stmt;
7340 }
7341
7342 /* Rewrites USE (address that is an iv) using candidate CAND.  */
7343
7344 static void
7345 rewrite_use_address (struct ivopts_data *data,
7346                      struct iv_use *use, struct iv_cand *cand)
7347 {
7348   aff_tree aff;
7349   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7350   tree base_hint = NULL_TREE;
7351   tree ref, iv;
7352   bool ok;
7353
7354   adjust_iv_update_pos (cand, use);
7355   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7356   gcc_assert (ok);
7357   unshare_aff_combination (&aff);
7358
7359   /* To avoid undefined overflow problems, all IV candidates use unsigned
7360      integer types.  The drawback is that this makes it impossible for
7361      create_mem_ref to distinguish an IV that is based on a memory object
7362      from one that represents simply an offset.
7363
7364      To work around this problem, we pass a hint to create_mem_ref that
7365      indicates which variable (if any) in aff is an IV based on a memory
7366      object.  Note that we only consider the candidate.  If this is not
7367      based on an object, the base of the reference is in some subexpression
7368      of the use -- but these will use pointer types, so they are recognized
7369      by the create_mem_ref heuristics anyway.  */
7370   if (cand->iv->base_object)
7371     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7372
7373   iv = var_at_stmt (data->current_loop, cand, use->stmt);
7374   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7375                         reference_alias_ptr_type (*use->op_p),
7376                         iv, base_hint, data->speed);
7377   copy_ref_info (ref, *use->op_p);
7378   *use->op_p = ref;
7379 }
7380
7381 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7382    candidate CAND.  */
7383
7384 static void
7385 rewrite_use_compare (struct ivopts_data *data,
7386                      struct iv_use *use, struct iv_cand *cand)
7387 {
7388   tree comp, *var_p, op, bound;
7389   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7390   enum tree_code compare;
7391   struct iv_group *group = data->vgroups[use->group_id];
7392   struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7393   bool ok;
7394
7395   bound = cp->value;
7396   if (bound)
7397     {
7398       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7399       tree var_type = TREE_TYPE (var);
7400       gimple_seq stmts;
7401
7402       if (dump_file && (dump_flags & TDF_DETAILS))
7403         {
7404           fprintf (dump_file, "Replacing exit test: ");
7405           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7406         }
7407       compare = cp->comp;
7408       bound = unshare_expr (fold_convert (var_type, bound));
7409       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7410       if (stmts)
7411         gsi_insert_seq_on_edge_immediate (
7412                 loop_preheader_edge (data->current_loop),
7413                 stmts);
7414
7415       gcond *cond_stmt = as_a <gcond *> (use->stmt);
7416       gimple_cond_set_lhs (cond_stmt, var);
7417       gimple_cond_set_code (cond_stmt, compare);
7418       gimple_cond_set_rhs (cond_stmt, op);
7419       return;
7420     }
7421
7422   /* The induction variable elimination failed; just express the original
7423      giv.  */
7424   comp = get_computation (data->current_loop, use, cand);
7425   gcc_assert (comp != NULL_TREE);
7426
7427   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7428   gcc_assert (ok);
7429
7430   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7431                                      true, GSI_SAME_STMT);
7432 }
7433
7434 /* Rewrite the groups using the selected induction variables.  */
7435
7436 static void
7437 rewrite_groups (struct ivopts_data *data)
7438 {
7439   unsigned i, j;
7440
7441   for (i = 0; i < data->vgroups.length (); i++)
7442     {
7443       struct iv_group *group = data->vgroups[i];
7444       struct iv_cand *cand = group->selected;
7445
7446       gcc_assert (cand);
7447
7448       if (group->type == USE_NONLINEAR_EXPR)
7449         {
7450           for (j = 0; j < group->vuses.length (); j++)
7451             {
7452               rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7453               update_stmt (group->vuses[j]->stmt);
7454             }
7455         }
7456       else if (group->type == USE_ADDRESS)
7457         {
7458           for (j = 0; j < group->vuses.length (); j++)
7459             {
7460               rewrite_use_address (data, group->vuses[j], cand);
7461               update_stmt (group->vuses[j]->stmt);
7462             }
7463         }
7464       else
7465         {
7466           gcc_assert (group->type == USE_COMPARE);
7467
7468           for (j = 0; j < group->vuses.length (); j++)
7469             {
7470               rewrite_use_compare (data, group->vuses[j], cand);
7471               update_stmt (group->vuses[j]->stmt);
7472             }
7473         }
7474     }
7475 }
7476
7477 /* Removes the ivs that are not used after rewriting.  */
7478
7479 static void
7480 remove_unused_ivs (struct ivopts_data *data)
7481 {
7482   unsigned j;
7483   bitmap_iterator bi;
7484   bitmap toremove = BITMAP_ALLOC (NULL);
7485
7486   /* Figure out an order in which to release SSA DEFs so that we don't
7487      release something that we'd have to propagate into a debug stmt
7488      afterwards.  */
7489   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7490     {
7491       struct version_info *info;
7492
7493       info = ver_info (data, j);
7494       if (info->iv
7495           && !integer_zerop (info->iv->step)
7496           && !info->inv_id
7497           && !info->iv->nonlin_use
7498           && !info->preserve_biv)
7499         {
7500           bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7501
7502           tree def = info->iv->ssa_name;
7503
7504           if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7505             {
7506               imm_use_iterator imm_iter;
7507               use_operand_p use_p;
7508               gimple *stmt;
7509               int count = 0;
7510
7511               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7512                 {
7513                   if (!gimple_debug_bind_p (stmt))
7514                     continue;
7515
7516                   /* We just want to determine whether to do nothing
7517                      (count == 0), to substitute the computed
7518                      expression into a single use of the SSA DEF by
7519                      itself (count == 1), or to use a debug temp
7520                      because the SSA DEF is used multiple times or as
7521                      part of a larger expression (count > 1). */
7522                   count++;
7523                   if (gimple_debug_bind_get_value (stmt) != def)
7524                     count++;
7525
7526                   if (count > 1)
7527                     BREAK_FROM_IMM_USE_STMT (imm_iter);
7528                 }
7529
7530               if (!count)
7531                 continue;
7532
7533               struct iv_use dummy_use;
7534               struct iv_cand *best_cand = NULL, *cand;
7535               unsigned i, best_pref = 0, cand_pref;
7536
7537               memset (&dummy_use, 0, sizeof (dummy_use));
7538               dummy_use.iv = info->iv;
7539               for (i = 0; i < data->vgroups.length () && i < 64; i++)
7540                 {
7541                   cand = data->vgroups[i]->selected;
7542                   if (cand == best_cand)
7543                     continue;
7544                   cand_pref = operand_equal_p (cand->iv->step,
7545                                                info->iv->step, 0)
7546                     ? 4 : 0;
7547                   cand_pref
7548                     += TYPE_MODE (TREE_TYPE (cand->iv->base))
7549                     == TYPE_MODE (TREE_TYPE (info->iv->base))
7550                     ? 2 : 0;
7551                   cand_pref
7552                     += TREE_CODE (cand->iv->base) == INTEGER_CST
7553                     ? 1 : 0;
7554                   if (best_cand == NULL || best_pref < cand_pref)
7555                     {
7556                       best_cand = cand;
7557                       best_pref = cand_pref;
7558                     }
7559                 }
7560
7561               if (!best_cand)
7562                 continue;
7563
7564               tree comp = get_computation_at (data->current_loop,
7565                                               &dummy_use, best_cand,
7566                                               SSA_NAME_DEF_STMT (def));
7567               if (!comp)
7568                 continue;
7569
7570               if (count > 1)
7571                 {
7572                   tree vexpr = make_node (DEBUG_EXPR_DECL);
7573                   DECL_ARTIFICIAL (vexpr) = 1;
7574                   TREE_TYPE (vexpr) = TREE_TYPE (comp);
7575                   if (SSA_NAME_VAR (def))
7576                     DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7577                   else
7578                     DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7579                   gdebug *def_temp
7580                     = gimple_build_debug_bind (vexpr, comp, NULL);
7581                   gimple_stmt_iterator gsi;
7582
7583                   if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7584                     gsi = gsi_after_labels (gimple_bb
7585                                             (SSA_NAME_DEF_STMT (def)));
7586                   else
7587                     gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7588
7589                   gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7590                   comp = vexpr;
7591                 }
7592
7593               FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7594                 {
7595                   if (!gimple_debug_bind_p (stmt))
7596                     continue;
7597
7598                   FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7599                     SET_USE (use_p, comp);
7600
7601                   update_stmt (stmt);
7602                 }
7603             }
7604         }
7605     }
7606
7607   release_defs_bitset (toremove);
7608
7609   BITMAP_FREE (toremove);
7610 }
7611
7612 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7613    for hash_map::traverse.  */
7614
7615 bool
7616 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7617 {
7618   free (value);
7619   return true;
7620 }
7621
7622 /* Frees data allocated by the optimization of a single loop.  */
7623
7624 static void
7625 free_loop_data (struct ivopts_data *data)
7626 {
7627   unsigned i, j;
7628   bitmap_iterator bi;
7629   tree obj;
7630
7631   if (data->niters)
7632     {
7633       data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7634       delete data->niters;
7635       data->niters = NULL;
7636     }
7637
7638   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7639     {
7640       struct version_info *info;
7641
7642       info = ver_info (data, i);
7643       info->iv = NULL;
7644       info->has_nonlin_use = false;
7645       info->preserve_biv = false;
7646       info->inv_id = 0;
7647     }
7648   bitmap_clear (data->relevant);
7649   bitmap_clear (data->important_candidates);
7650
7651   for (i = 0; i < data->vgroups.length (); i++)
7652     {
7653       struct iv_group *group = data->vgroups[i];
7654
7655       for (j = 0; j < group->vuses.length (); j++)
7656         free (group->vuses[j]);
7657       group->vuses.release ();
7658
7659       BITMAP_FREE (group->related_cands);
7660       for (j = 0; j < group->n_map_members; j++)
7661         if (group->cost_map[j].depends_on)
7662           BITMAP_FREE (group->cost_map[j].depends_on);
7663
7664       free (group->cost_map);
7665       free (group);
7666     }
7667   data->vgroups.truncate (0);
7668
7669   for (i = 0; i < data->vcands.length (); i++)
7670     {
7671       struct iv_cand *cand = data->vcands[i];
7672
7673       if (cand->depends_on)
7674         BITMAP_FREE (cand->depends_on);
7675       free (cand);
7676     }
7677   data->vcands.truncate (0);
7678
7679   if (data->version_info_size < num_ssa_names)
7680     {
7681       data->version_info_size = 2 * num_ssa_names;
7682       free (data->version_info);
7683       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7684     }
7685
7686   data->max_inv_id = 0;
7687
7688   FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7689     SET_DECL_RTL (obj, NULL_RTX);
7690
7691   decl_rtl_to_reset.truncate (0);
7692
7693   data->inv_expr_tab->empty ();
7694   data->max_inv_expr_id = 0;
7695
7696   data->iv_common_cand_tab->empty ();
7697   data->iv_common_cands.truncate (0);
7698 }
7699
7700 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
7701    loop tree.  */
7702
7703 static void
7704 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7705 {
7706   free_loop_data (data);
7707   free (data->version_info);
7708   BITMAP_FREE (data->relevant);
7709   BITMAP_FREE (data->important_candidates);
7710
7711   decl_rtl_to_reset.release ();
7712   data->vgroups.release ();
7713   data->vcands.release ();
7714   delete data->inv_expr_tab;
7715   data->inv_expr_tab = NULL;
7716   free_affine_expand_cache (&data->name_expansion_cache);
7717   delete data->iv_common_cand_tab;
7718   data->iv_common_cand_tab = NULL;
7719   data->iv_common_cands.release ();
7720   obstack_free (&data->iv_obstack, NULL);
7721 }
7722
7723 /* Returns true if the loop body BODY includes any function calls.  */
7724
7725 static bool
7726 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7727 {
7728   gimple_stmt_iterator gsi;
7729   unsigned i;
7730
7731   for (i = 0; i < num_nodes; i++)
7732     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7733       {
7734         gimple *stmt = gsi_stmt (gsi);
7735         if (is_gimple_call (stmt)
7736             && !gimple_call_internal_p (stmt)
7737             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7738           return true;
7739       }
7740   return false;
7741 }
7742
7743 /* Optimizes the LOOP.  Returns true if anything changed.  */
7744
7745 static bool
7746 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7747 {
7748   bool changed = false;
7749   struct iv_ca *iv_ca;
7750   edge exit = single_dom_exit (loop);
7751   basic_block *body;
7752
7753   gcc_assert (!data->niters);
7754   data->current_loop = loop;
7755   data->loop_loc = find_loop_location (loop);
7756   data->speed = optimize_loop_for_speed_p (loop);
7757
7758   if (dump_file && (dump_flags & TDF_DETAILS))
7759     {
7760       fprintf (dump_file, "Processing loop %d", loop->num);
7761       if (data->loop_loc != UNKNOWN_LOCATION)
7762         fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7763                  LOCATION_LINE (data->loop_loc));
7764       fprintf (dump_file, "\n");
7765
7766       if (exit)
7767         {
7768           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
7769                    exit->src->index, exit->dest->index);
7770           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7771           fprintf (dump_file, "\n");
7772         }
7773
7774       fprintf (dump_file, "\n");
7775     }
7776
7777   body = get_loop_body (loop);
7778   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7779   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7780   free (body);
7781
7782   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7783
7784   /* For each ssa name determines whether it behaves as an induction variable
7785      in some loop.  */
7786   if (!find_induction_variables (data))
7787     goto finish;
7788
7789   /* Finds interesting uses (item 1).  */
7790   find_interesting_uses (data);
7791   if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7792     goto finish;
7793
7794   /* Finds candidates for the induction variables (item 2).  */
7795   find_iv_candidates (data);
7796
7797   /* Calculates the costs (item 3, part 1).  */
7798   determine_iv_costs (data);
7799   determine_group_iv_costs (data);
7800   determine_set_costs (data);
7801
7802   /* Find the optimal set of induction variables (item 3, part 2).  */
7803   iv_ca = find_optimal_iv_set (data);
7804   if (!iv_ca)
7805     goto finish;
7806   changed = true;
7807
7808   /* Create the new induction variables (item 4, part 1).  */
7809   create_new_ivs (data, iv_ca);
7810   iv_ca_free (&iv_ca);
7811
7812   /* Rewrite the uses (item 4, part 2).  */
7813   rewrite_groups (data);
7814
7815   /* Remove the ivs that are unused after rewriting.  */
7816   remove_unused_ivs (data);
7817
7818   /* We have changed the structure of induction variables; it might happen
7819      that definitions in the scev database refer to some of them that were
7820      eliminated.  */
7821   scev_reset ();
7822
7823 finish:
7824   free_loop_data (data);
7825
7826   return changed;
7827 }
7828
7829 /* Main entry point.  Optimizes induction variables in loops.  */
7830
7831 void
7832 tree_ssa_iv_optimize (void)
7833 {
7834   struct loop *loop;
7835   struct ivopts_data data;
7836
7837   tree_ssa_iv_optimize_init (&data);
7838
7839   /* Optimize the loops starting with the innermost ones.  */
7840   FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7841     {
7842       if (dump_file && (dump_flags & TDF_DETAILS))
7843         flow_loop_dump (loop, dump_file, NULL, 1);
7844
7845       tree_ssa_iv_optimize_loop (&data, loop);
7846     }
7847
7848   tree_ssa_iv_optimize_finalize (&data);
7849 }