gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
         the best induction variable in the set and adds its cost to the sum.
         The cost reflects the time spent on modifying the induction
         variable's value to be usable for the given purpose (adding base and
         offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
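
/* Editorial illustration (not part of the original sources): for a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   step 1 records the address use a[i] and the compare use i < n, step 2 adds
   candidates such as the original biv i and a pointer iv p = &a[i], and if
   step 3 decides that expressing both uses through p is cheaper, step 4
   rewrites the loop roughly as

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   i.e. strength reduction of the address computation plus elimination of the
   original counter.  The actual outcome depends on the target cost model.  */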
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = max_stmt_executions_int (loop);
      if (niter == -1 || niter > AVG_LOOP_NITER (loop))
	return AVG_LOOP_NITER (loop);
    }

  return niter;
}
struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The use recorded for this iv in a
				   nonlinear expression, if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For a biv, indicates whether it's used in any
			   address type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost = {0, 0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY, INFTY};
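
/* Editorial note (not from the original sources): comp_cost values are
   compared primarily by the COST field, with COMPLEXITY acting as a
   tie-breaker, so a set priced {cost = 8, complexity = 2} is preferred over
   {cost = 8, complexity = 3} and over any set whose cost exceeds 8; the
   SCRATCH field only carries intermediate data during cost computation.  */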
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  iv_inv_expr_ent *inv_expr;	/* Loop invariant expression.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in the step of
			   the biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from a biv
			   with smaller type.  */
};

/* Hashtable entry for a common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}
342 /* Hashtable helpers. */
344 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
346 static inline hashval_t hash (const iv_inv_expr_ent *);
347 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
350 /* Hash function for loop invariant expressions. */
352 inline hashval_t
353 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
355 return expr->hash;
358 /* Hash table equality function for expressions. */
360 inline bool
361 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
362 const iv_inv_expr_ent *expr2)
364 return expr1->hash == expr2->hash
365 && operand_equal_p (expr1->expr, expr2->expr, 0);
368 struct ivopts_data
370 /* The currently optimized loop. */
371 struct loop *current_loop;
372 source_location loop_loc;
374 /* Numbers of iterations for all exits of the current loop. */
375 hash_map<edge, tree_niter_desc *> *niters;
377 /* Number of registers used in it. */
378 unsigned regs_used;
380 /* The size of version_info array allocated. */
381 unsigned version_info_size;
383 /* The array of information for the ssa names. */
384 struct version_info *version_info;
386 /* The hashtable of loop invariant expressions created
387 by ivopt. */
388 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
390 /* Loop invariant expression id. */
391 int max_inv_expr_id;
393 /* The bitmap of indices in version_info whose value was changed. */
394 bitmap relevant;
396 /* The uses of induction variables. */
397 vec<iv_group *> vgroups;
399 /* The candidates. */
400 vec<iv_cand *> vcands;
402 /* A bitmap of important candidates. */
403 bitmap important_candidates;
405 /* Cache used by tree_to_aff_combination_expand. */
406 hash_map<tree, name_expansion *> *name_expansion_cache;
408 /* The hashtable of common candidates derived from iv uses. */
409 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
411 /* The common candidates. */
412 vec<iv_common_cand *> iv_common_cands;
414 /* The maximum invariant id. */
415 unsigned max_inv_id;
417 /* Number of no_overflow BIVs which are not used in memory address. */
418 unsigned bivs_not_used_in_addr;
420 /* Obstack for iv structure. */
421 struct obstack iv_obstack;
423 /* Whether to consider just related and important candidates when replacing a
424 use. */
425 bool consider_all_candidates;
427 /* Are we optimizing for speed? */
428 bool speed;
430 /* Whether the loop body includes any function calls. */
431 bool body_includes_call;
433 /* Whether the loop body can only be exited via single exit. */
434 bool loop_single_exit_p;
437 /* An assignment of iv candidates to uses. */
439 struct iv_ca
441 /* The number of uses covered by the assignment. */
442 unsigned upto;
444 /* Number of uses that cannot be expressed by the candidates in the set. */
445 unsigned bad_groups;
447 /* Candidate assigned to a use, together with the related costs. */
448 struct cost_pair **cand_for_group;
450 /* Number of times each candidate is used. */
451 unsigned *n_cand_uses;
453 /* The candidates used. */
454 bitmap cands;
456 /* The number of candidates in the set. */
457 unsigned n_cands;
459 /* Total number of registers needed. */
460 unsigned n_regs;
462 /* Total cost of expressing uses. */
463 comp_cost cand_use_cost;
465 /* Total cost of candidates. */
466 unsigned cand_cost;
468 /* Number of times each invariant is used. */
469 unsigned *n_invariant_uses;
471 /* Hash set with used invariant expression. */
472 hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
474 /* Total cost of the assignment. */
475 comp_cost cost;
478 /* Difference of two iv candidate assignments. */
480 struct iv_ca_delta
482 /* Changed group. */
483 struct iv_group *group;
485 /* An old assignment (for rollback purposes). */
486 struct cost_pair *old_cp;
488 /* A new assignment. */
489 struct cost_pair *new_cp;
491 /* Next change in the list. */
492 struct iv_ca_delta *next;
495 /* Bound on number of candidates below that all candidates are considered. */
497 #define CONSIDER_ALL_CANDIDATES_BOUND \
498 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
500 /* If there are more iv occurrences, we just give up (it is quite unlikely that
501 optimizing such a loop would help, and it would take ages). */
503 #define MAX_CONSIDERED_GROUPS \
504 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
506 /* If there are at most this number of ivs in the set, try removing unnecessary
507 ivs from the set always. */
509 #define ALWAYS_PRUNE_CAND_SET_BOUND \
510 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
512 /* The list of trees for that the decl_rtl field must be reset is stored
513 here. */
515 static vec<tree> decl_rtl_to_reset;
517 static comp_cost force_expr_to_var_cost (tree, bool);
519 /* The single loop exit if it dominates the latch, NULL otherwise. */
521 edge
522 single_dom_exit (struct loop *loop)
524 edge exit = single_exit (loop);
526 if (!exit)
527 return NULL;
529 if (!just_once_each_iteration_p (loop, exit->src))
530 return NULL;
532 return exit;
535 /* Dumps information about the induction variable IV to FILE. Don't dump
536 variable's name if DUMP_NAME is FALSE. The information is dumped with
537 preceding spaces indicated by INDENT_LEVEL. */
539 void
540 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
542 const char *p;
543 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
545 if (indent_level > 4)
546 indent_level = 4;
547 p = spaces + 8 - (indent_level << 1);
549 fprintf (file, "%sIV struct:\n", p);
550 if (iv->ssa_name && dump_name)
552 fprintf (file, "%s SSA_NAME:\t", p);
553 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
554 fprintf (file, "\n");
557 fprintf (file, "%s Type:\t", p);
558 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
559 fprintf (file, "\n");
561 fprintf (file, "%s Base:\t", p);
562 print_generic_expr (file, iv->base, TDF_SLIM);
563 fprintf (file, "\n");
565 fprintf (file, "%s Step:\t", p);
566 print_generic_expr (file, iv->step, TDF_SLIM);
567 fprintf (file, "\n");
569 if (iv->base_object)
571 fprintf (file, "%s Object:\t", p);
572 print_generic_expr (file, iv->base_object, TDF_SLIM);
573 fprintf (file, "\n");
576 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
578 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
579 p, iv->no_overflow ? "No-overflow" : "Overflow");
582 /* Dumps information about the USE to FILE. */
584 void
585 dump_use (FILE *file, struct iv_use *use)
587 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
588 fprintf (file, " At stmt:\t");
589 print_gimple_stmt (file, use->stmt, 0, 0);
590 fprintf (file, " At pos:\t");
591 if (use->op_p)
592 print_generic_expr (file, *use->op_p, TDF_SLIM);
593 fprintf (file, "\n");
594 dump_iv (file, use->iv, false, 2);
597 /* Dumps information about the uses to FILE. */
599 void
600 dump_groups (FILE *file, struct ivopts_data *data)
602 unsigned i, j;
603 struct iv_group *group;
605 for (i = 0; i < data->vgroups.length (); i++)
607 group = data->vgroups[i];
608 fprintf (file, "Group %d:\n", group->id);
609 if (group->type == USE_NONLINEAR_EXPR)
610 fprintf (file, " Type:\tGENERIC\n");
611 else if (group->type == USE_ADDRESS)
612 fprintf (file, " Type:\tADDRESS\n");
613 else
615 gcc_assert (group->type == USE_COMPARE);
616 fprintf (file, " Type:\tCOMPARE\n");
618 for (j = 0; j < group->vuses.length (); j++)
619 dump_use (file, group->vuses[j]);
623 /* Dumps information about induction variable candidate CAND to FILE. */
625 void
626 dump_cand (FILE *file, struct iv_cand *cand)
628 struct iv *iv = cand->iv;
630 fprintf (file, "Candidate %d:\n", cand->id);
631 if (cand->depends_on)
633 fprintf (file, " Depend on: ");
634 dump_bitmap (file, cand->depends_on);
637 if (cand->var_before)
639 fprintf (file, " Var befor: ");
640 print_generic_expr (file, cand->var_before, TDF_SLIM);
641 fprintf (file, "\n");
643 if (cand->var_after)
645 fprintf (file, " Var after: ");
646 print_generic_expr (file, cand->var_after, TDF_SLIM);
647 fprintf (file, "\n");
650 switch (cand->pos)
652 case IP_NORMAL:
653 fprintf (file, " Incr POS: before exit test\n");
654 break;
656 case IP_BEFORE_USE:
657 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
658 break;
660 case IP_AFTER_USE:
661 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
662 break;
664 case IP_END:
665 fprintf (file, " Incr POS: at end\n");
666 break;
668 case IP_ORIGINAL:
669 fprintf (file, " Incr POS: orig biv\n");
670 break;
673 dump_iv (file, iv, false, 1);
676 /* Returns the info for ssa version VER. */
678 static inline struct version_info *
679 ver_info (struct ivopts_data *data, unsigned ver)
681 return data->version_info + ver;
684 /* Returns the info for ssa name NAME. */
686 static inline struct version_info *
687 name_info (struct ivopts_data *data, tree name)
689 return ver_info (data, SSA_NAME_VERSION (name));
692 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
693 emitted in LOOP. */
695 static bool
696 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
698 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
700 gcc_assert (bb);
702 if (sbb == loop->latch)
703 return true;
705 if (sbb != bb)
706 return false;
708 return stmt == last_stmt (bb);
711 /* Returns true if STMT is after the place where the original induction
712 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
713 if the positions are identical. */
715 static bool
716 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
718 basic_block cand_bb = gimple_bb (cand->incremented_at);
719 basic_block stmt_bb = gimple_bb (stmt);
721 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
722 return false;
724 if (stmt_bb != cand_bb)
725 return true;
727 if (true_if_equal
728 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
729 return true;
730 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
733 /* Returns true if STMT is after the place where the induction variable
734 CAND is incremented in LOOP. */
736 static bool
737 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
739 switch (cand->pos)
741 case IP_END:
742 return false;
744 case IP_NORMAL:
745 return stmt_after_ip_normal_pos (loop, stmt);
747 case IP_ORIGINAL:
748 case IP_AFTER_USE:
749 return stmt_after_inc_pos (cand, stmt, false);
751 case IP_BEFORE_USE:
752 return stmt_after_inc_pos (cand, stmt, true);
754 default:
755 gcc_unreachable ();
759 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
761 static bool
762 abnormal_ssa_name_p (tree exp)
764 if (!exp)
765 return false;
767 if (TREE_CODE (exp) != SSA_NAME)
768 return false;
770 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
773 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
774 abnormal phi node. Callback for for_each_index. */
776 static bool
777 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
778 void *data ATTRIBUTE_UNUSED)
780 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
782 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
783 return false;
784 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
785 return false;
788 return !abnormal_ssa_name_p (*index);
791 /* Returns true if EXPR contains a ssa name that occurs in an
792 abnormal phi node. */
794 bool
795 contains_abnormal_ssa_name_p (tree expr)
797 enum tree_code code;
798 enum tree_code_class codeclass;
800 if (!expr)
801 return false;
803 code = TREE_CODE (expr);
804 codeclass = TREE_CODE_CLASS (code);
806 if (code == SSA_NAME)
807 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
809 if (code == INTEGER_CST
810 || is_gimple_min_invariant (expr))
811 return false;
813 if (code == ADDR_EXPR)
814 return !for_each_index (&TREE_OPERAND (expr, 0),
815 idx_contains_abnormal_ssa_name_p,
816 NULL);
818 if (code == COND_EXPR)
819 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
820 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
821 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
823 switch (codeclass)
825 case tcc_binary:
826 case tcc_comparison:
827 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
828 return true;
830 /* Fallthru. */
831 case tcc_unary:
832 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
833 return true;
835 break;
837 default:
838 gcc_unreachable ();
841 return false;
844 /* Returns the structure describing number of iterations determined from
845 EXIT of DATA->current_loop, or NULL if something goes wrong. */
847 static struct tree_niter_desc *
848 niter_for_exit (struct ivopts_data *data, edge exit)
850 struct tree_niter_desc *desc;
851 tree_niter_desc **slot;
853 if (!data->niters)
855 data->niters = new hash_map<edge, tree_niter_desc *>;
856 slot = NULL;
858 else
859 slot = data->niters->get (exit);
861 if (!slot)
863 /* Try to determine number of iterations. We cannot safely work with ssa
864 names that appear in phi nodes on abnormal edges, so that we do not
865 create overlapping life ranges for them (PR 27283). */
866 desc = XNEW (struct tree_niter_desc);
867 if (!number_of_iterations_exit (data->current_loop,
868 exit, desc, true)
869 || contains_abnormal_ssa_name_p (desc->niter))
871 XDELETE (desc);
872 desc = NULL;
874 data->niters->put (exit, desc);
876 else
877 desc = *slot;
879 return desc;
882 /* Returns the structure describing number of iterations determined from
883 single dominating exit of DATA->current_loop, or NULL if something
884 goes wrong. */
886 static struct tree_niter_desc *
887 niter_for_single_dom_exit (struct ivopts_data *data)
889 edge exit = single_dom_exit (data->current_loop);
891 if (!exit)
892 return NULL;
894 return niter_for_exit (data, exit);
897 /* Initializes data structures used by the iv optimization pass, stored
898 in DATA. */
900 static void
901 tree_ssa_iv_optimize_init (struct ivopts_data *data)
903 data->version_info_size = 2 * num_ssa_names;
904 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
905 data->relevant = BITMAP_ALLOC (NULL);
906 data->important_candidates = BITMAP_ALLOC (NULL);
907 data->max_inv_id = 0;
908 data->niters = NULL;
909 data->vgroups.create (20);
910 data->vcands.create (20);
911 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
912 data->max_inv_expr_id = 0;
913 data->name_expansion_cache = NULL;
914 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
915 data->iv_common_cands.create (20);
916 decl_rtl_to_reset.create (20);
917 gcc_obstack_init (&data->iv_obstack);
920 /* Returns a memory object to which EXPR points. In case we are able to
921 determine that it does not point to any such object, NULL is returned. */
923 static tree
924 determine_base_object (tree expr)
926 enum tree_code code = TREE_CODE (expr);
927 tree base, obj;
929 /* If this is a pointer casted to any type, we need to determine
930 the base object for the pointer; so handle conversions before
931 throwing away non-pointer expressions. */
932 if (CONVERT_EXPR_P (expr))
933 return determine_base_object (TREE_OPERAND (expr, 0));
935 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
936 return NULL_TREE;
938 switch (code)
940 case INTEGER_CST:
941 return NULL_TREE;
943 case ADDR_EXPR:
944 obj = TREE_OPERAND (expr, 0);
945 base = get_base_address (obj);
947 if (!base)
948 return expr;
950 if (TREE_CODE (base) == MEM_REF)
951 return determine_base_object (TREE_OPERAND (base, 0));
953 return fold_convert (ptr_type_node,
954 build_fold_addr_expr (base));
956 case POINTER_PLUS_EXPR:
957 return determine_base_object (TREE_OPERAND (expr, 0));
959 case PLUS_EXPR:
960 case MINUS_EXPR:
961 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
962 gcc_unreachable ();
964 default:
965 return fold_convert (ptr_type_node, expr);
969 /* Return true if address expression with non-DECL_P operand appears
970 in EXPR. */
972 static bool
973 contain_complex_addr_expr (tree expr)
975 bool res = false;
977 STRIP_NOPS (expr);
978 switch (TREE_CODE (expr))
980 case POINTER_PLUS_EXPR:
981 case PLUS_EXPR:
982 case MINUS_EXPR:
983 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
984 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
985 break;
987 case ADDR_EXPR:
988 return (!DECL_P (TREE_OPERAND (expr, 0)));
990 default:
991 return false;
994 return res;
997 /* Allocates an induction variable with given initial value BASE and step STEP
998 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1000 static struct iv *
1001 alloc_iv (struct ivopts_data *data, tree base, tree step,
1002 bool no_overflow = false)
1004 tree expr = base;
1005 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1006 sizeof (struct iv));
1007 gcc_assert (step != NULL_TREE);
1009 /* Lower address expression in base except ones with DECL_P as operand.
1010 By doing this:
1011 1) More accurate cost can be computed for address expressions;
1012 2) Duplicate candidates won't be created for bases in different
1013 forms, like &a[0] and &a. */
1014 STRIP_NOPS (expr);
1015 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1016 || contain_complex_addr_expr (expr))
1018 aff_tree comb;
1019 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1020 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1023 iv->base = base;
1024 iv->base_object = determine_base_object (base);
1025 iv->step = step;
1026 iv->biv_p = false;
1027 iv->nonlin_use = NULL;
1028 iv->ssa_name = NULL_TREE;
1029 iv->no_overflow = no_overflow;
1030 iv->have_address_use = false;
1032 return iv;
1035 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1036 doesn't overflow. */
1038 static void
1039 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1040 bool no_overflow)
1042 struct version_info *info = name_info (data, iv);
1044 gcc_assert (!info->iv);
1046 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1047 info->iv = alloc_iv (data, base, step, no_overflow);
1048 info->iv->ssa_name = iv;
1051 /* Finds induction variable declaration for VAR. */
1053 static struct iv *
1054 get_iv (struct ivopts_data *data, tree var)
1056 basic_block bb;
1057 tree type = TREE_TYPE (var);
1059 if (!POINTER_TYPE_P (type)
1060 && !INTEGRAL_TYPE_P (type))
1061 return NULL;
1063 if (!name_info (data, var)->iv)
1065 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1067 if (!bb
1068 || !flow_bb_inside_loop_p (data->current_loop, bb))
1069 set_iv (data, var, var, build_int_cst (type, 0), true);
1072 return name_info (data, var)->iv;
1075 /* Return the first non-invariant ssa var found in EXPR. */
1077 static tree
1078 extract_single_var_from_expr (tree expr)
1080 int i, n;
1081 tree tmp;
1082 enum tree_code code;
1084 if (!expr || is_gimple_min_invariant (expr))
1085 return NULL;
1087 code = TREE_CODE (expr);
1088 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1090 n = TREE_OPERAND_LENGTH (expr);
1091 for (i = 0; i < n; i++)
1093 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1095 if (tmp)
1096 return tmp;
1099 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1102 /* Finds basic ivs. */
1104 static bool
1105 find_bivs (struct ivopts_data *data)
1107 gphi *phi;
1108 affine_iv iv;
1109 tree step, type, base, stop;
1110 bool found = false;
1111 struct loop *loop = data->current_loop;
1112 gphi_iterator psi;
1114 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1116 phi = psi.phi ();
1118 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1119 continue;
1121 if (virtual_operand_p (PHI_RESULT (phi)))
1122 continue;
1124 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1125 continue;
1127 if (integer_zerop (iv.step))
1128 continue;
1130 step = iv.step;
1131 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1132 /* Stop expanding iv base at the first ssa var referred by iv step.
1133 Ideally we should stop at any ssa var, because that's expensive
1134 and unusual to happen, we just do it on the first one.
1136 See PR64705 for the rationale. */
1137 stop = extract_single_var_from_expr (step);
1138 base = expand_simple_operations (base, stop);
1139 if (contains_abnormal_ssa_name_p (base)
1140 || contains_abnormal_ssa_name_p (step))
1141 continue;
1143 type = TREE_TYPE (PHI_RESULT (phi));
1144 base = fold_convert (type, base);
1145 if (step)
1147 if (POINTER_TYPE_P (type))
1148 step = convert_to_ptrofftype (step);
1149 else
1150 step = fold_convert (type, step);
1153 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1154 found = true;
1157 return found;
1160 /* Marks basic ivs. */
1162 static void
1163 mark_bivs (struct ivopts_data *data)
1165 gphi *phi;
1166 gimple *def;
1167 tree var;
1168 struct iv *iv, *incr_iv;
1169 struct loop *loop = data->current_loop;
1170 basic_block incr_bb;
1171 gphi_iterator psi;
1173 data->bivs_not_used_in_addr = 0;
1174 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1176 phi = psi.phi ();
1178 iv = get_iv (data, PHI_RESULT (phi));
1179 if (!iv)
1180 continue;
1182 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1183 def = SSA_NAME_DEF_STMT (var);
1184 /* Don't mark iv peeled from other one as biv. */
1185 if (def
1186 && gimple_code (def) == GIMPLE_PHI
1187 && gimple_bb (def) == loop->header)
1188 continue;
1190 incr_iv = get_iv (data, var);
1191 if (!incr_iv)
1192 continue;
1194 /* If the increment is in the subloop, ignore it. */
1195 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1196 if (incr_bb->loop_father != data->current_loop
1197 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1198 continue;
1200 iv->biv_p = true;
1201 incr_iv->biv_p = true;
1202 if (iv->no_overflow)
1203 data->bivs_not_used_in_addr++;
1204 if (incr_iv->no_overflow)
1205 data->bivs_not_used_in_addr++;
1209 /* Checks whether STMT defines a linear induction variable and stores its
1210 parameters to IV. */
1212 static bool
1213 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1215 tree lhs, stop;
1216 struct loop *loop = data->current_loop;
1218 iv->base = NULL_TREE;
1219 iv->step = NULL_TREE;
1221 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1222 return false;
1224 lhs = gimple_assign_lhs (stmt);
1225 if (TREE_CODE (lhs) != SSA_NAME)
1226 return false;
1228 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1229 return false;
1231 /* Stop expanding iv base at the first ssa var referred by iv step.
1232 Ideally we should stop at any ssa var, because that's expensive
1233 and unusual to happen, we just do it on the first one.
1235 See PR64705 for the rationale. */
1236 stop = extract_single_var_from_expr (iv->step);
1237 iv->base = expand_simple_operations (iv->base, stop);
1238 if (contains_abnormal_ssa_name_p (iv->base)
1239 || contains_abnormal_ssa_name_p (iv->step))
1240 return false;
1242 /* If STMT could throw, then do not consider STMT as defining a GIV.
1243 While this will suppress optimizations, we can not safely delete this
1244 GIV and associated statements, even if it appears it is not used. */
1245 if (stmt_could_throw_p (stmt))
1246 return false;
1248 return true;
1251 /* Finds general ivs in statement STMT. */
1253 static void
1254 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1256 affine_iv iv;
1258 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1259 return;
1261 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1264 /* Finds general ivs in basic block BB. */
1266 static void
1267 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1269 gimple_stmt_iterator bsi;
1271 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1272 find_givs_in_stmt (data, gsi_stmt (bsi));
1275 /* Finds general ivs. */
1277 static void
1278 find_givs (struct ivopts_data *data)
1280 struct loop *loop = data->current_loop;
1281 basic_block *body = get_loop_body_in_dom_order (loop);
1282 unsigned i;
1284 for (i = 0; i < loop->num_nodes; i++)
1285 find_givs_in_bb (data, body[i]);
1286 free (body);
1289 /* For each ssa name defined in LOOP determines whether it is an induction
1290 variable and if so, its initial value and step. */
1292 static bool
1293 find_induction_variables (struct ivopts_data *data)
1295 unsigned i;
1296 bitmap_iterator bi;
1298 if (!find_bivs (data))
1299 return false;
1301 find_givs (data);
1302 mark_bivs (data);
1304 if (dump_file && (dump_flags & TDF_DETAILS))
1306 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1308 if (niter)
1310 fprintf (dump_file, " number of iterations ");
1311 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1312 if (!integer_zerop (niter->may_be_zero))
1314 fprintf (dump_file, "; zero if ");
1315 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1317 fprintf (dump_file, "\n");
1320 fprintf (dump_file, "\n<Induction Vars>:\n");
1321 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1323 struct version_info *info = ver_info (data, i);
1324 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1325 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1329 return true;
1332 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1333 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1334 is the const offset stripped from IV base; for other types use, both
1335 are zero by default. */
1337 static struct iv_use *
1338 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1339 gimple *stmt, enum use_type type, tree addr_base,
1340 unsigned HOST_WIDE_INT addr_offset)
1342 struct iv_use *use = XCNEW (struct iv_use);
1344 use->id = group->vuses.length ();
1345 use->group_id = group->id;
1346 use->type = type;
1347 use->iv = iv;
1348 use->stmt = stmt;
1349 use->op_p = use_p;
1350 use->addr_base = addr_base;
1351 use->addr_offset = addr_offset;
1353 group->vuses.safe_push (use);
1354 return use;
1357 /* Checks whether OP is a loop-level invariant and if so, records it.
1358 NONLINEAR_USE is true if the invariant is used in a way we do not
1359 handle specially. */
1361 static void
1362 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1364 basic_block bb;
1365 struct version_info *info;
1367 if (TREE_CODE (op) != SSA_NAME
1368 || virtual_operand_p (op))
1369 return;
1371 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1372 if (bb
1373 && flow_bb_inside_loop_p (data->current_loop, bb))
1374 return;
1376 info = name_info (data, op);
1377 info->name = op;
1378 info->has_nonlin_use |= nonlinear_use;
1379 if (!info->inv_id)
1380 info->inv_id = ++data->max_inv_id;
1381 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1384 static tree
1385 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1387 /* Record a group of TYPE. */
1389 static struct iv_group *
1390 record_group (struct ivopts_data *data, enum use_type type)
1392 struct iv_group *group = XCNEW (struct iv_group);
1394 group->id = data->vgroups.length ();
1395 group->type = type;
1396 group->related_cands = BITMAP_ALLOC (NULL);
1397 group->vuses.create (1);
1399 data->vgroups.safe_push (group);
1400 return group;
1403 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1404 New group will be created if there is no existing group for the use. */
1406 static struct iv_use *
1407 record_group_use (struct ivopts_data *data, tree *use_p,
1408 struct iv *iv, gimple *stmt, enum use_type type)
1410 tree addr_base = NULL;
1411 struct iv_group *group = NULL;
1412 unsigned HOST_WIDE_INT addr_offset = 0;
1414 /* Record non address type use in a new group. */
1415 if (type == USE_ADDRESS && iv->base_object)
1417 unsigned int i;
1419 addr_base = strip_offset (iv->base, &addr_offset);
1420 for (i = 0; i < data->vgroups.length (); i++)
1422 struct iv_use *use;
1424 group = data->vgroups[i];
1425 use = group->vuses[0];
1426 if (use->type != USE_ADDRESS || !use->iv->base_object)
1427 continue;
1429 /* Check if it has the same stripped base and step. */
1430 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1431 && operand_equal_p (iv->step, use->iv->step, 0)
1432 && operand_equal_p (addr_base, use->addr_base, 0))
1433 break;
1435 if (i == data->vgroups.length ())
1436 group = NULL;
1439 if (!group)
1440 group = record_group (data, type);
1442 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1445 /* Checks whether the use OP is interesting and if so, records it. */
1447 static struct iv_use *
1448 find_interesting_uses_op (struct ivopts_data *data, tree op)
1450 struct iv *iv;
1451 gimple *stmt;
1452 struct iv_use *use;
1454 if (TREE_CODE (op) != SSA_NAME)
1455 return NULL;
1457 iv = get_iv (data, op);
1458 if (!iv)
1459 return NULL;
1461 if (iv->nonlin_use)
1463 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1464 return iv->nonlin_use;
1467 if (integer_zerop (iv->step))
1469 record_invariant (data, op, true);
1470 return NULL;
1473 stmt = SSA_NAME_DEF_STMT (op);
1474 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1476 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1477 iv->nonlin_use = use;
1478 return use;
1481 /* Given a condition in statement STMT, checks whether it is a compare
1482 of an induction variable and an invariant. If this is the case,
1483 CONTROL_VAR is set to location of the iv, BOUND to the location of
1484 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1485 induction variable descriptions, and true is returned. If this is not
1486 the case, CONTROL_VAR and BOUND are set to the arguments of the
1487 condition and false is returned. */
1489 static bool
1490 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1491 tree **control_var, tree **bound,
1492 struct iv **iv_var, struct iv **iv_bound)
1494 /* The objects returned when COND has constant operands. */
1495 static struct iv const_iv;
1496 static tree zero;
1497 tree *op0 = &zero, *op1 = &zero;
1498 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1499 bool ret = false;
1501 if (gimple_code (stmt) == GIMPLE_COND)
1503 gcond *cond_stmt = as_a <gcond *> (stmt);
1504 op0 = gimple_cond_lhs_ptr (cond_stmt);
1505 op1 = gimple_cond_rhs_ptr (cond_stmt);
1507 else
1509 op0 = gimple_assign_rhs1_ptr (stmt);
1510 op1 = gimple_assign_rhs2_ptr (stmt);
1513 zero = integer_zero_node;
1514 const_iv.step = integer_zero_node;
1516 if (TREE_CODE (*op0) == SSA_NAME)
1517 iv0 = get_iv (data, *op0);
1518 if (TREE_CODE (*op1) == SSA_NAME)
1519 iv1 = get_iv (data, *op1);
1521 /* Exactly one of the compared values must be an iv, and the other one must
1522 be an invariant. */
1523 if (!iv0 || !iv1)
1524 goto end;
1526 if (integer_zerop (iv0->step))
1528 /* Control variable may be on the other side. */
1529 std::swap (op0, op1);
1530 std::swap (iv0, iv1);
1532 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1534 end:
1535 if (control_var)
1536 *control_var = op0;
1537 if (iv_var)
1538 *iv_var = iv0;
1539 if (bound)
1540 *bound = op1;
1541 if (iv_bound)
1542 *iv_bound = iv1;
1544 return ret;
1547 /* Checks whether the condition in STMT is interesting and if so,
1548 records it. */
1550 static void
1551 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1553 tree *var_p, *bound_p;
1554 struct iv *var_iv;
1556 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1558 find_interesting_uses_op (data, *var_p);
1559 find_interesting_uses_op (data, *bound_p);
1560 return;
1563 record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1566 /* Returns the outermost loop EXPR is obviously invariant in
1567 relative to the loop LOOP, i.e. if all its operands are defined
1568 outside of the returned loop. Returns NULL if EXPR is not
1569 even obviously invariant in LOOP. */
1571 struct loop *
1572 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1574 basic_block def_bb;
1575 unsigned i, len;
1577 if (is_gimple_min_invariant (expr))
1578 return current_loops->tree_root;
1580 if (TREE_CODE (expr) == SSA_NAME)
1582 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1583 if (def_bb)
1585 if (flow_bb_inside_loop_p (loop, def_bb))
1586 return NULL;
1587 return superloop_at_depth (loop,
1588 loop_depth (def_bb->loop_father) + 1);
1591 return current_loops->tree_root;
1594 if (!EXPR_P (expr))
1595 return NULL;
1597 unsigned maxdepth = 0;
1598 len = TREE_OPERAND_LENGTH (expr);
1599 for (i = 0; i < len; i++)
1601 struct loop *ivloop;
1602 if (!TREE_OPERAND (expr, i))
1603 continue;
1605 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1606 if (!ivloop)
1607 return NULL;
1608 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1611 return superloop_at_depth (loop, maxdepth);
1614 /* Returns true if expression EXPR is obviously invariant in LOOP,
1615 i.e. if all its operands are defined outside of the LOOP. LOOP
1616 should not be the function body. */
1618 bool
1619 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1621 basic_block def_bb;
1622 unsigned i, len;
1624 gcc_assert (loop_depth (loop) > 0);
1626 if (is_gimple_min_invariant (expr))
1627 return true;
1629 if (TREE_CODE (expr) == SSA_NAME)
1631 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1632 if (def_bb
1633 && flow_bb_inside_loop_p (loop, def_bb))
1634 return false;
1636 return true;
1639 if (!EXPR_P (expr))
1640 return false;
1642 len = TREE_OPERAND_LENGTH (expr);
1643 for (i = 0; i < len; i++)
1644 if (TREE_OPERAND (expr, i)
1645 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1646 return false;
1648 return true;
1651 /* Given expression EXPR which computes inductive values with respect
1652 to loop recorded in DATA, this function returns biv from which EXPR
1653 is derived by tracing definition chains of ssa variables in EXPR. */
1655 static struct iv*
1656 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1658 struct iv *iv;
1659 unsigned i, n;
1660 tree e2, e1;
1661 enum tree_code code;
1662 gimple *stmt;
1664 if (expr == NULL_TREE)
1665 return NULL;
1667 if (is_gimple_min_invariant (expr))
1668 return NULL;
1670 code = TREE_CODE (expr);
1671 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1673 n = TREE_OPERAND_LENGTH (expr);
1674 for (i = 0; i < n; i++)
1676 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1677 if (iv)
1678 return iv;
1682 /* Stop if it's not ssa name. */
1683 if (code != SSA_NAME)
1684 return NULL;
1686 iv = get_iv (data, expr);
1687 if (!iv || integer_zerop (iv->step))
1688 return NULL;
1689 else if (iv->biv_p)
1690 return iv;
1692 stmt = SSA_NAME_DEF_STMT (expr);
1693 if (gphi *phi = dyn_cast <gphi *> (stmt))
1695 ssa_op_iter iter;
1696 use_operand_p use_p;
1698 if (virtual_operand_p (gimple_phi_result (phi)))
1699 return NULL;
1701 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1703 tree use = USE_FROM_PTR (use_p);
1704 iv = find_deriving_biv_for_expr (data, use);
1705 if (iv)
1706 return iv;
1708 return NULL;
1710 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1711 return NULL;
1713 e1 = gimple_assign_rhs1 (stmt);
1714 code = gimple_assign_rhs_code (stmt);
1715 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1716 return find_deriving_biv_for_expr (data, e1);
1718 switch (code)
1720 case MULT_EXPR:
1721 case PLUS_EXPR:
1722 case MINUS_EXPR:
1723 case POINTER_PLUS_EXPR:
1724 /* Increments, decrements and multiplications by a constant
1725 are simple. */
1726 e2 = gimple_assign_rhs2 (stmt);
1727 iv = find_deriving_biv_for_expr (data, e2);
1728 if (iv)
1729 return iv;
1731 /* Fallthru. */
1732 CASE_CONVERT:
1733 /* Casts are simple. */
1734 return find_deriving_biv_for_expr (data, e1);
1736 default:
1737 break;
1740 return NULL;
1743 /* Record that BIV and its predecessor and successor are used in
1744 address type uses. */
1746 static void
1747 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1749 unsigned i;
1750 tree type, base_1, base_2;
1751 bitmap_iterator bi;
1753 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1754 || biv->have_address_use || !biv->no_overflow)
1755 return;
1757 type = TREE_TYPE (biv->base);
1758 if (!INTEGRAL_TYPE_P (type))
1759 return;
1761 biv->have_address_use = true;
1762 data->bivs_not_used_in_addr--;
1763 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1764 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1766 struct iv *iv = ver_info (data, i)->iv;
1768 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1769 || iv->have_address_use || !iv->no_overflow)
1770 continue;
1772 if (type != TREE_TYPE (iv->base)
1773 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1774 continue;
1776 if (!operand_equal_p (biv->step, iv->step, 0))
1777 continue;
1779 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1780 if (operand_equal_p (base_1, iv->base, 0)
1781 || operand_equal_p (base_2, biv->base, 0))
1783 iv->have_address_use = true;
1784 data->bivs_not_used_in_addr--;
1789 /* Cumulates the steps of indices into DATA and replaces their values with the
1790 initial ones. Returns false when the value of the index cannot be determined.
1791 Callback for for_each_index. */
1793 struct ifs_ivopts_data
1795 struct ivopts_data *ivopts_data;
1796 gimple *stmt;
1797 tree step;
1800 static bool
1801 idx_find_step (tree base, tree *idx, void *data)
1803 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1804 struct iv *iv;
1805 bool use_overflow_semantics = false;
1806 tree step, iv_base, iv_step, lbound, off;
1807 struct loop *loop = dta->ivopts_data->current_loop;
1809 /* If base is a component ref, require that the offset of the reference
1810 be invariant. */
1811 if (TREE_CODE (base) == COMPONENT_REF)
1813 off = component_ref_field_offset (base);
1814 return expr_invariant_in_loop_p (loop, off);
1817 /* If base is array, first check whether we will be able to move the
1818 reference out of the loop (in order to take its address in strength
1819 reduction). In order for this to work we need both lower bound
1820 and step to be loop invariants. */
1821 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1823 /* Moreover, for a range, the size needs to be invariant as well. */
1824 if (TREE_CODE (base) == ARRAY_RANGE_REF
1825 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1826 return false;
1828 step = array_ref_element_size (base);
1829 lbound = array_ref_low_bound (base);
1831 if (!expr_invariant_in_loop_p (loop, step)
1832 || !expr_invariant_in_loop_p (loop, lbound))
1833 return false;
1836 if (TREE_CODE (*idx) != SSA_NAME)
1837 return true;
1839 iv = get_iv (dta->ivopts_data, *idx);
1840 if (!iv)
1841 return false;
1843 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1844 *&x[0], which is not folded and does not trigger the
1845 ARRAY_REF path below. */
1846 *idx = iv->base;
1848 if (integer_zerop (iv->step))
1849 return true;
1851 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1853 step = array_ref_element_size (base);
1855 /* We only handle addresses whose step is an integer constant. */
1856 if (TREE_CODE (step) != INTEGER_CST)
1857 return false;
1859 else
1860 /* The step for pointer arithmetics already is 1 byte. */
1861 step = size_one_node;
1863 iv_base = iv->base;
1864 iv_step = iv->step;
1865 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1866 use_overflow_semantics = true;
1868 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1869 sizetype, &iv_base, &iv_step, dta->stmt,
1870 use_overflow_semantics))
1872 /* The index might wrap. */
1873 return false;
1876 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1877 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1879 if (dta->ivopts_data->bivs_not_used_in_addr)
1881 if (!iv->biv_p)
1882 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1884 record_biv_for_address_use (dta->ivopts_data, iv);
1886 return true;
1889 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1890 object is passed to it in DATA. */
1892 static bool
1893 idx_record_use (tree base, tree *idx,
1894 void *vdata)
1896 struct ivopts_data *data = (struct ivopts_data *) vdata;
1897 find_interesting_uses_op (data, *idx);
1898 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1900 find_interesting_uses_op (data, array_ref_element_size (base));
1901 find_interesting_uses_op (data, array_ref_low_bound (base));
1903 return true;
1906 /* If we can prove that TOP = cst * BOT for some constant cst,
1907 store cst to MUL and return true. Otherwise return false.
1908 The returned value is always sign-extended, regardless of the
1909 signedness of TOP and BOT. */
1911 static bool
1912 constant_multiple_of (tree top, tree bot, widest_int *mul)
1914 tree mby;
1915 enum tree_code code;
1916 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1917 widest_int res, p0, p1;
1919 STRIP_NOPS (top);
1920 STRIP_NOPS (bot);
1922 if (operand_equal_p (top, bot, 0))
1924 *mul = 1;
1925 return true;
1928 code = TREE_CODE (top);
1929 switch (code)
1931 case MULT_EXPR:
1932 mby = TREE_OPERAND (top, 1);
1933 if (TREE_CODE (mby) != INTEGER_CST)
1934 return false;
1936 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1937 return false;
1939 *mul = wi::sext (res * wi::to_widest (mby), precision);
1940 return true;
1942 case PLUS_EXPR:
1943 case MINUS_EXPR:
1944 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1945 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1946 return false;
1948 if (code == MINUS_EXPR)
1949 p1 = -p1;
1950 *mul = wi::sext (p0 + p1, precision);
1951 return true;
1953 case INTEGER_CST:
1954 if (TREE_CODE (bot) != INTEGER_CST)
1955 return false;
1957 p0 = widest_int::from (top, SIGNED);
1958 p1 = widest_int::from (bot, SIGNED);
1959 if (p1 == 0)
1960 return false;
1961 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1962 return res == 0;
1964 default:
1965 return false;
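/* Editorial sketch (not part of the original sources): a few worked cases for
   constant_multiple_of above, assuming TOP and BOT are the trees shown:

     constant_multiple_of (8, 2, &mul)      -> true,  *mul = 4
     constant_multiple_of (8, 3, &mul)      -> false  (non-zero remainder)
     constant_multiple_of (n * 4, n, &mul)  -> true,  *mul = 4
     constant_multiple_of (n, n * 4, &mul)  -> false  (no constant multiple)

   This is the kind of query used later when a use's iv step must be expressed
   as a constant multiple of a candidate's step.  */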
1969 /* Return true if memory reference REF with step STEP may be unaligned. */
1971 static bool
1972 may_be_unaligned_p (tree ref, tree step)
1974 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1975 thus they are not misaligned. */
1976 if (TREE_CODE (ref) == TARGET_MEM_REF)
1977 return false;
1979 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1980 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1981 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1983 unsigned HOST_WIDE_INT bitpos;
1984 unsigned int ref_align;
1985 get_object_alignment_1 (ref, &ref_align, &bitpos);
1986 if (ref_align < align
1987 || (bitpos % align) != 0
1988 || (bitpos % BITS_PER_UNIT) != 0)
1989 return true;
1991 unsigned int trailing_zeros = tree_ctz (step);
1992 if (trailing_zeros < HOST_BITS_PER_INT
1993 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1994 return true;
1996 return false;
1999 /* Return true if EXPR may be non-addressable. */
2001 bool
2002 may_be_nonaddressable_p (tree expr)
2004 switch (TREE_CODE (expr))
2006 case TARGET_MEM_REF:
2007 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2008 target, thus they are always addressable. */
2009 return false;
2011 case MEM_REF:
2012 /* Likewise for MEM_REFs, modulo the storage order. */
2013 return REF_REVERSE_STORAGE_ORDER (expr);
2015 case BIT_FIELD_REF:
2016 if (REF_REVERSE_STORAGE_ORDER (expr))
2017 return true;
2018 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2020 case COMPONENT_REF:
2021 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2022 return true;
2023 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2024 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2026 case ARRAY_REF:
2027 case ARRAY_RANGE_REF:
2028 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2029 return true;
2030 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2032 case VIEW_CONVERT_EXPR:
2033 /* This kind of view-conversions may wrap non-addressable objects
2034 and make them look addressable. After some processing the
2035 non-addressability may be uncovered again, causing ADDR_EXPRs
2036 of inappropriate objects to be built. */
2037 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2038 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2039 return true;
2040 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2042 CASE_CONVERT:
2043 return true;
2045 default:
2046 break;
2049 return false;
2052 /* Finds addresses in *OP_P inside STMT. */
2054 static void
2055 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2056 tree *op_p)
2058 tree base = *op_p, step = size_zero_node;
2059 struct iv *civ;
2060 struct ifs_ivopts_data ifs_ivopts_data;
2062 /* Do not play with volatile memory references. A bit too conservative,
2063 perhaps, but safe. */
2064 if (gimple_has_volatile_ops (stmt))
2065 goto fail;
2067 /* Ignore bitfields for now. Not really something terribly complicated
2068 to handle. TODO. */
2069 if (TREE_CODE (base) == BIT_FIELD_REF)
2070 goto fail;
2072 base = unshare_expr (base);
2074 if (TREE_CODE (base) == TARGET_MEM_REF)
2076 tree type = build_pointer_type (TREE_TYPE (base));
2077 tree astep;
2079 if (TMR_BASE (base)
2080 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2082 civ = get_iv (data, TMR_BASE (base));
2083 if (!civ)
2084 goto fail;
2086 TMR_BASE (base) = civ->base;
2087 step = civ->step;
2089 if (TMR_INDEX2 (base)
2090 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2092 civ = get_iv (data, TMR_INDEX2 (base));
2093 if (!civ)
2094 goto fail;
2096 TMR_INDEX2 (base) = civ->base;
2097 step = civ->step;
2099 if (TMR_INDEX (base)
2100 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2102 civ = get_iv (data, TMR_INDEX (base));
2103 if (!civ)
2104 goto fail;
2106 TMR_INDEX (base) = civ->base;
2107 astep = civ->step;
2109 if (astep)
2111 if (TMR_STEP (base))
2112 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2114 step = fold_build2 (PLUS_EXPR, type, step, astep);
2118 if (integer_zerop (step))
2119 goto fail;
2120 base = tree_mem_ref_addr (type, base);
2122 else
2124 ifs_ivopts_data.ivopts_data = data;
2125 ifs_ivopts_data.stmt = stmt;
2126 ifs_ivopts_data.step = size_zero_node;
2127 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2128 || integer_zerop (ifs_ivopts_data.step))
2129 goto fail;
2130 step = ifs_ivopts_data.step;
2132 /* Check that the base expression is addressable. This needs
2133 to be done after substituting bases of IVs into it. */
2134 if (may_be_nonaddressable_p (base))
2135 goto fail;
2137 /* Moreover, on strict alignment platforms, check that it is
2138 sufficiently aligned. */
2139 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2140 goto fail;
2142 base = build_fold_addr_expr (base);
2144 /* Substituting bases of IVs into the base expression might
2145 have caused folding opportunities. */
2146 if (TREE_CODE (base) == ADDR_EXPR)
2148 tree *ref = &TREE_OPERAND (base, 0);
2149 while (handled_component_p (*ref))
2150 ref = &TREE_OPERAND (*ref, 0);
2151 if (TREE_CODE (*ref) == MEM_REF)
2153 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2154 TREE_OPERAND (*ref, 0),
2155 TREE_OPERAND (*ref, 1));
2156 if (tem)
2157 *ref = tem;
2162 civ = alloc_iv (data, base, step);
2163 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2164 return;
2166 fail:
2167 for_each_index (op_p, idx_record_use, data);
2170 /* Finds and records invariants used in STMT. */
2172 static void
2173 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2175 ssa_op_iter iter;
2176 use_operand_p use_p;
2177 tree op;
2179 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2181 op = USE_FROM_PTR (use_p);
2182 record_invariant (data, op, false);
2186 /* Finds interesting uses of induction variables in the statement STMT. */
2188 static void
2189 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2191 struct iv *iv;
2192 tree op, *lhs, *rhs;
2193 ssa_op_iter iter;
2194 use_operand_p use_p;
2195 enum tree_code code;
2197 find_invariants_stmt (data, stmt);
2199 if (gimple_code (stmt) == GIMPLE_COND)
2201 find_interesting_uses_cond (data, stmt);
2202 return;
2205 if (is_gimple_assign (stmt))
2207 lhs = gimple_assign_lhs_ptr (stmt);
2208 rhs = gimple_assign_rhs1_ptr (stmt);
2210 if (TREE_CODE (*lhs) == SSA_NAME)
2212 /* If the statement defines an induction variable, the uses are not
2213 interesting by themselves. */
2215 iv = get_iv (data, *lhs);
2217 if (iv && !integer_zerop (iv->step))
2218 return;
2221 code = gimple_assign_rhs_code (stmt);
2222 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2223 && (REFERENCE_CLASS_P (*rhs)
2224 || is_gimple_val (*rhs)))
2226 if (REFERENCE_CLASS_P (*rhs))
2227 find_interesting_uses_address (data, stmt, rhs);
2228 else
2229 find_interesting_uses_op (data, *rhs);
2231 if (REFERENCE_CLASS_P (*lhs))
2232 find_interesting_uses_address (data, stmt, lhs);
2233 return;
2235 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2237 find_interesting_uses_cond (data, stmt);
2238 return;
2241 /* TODO -- we should also handle address uses of type
2243 memory = call (whatever);
2245 and
2247 call (memory). */
2250 if (gimple_code (stmt) == GIMPLE_PHI
2251 && gimple_bb (stmt) == data->current_loop->header)
2253 iv = get_iv (data, PHI_RESULT (stmt));
2255 if (iv && !integer_zerop (iv->step))
2256 return;
2259 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2261 op = USE_FROM_PTR (use_p);
2263 if (TREE_CODE (op) != SSA_NAME)
2264 continue;
2266 iv = get_iv (data, op);
2267 if (!iv)
2268 continue;
2270 find_interesting_uses_op (data, op);
2274 /* Finds interesting uses of induction variables outside of loops
2275 on loop exit edge EXIT. */
2277 static void
2278 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2280 gphi *phi;
2281 gphi_iterator psi;
2282 tree def;
2284 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2286 phi = psi.phi ();
2287 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2288 if (!virtual_operand_p (def))
2289 find_interesting_uses_op (data, def);
2293 /* Compute the maximum offset supported by the [base + offset] addressing
2294 mode for the memory reference represented by USE. */
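/* For illustration (the bound is hypothetical and target-dependent): on a
target whose [base + offset] addressing accepts signed 13-bit offsets, the
probing loop below settles on 4095 for SImode, i.e. the largest offset of
the form (1 << i) - 1 for which reg + 4095 is still accepted as an SImode
memory address. The result is cached per address space and memory mode in
max_offset_list. */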
2296 static HOST_WIDE_INT
2297 compute_max_addr_offset (struct iv_use *use)
2299 int width;
2300 rtx reg, addr;
2301 HOST_WIDE_INT i, off;
2302 unsigned list_index, num;
2303 addr_space_t as;
2304 machine_mode mem_mode, addr_mode;
2305 static vec<HOST_WIDE_INT> max_offset_list;
2307 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2308 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2310 num = max_offset_list.length ();
2311 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2312 if (list_index >= num)
2314 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2315 for (; num < max_offset_list.length (); num++)
2316 max_offset_list[num] = -1;
2319 off = max_offset_list[list_index];
2320 if (off != -1)
2321 return off;
2323 addr_mode = targetm.addr_space.address_mode (as);
2324 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2325 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2327 width = GET_MODE_BITSIZE (addr_mode) - 1;
2328 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2329 width = HOST_BITS_PER_WIDE_INT - 1;
2331 for (i = width; i > 0; i--)
2333 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2334 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2335 if (memory_address_addr_space_p (mem_mode, addr, as))
2336 break;
2338 /* For some strict-alignment targets, the offset must be naturally
2339 aligned. Try an aligned offset if mem_mode is not QImode. */
2340 off = ((unsigned HOST_WIDE_INT) 1 << i);
2341 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2343 off -= GET_MODE_SIZE (mem_mode);
2344 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2345 if (memory_address_addr_space_p (mem_mode, addr, as))
2346 break;
2349 if (i == 0)
2350 off = 0;
2352 max_offset_list[list_index] = off;
2353 return off;
2356 /* Comparison function to sort group in ascending order of addr_offset. */
2358 static int
2359 group_compare_offset (const void *a, const void *b)
2361 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2362 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2364 if ((*u1)->addr_offset != (*u2)->addr_offset)
2365 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2366 else
2367 return 0;
2370 /* Check if small groups should be split. Return true if no group
2371 contains more than two uses with distinct addr_offsets. Return
2372 false otherwise. We want to split such groups because:
2374 1) Small groups don't have much benefit and may interfere with
2375 general candidate selection.
2376 2) The problem size with only small groups is usually small and the
2377 general algorithm can handle it well.
2379 TODO -- The above claim may not hold when we want to merge memory
2380 accesses with consecutive addresses. */
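/* For example (offsets in bytes, purely illustrative): a group whose uses
have addr_offsets {0, 0, 8} has only two distinct offsets and counts as
small; a single group with offsets {0, 4, 8} has three distinct offsets and
makes this function return false, so nothing is forced to split. */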
2382 static bool
2383 split_small_address_groups_p (struct ivopts_data *data)
2385 unsigned int i, j, distinct = 1;
2386 struct iv_use *pre;
2387 struct iv_group *group;
2389 for (i = 0; i < data->vgroups.length (); i++)
2391 group = data->vgroups[i];
2392 if (group->vuses.length () == 1)
2393 continue;
2395 gcc_assert (group->type == USE_ADDRESS);
2396 if (group->vuses.length () == 2)
2398 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2399 std::swap (group->vuses[0], group->vuses[1]);
2401 else
2402 group->vuses.qsort (group_compare_offset);
2404 if (distinct > 2)
2405 continue;
2407 distinct = 1;
2408 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2410 if (group->vuses[j]->addr_offset != pre->addr_offset)
2412 pre = group->vuses[j];
2413 distinct++;
2416 if (distinct > 2)
2417 break;
2421 return (distinct <= 2);
2424 /* For each group of address type uses, this function further groups
2425 these uses according to the maximum offset supported by the target's
2426 [base + offset] addressing mode. */
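/* A hypothetical example: with a maximum supported offset of 4095 bytes, a
group whose uses sit at offsets {0, 16, 4095, 4096} relative to the first
use is split at the fourth use, leaving one group for {0, 16, 4095} and a
new group for {4096}. */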
2428 static void
2429 split_address_groups (struct ivopts_data *data)
2431 unsigned int i, j;
2432 HOST_WIDE_INT max_offset = -1;
2434 /* Reset max offset to split all small groups. */
2435 if (split_small_address_groups_p (data))
2436 max_offset = 0;
2438 for (i = 0; i < data->vgroups.length (); i++)
2440 struct iv_group *group = data->vgroups[i];
2441 struct iv_use *use = group->vuses[0];
2443 use->id = 0;
2444 use->group_id = group->id;
2445 if (group->vuses.length () == 1)
2446 continue;
2448 if (max_offset != 0)
2449 max_offset = compute_max_addr_offset (use);
2451 for (j = 1; j < group->vuses.length (); j++)
2453 struct iv_use *next = group->vuses[j];
2455 /* Only uses whose offset relative to the first use fits in the
2456 offset part of the addressing mode can be grouped together. */
2457 if (next->addr_offset - use->addr_offset
2458 > (unsigned HOST_WIDE_INT) max_offset)
2459 break;
2461 next->id = j;
2462 next->group_id = group->id;
2464 /* Split group. */
2465 if (j < group->vuses.length ())
2467 struct iv_group *new_group = record_group (data, group->type);
2468 new_group->vuses.safe_splice (group->vuses);
2469 new_group->vuses.block_remove (0, j);
2470 group->vuses.truncate (j);
2475 /* Finds uses of the induction variables that are interesting. */
2477 static void
2478 find_interesting_uses (struct ivopts_data *data)
2480 basic_block bb;
2481 gimple_stmt_iterator bsi;
2482 basic_block *body = get_loop_body (data->current_loop);
2483 unsigned i;
2484 edge e;
2486 for (i = 0; i < data->current_loop->num_nodes; i++)
2488 edge_iterator ei;
2489 bb = body[i];
2491 FOR_EACH_EDGE (e, ei, bb->succs)
2492 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2493 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2494 find_interesting_uses_outside (data, e);
2496 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2497 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2498 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2499 if (!is_gimple_debug (gsi_stmt (bsi)))
2500 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2503 split_address_groups (data);
2505 if (dump_file && (dump_flags & TDF_DETAILS))
2507 bitmap_iterator bi;
2509 fprintf (dump_file, "\n<Invariant Vars>:\n");
2510 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2512 struct version_info *info = ver_info (data, i);
2513 if (info->inv_id)
2515 fprintf (dump_file, "Inv %d:\t", info->inv_id);
2516 print_generic_expr (dump_file, info->name, TDF_SLIM);
2517 fprintf (dump_file, "%s\n",
2518 info->has_nonlin_use ? "" : "\t(eliminable)");
2522 fprintf (dump_file, "\n<IV Groups>:\n");
2523 dump_groups (dump_file, data);
2524 fprintf (dump_file, "\n");
2527 free (body);
2530 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2531 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2532 we are at the top-level of the processed address. */
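/* For example (types and sizes are illustrative): for the address
expression &a[i + 3] with "int a[]" on a target with 4-byte int, the result
is the equivalent of &a[i] with 12 stored in *OFFSET. A top-level
COMPONENT_REF or ARRAY_REF with a constant index is stripped completely,
its byte offset being added to *OFFSET. */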
2534 static tree
2535 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2536 HOST_WIDE_INT *offset)
2538 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2539 enum tree_code code;
2540 tree type, orig_type = TREE_TYPE (expr);
2541 HOST_WIDE_INT off0, off1, st;
2542 tree orig_expr = expr;
2544 STRIP_NOPS (expr);
2546 type = TREE_TYPE (expr);
2547 code = TREE_CODE (expr);
2548 *offset = 0;
2550 switch (code)
2552 case INTEGER_CST:
2553 if (!cst_and_fits_in_hwi (expr)
2554 || integer_zerop (expr))
2555 return orig_expr;
2557 *offset = int_cst_value (expr);
2558 return build_int_cst (orig_type, 0);
2560 case POINTER_PLUS_EXPR:
2561 case PLUS_EXPR:
2562 case MINUS_EXPR:
2563 op0 = TREE_OPERAND (expr, 0);
2564 op1 = TREE_OPERAND (expr, 1);
2566 op0 = strip_offset_1 (op0, false, false, &off0);
2567 op1 = strip_offset_1 (op1, false, false, &off1);
2569 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2570 if (op0 == TREE_OPERAND (expr, 0)
2571 && op1 == TREE_OPERAND (expr, 1))
2572 return orig_expr;
2574 if (integer_zerop (op1))
2575 expr = op0;
2576 else if (integer_zerop (op0))
2578 if (code == MINUS_EXPR)
2579 expr = fold_build1 (NEGATE_EXPR, type, op1);
2580 else
2581 expr = op1;
2583 else
2584 expr = fold_build2 (code, type, op0, op1);
2586 return fold_convert (orig_type, expr);
2588 case MULT_EXPR:
2589 op1 = TREE_OPERAND (expr, 1);
2590 if (!cst_and_fits_in_hwi (op1))
2591 return orig_expr;
2593 op0 = TREE_OPERAND (expr, 0);
2594 op0 = strip_offset_1 (op0, false, false, &off0);
2595 if (op0 == TREE_OPERAND (expr, 0))
2596 return orig_expr;
2598 *offset = off0 * int_cst_value (op1);
2599 if (integer_zerop (op0))
2600 expr = op0;
2601 else
2602 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2604 return fold_convert (orig_type, expr);
2606 case ARRAY_REF:
2607 case ARRAY_RANGE_REF:
2608 if (!inside_addr)
2609 return orig_expr;
2611 step = array_ref_element_size (expr);
2612 if (!cst_and_fits_in_hwi (step))
2613 break;
2615 st = int_cst_value (step);
2616 op1 = TREE_OPERAND (expr, 1);
2617 op1 = strip_offset_1 (op1, false, false, &off1);
2618 *offset = off1 * st;
2620 if (top_compref
2621 && integer_zerop (op1))
2623 /* Strip the component reference completely. */
2624 op0 = TREE_OPERAND (expr, 0);
2625 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2626 *offset += off0;
2627 return op0;
2629 break;
2631 case COMPONENT_REF:
2633 tree field;
2635 if (!inside_addr)
2636 return orig_expr;
2638 tmp = component_ref_field_offset (expr);
2639 field = TREE_OPERAND (expr, 1);
2640 if (top_compref
2641 && cst_and_fits_in_hwi (tmp)
2642 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2644 HOST_WIDE_INT boffset, abs_off;
2646 /* Strip the component reference completely. */
2647 op0 = TREE_OPERAND (expr, 0);
2648 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2649 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2650 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2651 if (boffset < 0)
2652 abs_off = -abs_off;
2654 *offset = off0 + int_cst_value (tmp) + abs_off;
2655 return op0;
2658 break;
2660 case ADDR_EXPR:
2661 op0 = TREE_OPERAND (expr, 0);
2662 op0 = strip_offset_1 (op0, true, true, &off0);
2663 *offset += off0;
2665 if (op0 == TREE_OPERAND (expr, 0))
2666 return orig_expr;
2668 expr = build_fold_addr_expr (op0);
2669 return fold_convert (orig_type, expr);
2671 case MEM_REF:
2672 /* ??? Offset operand? */
2673 inside_addr = false;
2674 break;
2676 default:
2677 return orig_expr;
2680 /* Default handling of expressions for which we want to recurse into
2681 the first operand. */
2682 op0 = TREE_OPERAND (expr, 0);
2683 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2684 *offset += off0;
2686 if (op0 == TREE_OPERAND (expr, 0)
2687 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2688 return orig_expr;
2690 expr = copy_node (expr);
2691 TREE_OPERAND (expr, 0) = op0;
2692 if (op1)
2693 TREE_OPERAND (expr, 1) = op1;
2695 /* Inside an address, we might strip the top-level component references,
2696 thus changing the type of the expression. The handling of ADDR_EXPR
2697 will fix that. */
2698 expr = fold_convert (orig_type, expr);
2700 return expr;
2703 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2705 static tree
2706 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2708 HOST_WIDE_INT off;
2709 tree core = strip_offset_1 (expr, false, false, &off);
2710 *offset = off;
2711 return core;
2714 /* Returns a variant of TYPE that can be used as a base for different uses.
2715 We return an unsigned type with the same precision, which avoids problems
2716 with overflows. */
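/* E.g. "int" maps to "unsigned int", and so does "int *" on a
(hypothetical) target where pointers have the same precision as int; an
unsigned type such as "unsigned long" is returned unchanged. */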
2718 static tree
2719 generic_type_for (tree type)
2721 if (POINTER_TYPE_P (type))
2722 return unsigned_type_for (type);
2724 if (TYPE_UNSIGNED (type))
2725 return type;
2727 return unsigned_type_for (type);
2730 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2731 the bitmap in which we should store them. */
2733 static struct ivopts_data *fd_ivopts_data;
2734 static tree
2735 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2737 bitmap *depends_on = (bitmap *) data;
2738 struct version_info *info;
2740 if (TREE_CODE (*expr_p) != SSA_NAME)
2741 return NULL_TREE;
2742 info = name_info (fd_ivopts_data, *expr_p);
2744 if (!info->inv_id || info->has_nonlin_use)
2745 return NULL_TREE;
2747 if (!*depends_on)
2748 *depends_on = BITMAP_ALLOC (NULL);
2749 bitmap_set_bit (*depends_on, info->inv_id);
2751 return NULL_TREE;
2754 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2755 position to POS. If USE is not NULL, the candidate is set as related to
2756 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2757 replacement of the final value of the iv by a direct computation. */
2759 static struct iv_cand *
2760 add_candidate_1 (struct ivopts_data *data,
2761 tree base, tree step, bool important, enum iv_position pos,
2762 struct iv_use *use, gimple *incremented_at,
2763 struct iv *orig_iv = NULL)
2765 unsigned i;
2766 struct iv_cand *cand = NULL;
2767 tree type, orig_type;
2769 gcc_assert (base && step);
2771 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2772 live, but the ivopts code may replace a real pointer with one
2773 pointing before or after the memory block that is then adjusted
2774 into the memory block during the loop. FIXME: It would likely be
2775 better to actually force the pointer live and still use ivopts;
2776 for example, it would be enough to write the pointer into memory
2777 and keep it there until after the loop. */
2778 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2779 return NULL;
2781 /* For non-original variables, make sure their values are computed in a type
2782 that does not invoke undefined behavior on overflows (since in general,
2783 we cannot prove that these induction variables are non-wrapping). */
2784 if (pos != IP_ORIGINAL)
2786 orig_type = TREE_TYPE (base);
2787 type = generic_type_for (orig_type);
2788 if (type != orig_type)
2790 base = fold_convert (type, base);
2791 step = fold_convert (type, step);
2795 for (i = 0; i < data->vcands.length (); i++)
2797 cand = data->vcands[i];
2799 if (cand->pos != pos)
2800 continue;
2802 if (cand->incremented_at != incremented_at
2803 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2804 && cand->ainc_use != use))
2805 continue;
2807 if (operand_equal_p (base, cand->iv->base, 0)
2808 && operand_equal_p (step, cand->iv->step, 0)
2809 && (TYPE_PRECISION (TREE_TYPE (base))
2810 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2811 break;
2814 if (i == data->vcands.length ())
2816 cand = XCNEW (struct iv_cand);
2817 cand->id = i;
2818 cand->iv = alloc_iv (data, base, step);
2819 cand->pos = pos;
2820 if (pos != IP_ORIGINAL)
2822 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2823 cand->var_after = cand->var_before;
2825 cand->important = important;
2826 cand->incremented_at = incremented_at;
2827 data->vcands.safe_push (cand);
2829 if (TREE_CODE (step) != INTEGER_CST)
2831 fd_ivopts_data = data;
2832 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2835 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2836 cand->ainc_use = use;
2837 else
2838 cand->ainc_use = NULL;
2840 cand->orig_iv = orig_iv;
2841 if (dump_file && (dump_flags & TDF_DETAILS))
2842 dump_cand (dump_file, cand);
2845 cand->important |= important;
2847 /* Relate candidate to the group for which it is added. */
2848 if (use)
2849 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
2851 return cand;
2854 /* Returns true if incrementing the induction variable at the end of the LOOP
2855 is allowed.
2857 The purpose is to avoid splitting the latch edge with a biv increment, thus
2858 creating a jump, possibly confusing other optimization passes and leaving
2859 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2860 is not available (so we do not have a better alternative), or if the latch
2861 edge is already nonempty. */
2863 static bool
2864 allow_ip_end_pos_p (struct loop *loop)
2866 if (!ip_normal_pos (loop))
2867 return true;
2869 if (!empty_block_p (ip_end_pos (loop)))
2870 return true;
2872 return false;
2875 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2876 Important field is set to IMPORTANT. */
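/* A sketch of the effect (sizes are illustrative): for a 4-byte access
through *p in a loop where p steps by 4, a target with post-increment
addressing gets a candidate {base, +4} whose increment is placed right
after the use (IP_AFTER_USE), while a target with pre-increment addressing
gets {base - 4, +4} incremented right before it (IP_BEFORE_USE). */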
2878 static void
2879 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2880 bool important, struct iv_use *use)
2882 basic_block use_bb = gimple_bb (use->stmt);
2883 machine_mode mem_mode;
2884 unsigned HOST_WIDE_INT cstepi;
2886 /* If we insert the increment in any position other than the standard
2887 ones, we must ensure that it is incremented once per iteration.
2888 It must not be in an inner nested loop, or one side of an if
2889 statement. */
2890 if (use_bb->loop_father != data->current_loop
2891 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2892 || stmt_could_throw_p (use->stmt)
2893 || !cst_and_fits_in_hwi (step))
2894 return;
2896 cstepi = int_cst_value (step);
2898 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2899 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2900 || USE_STORE_PRE_INCREMENT (mem_mode))
2901 && GET_MODE_SIZE (mem_mode) == cstepi)
2902 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2903 || USE_STORE_PRE_DECREMENT (mem_mode))
2904 && GET_MODE_SIZE (mem_mode) == -cstepi))
2906 enum tree_code code = MINUS_EXPR;
2907 tree new_base;
2908 tree new_step = step;
2910 if (POINTER_TYPE_P (TREE_TYPE (base)))
2912 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2913 code = POINTER_PLUS_EXPR;
2915 else
2916 new_step = fold_convert (TREE_TYPE (base), new_step);
2917 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2918 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2919 use->stmt);
2921 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2922 || USE_STORE_POST_INCREMENT (mem_mode))
2923 && GET_MODE_SIZE (mem_mode) == cstepi)
2924 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2925 || USE_STORE_POST_DECREMENT (mem_mode))
2926 && GET_MODE_SIZE (mem_mode) == -cstepi))
2928 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2929 use->stmt);
2933 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2934 position to POS. If USE is not NULL, the candidate is set as related to
2935 it. The candidate computation is scheduled before the exit condition and at
2936 the end of the loop. */
2938 static void
2939 add_candidate (struct ivopts_data *data,
2940 tree base, tree step, bool important, struct iv_use *use,
2941 struct iv *orig_iv = NULL)
2943 if (ip_normal_pos (data->current_loop))
2944 add_candidate_1 (data, base, step, important,
2945 IP_NORMAL, use, NULL, orig_iv);
2946 if (ip_end_pos (data->current_loop)
2947 && allow_ip_end_pos_p (data->current_loop))
2948 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2951 /* Adds standard iv candidates. */
2953 static void
2954 add_standard_iv_candidates (struct ivopts_data *data)
2956 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2958 /* The same for a double-integer type if it is still fast enough. */
2959 if (TYPE_PRECISION
2960 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2961 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2962 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2963 build_int_cst (long_integer_type_node, 1), true, NULL);
2965 /* The same for a double-integer type if it is still fast enough. */
2966 if (TYPE_PRECISION
2967 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2968 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2969 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2970 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2974 /* Adds candidates based on the old induction variable IV. */
2976 static void
2977 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2979 gimple *phi;
2980 tree def;
2981 struct iv_cand *cand;
2983 /* Check if this biv is used in address type use. */
2984 if (iv->no_overflow && iv->have_address_use
2985 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
2986 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
2988 tree base = fold_convert (sizetype, iv->base);
2989 tree step = fold_convert (sizetype, iv->step);
2991 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
2992 add_candidate (data, base, step, true, NULL, iv);
2993 /* Add iv cand of the original type only if it has nonlinear use. */
2994 if (iv->nonlin_use)
2995 add_candidate (data, iv->base, iv->step, true, NULL);
2997 else
2998 add_candidate (data, iv->base, iv->step, true, NULL);
3000 /* The same, but with initial value zero. */
3001 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3002 add_candidate (data, size_int (0), iv->step, true, NULL);
3003 else
3004 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3005 iv->step, true, NULL);
3007 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3008 if (gimple_code (phi) == GIMPLE_PHI)
3010 /* Additionally record the possibility of leaving the original iv
3011 untouched. */
3012 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3013 /* Don't add candidate if it's from another PHI node because
3014 it's an affine iv appearing in the form of PEELED_CHREC. */
3015 phi = SSA_NAME_DEF_STMT (def);
3016 if (gimple_code (phi) != GIMPLE_PHI)
3018 cand = add_candidate_1 (data,
3019 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3020 SSA_NAME_DEF_STMT (def));
3021 if (cand)
3023 cand->var_before = iv->ssa_name;
3024 cand->var_after = def;
3027 else
3028 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3032 /* Adds candidates based on the old induction variables. */
3034 static void
3035 add_iv_candidate_for_bivs (struct ivopts_data *data)
3037 unsigned i;
3038 struct iv *iv;
3039 bitmap_iterator bi;
3041 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3043 iv = ver_info (data, i)->iv;
3044 if (iv && iv->biv_p && !integer_zerop (iv->step))
3045 add_iv_candidate_for_biv (data, iv);
3049 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3051 static void
3052 record_common_cand (struct ivopts_data *data, tree base,
3053 tree step, struct iv_use *use)
3055 struct iv_common_cand ent;
3056 struct iv_common_cand **slot;
3058 ent.base = base;
3059 ent.step = step;
3060 ent.hash = iterative_hash_expr (base, 0);
3061 ent.hash = iterative_hash_expr (step, ent.hash);
3063 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3064 if (*slot == NULL)
3066 *slot = new iv_common_cand ();
3067 (*slot)->base = base;
3068 (*slot)->step = step;
3069 (*slot)->uses.create (8);
3070 (*slot)->hash = ent.hash;
3071 data->iv_common_cands.safe_push ((*slot));
3074 gcc_assert (use != NULL);
3075 (*slot)->uses.safe_push (use);
3076 return;
3079 /* Comparison function used to sort common candidates. */
3081 static int
3082 common_cand_cmp (const void *p1, const void *p2)
3084 unsigned n1, n2;
3085 const struct iv_common_cand *const *const ccand1
3086 = (const struct iv_common_cand *const *)p1;
3087 const struct iv_common_cand *const *const ccand2
3088 = (const struct iv_common_cand *const *)p2;
3090 n1 = (*ccand1)->uses.length ();
3091 n2 = (*ccand2)->uses.length ();
3092 return n2 - n1;
3095 /* Adds IV candidates based on the common candidates recorded. */
3097 static void
3098 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3100 unsigned i, j;
3101 struct iv_cand *cand_1, *cand_2;
3103 data->iv_common_cands.qsort (common_cand_cmp);
3104 for (i = 0; i < data->iv_common_cands.length (); i++)
3106 struct iv_common_cand *ptr = data->iv_common_cands[i];
3108 /* Only add IV candidate if it's derived from multiple uses. */
3109 if (ptr->uses.length () <= 1)
3110 break;
3112 cand_1 = NULL;
3113 cand_2 = NULL;
3114 if (ip_normal_pos (data->current_loop))
3115 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3116 false, IP_NORMAL, NULL, NULL);
3118 if (ip_end_pos (data->current_loop)
3119 && allow_ip_end_pos_p (data->current_loop))
3120 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3121 false, IP_END, NULL, NULL);
3123 /* Bind deriving uses and the new candidates. */
3124 for (j = 0; j < ptr->uses.length (); j++)
3126 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3127 if (cand_1)
3128 bitmap_set_bit (group->related_cands, cand_1->id);
3129 if (cand_2)
3130 bitmap_set_bit (group->related_cands, cand_2->id);
3134 /* Release data since it is useless from this point. */
3135 data->iv_common_cand_tab->empty ();
3136 data->iv_common_cands.truncate (0);
3139 /* Adds candidates based on the value of USE's iv. */
3141 static void
3142 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3144 unsigned HOST_WIDE_INT offset;
3145 tree base;
3146 tree basetype;
3147 struct iv *iv = use->iv;
3149 add_candidate (data, iv->base, iv->step, false, use);
3151 /* Record common candidate for use in case it can be shared by others. */
3152 record_common_cand (data, iv->base, iv->step, use);
3154 /* Record common candidate with initial value zero. */
3155 basetype = TREE_TYPE (iv->base);
3156 if (POINTER_TYPE_P (basetype))
3157 basetype = sizetype;
3158 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3160 /* Record a common candidate with the constant offset stripped from the base.
3161 As for the use itself, we also add a candidate directly for it. */
3162 base = strip_offset (iv->base, &offset);
3163 if (offset || base != iv->base)
3165 record_common_cand (data, base, iv->step, use);
3166 add_candidate (data, base, iv->step, false, use);
3169 /* Record common candidate with base_object removed in base. */
3170 if (iv->base_object != NULL)
3172 unsigned i;
3173 aff_tree aff_base;
3174 tree step, base_object = iv->base_object;
3176 base = iv->base;
3177 step = iv->step;
3178 STRIP_NOPS (base);
3179 STRIP_NOPS (step);
3180 STRIP_NOPS (base_object);
3181 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3182 for (i = 0; i < aff_base.n; i++)
3184 if (aff_base.elts[i].coef != 1)
3185 continue;
3187 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3188 break;
3190 if (i < aff_base.n)
3192 aff_combination_remove_elt (&aff_base, i);
3193 base = aff_combination_to_tree (&aff_base);
3194 basetype = TREE_TYPE (base);
3195 if (POINTER_TYPE_P (basetype))
3196 basetype = sizetype;
3198 step = fold_convert (basetype, step);
3199 record_common_cand (data, base, step, use);
3200 /* Also record common candidate with offset stripped. */
3201 base = strip_offset (base, &offset);
3202 if (offset)
3203 record_common_cand (data, base, step, use);
3207 /* Finally, add auto-increment candidates. Make such variables
3208 important since other iv uses with the same base object may be based
3209 on them. */
3210 if (use != NULL && use->type == USE_ADDRESS)
3211 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3214 /* Adds candidates based on the uses. */
3216 static void
3217 add_iv_candidate_for_groups (struct ivopts_data *data)
3219 unsigned i;
3221 /* Only add candidates for the first use in each group. */
3222 for (i = 0; i < data->vgroups.length (); i++)
3224 struct iv_group *group = data->vgroups[i];
3226 gcc_assert (group->vuses[0] != NULL);
3227 add_iv_candidate_for_use (data, group->vuses[0]);
3229 add_iv_candidate_derived_from_uses (data);
3232 /* Record important candidates and add them to related_cands bitmaps. */
3234 static void
3235 record_important_candidates (struct ivopts_data *data)
3237 unsigned i;
3238 struct iv_group *group;
3240 for (i = 0; i < data->vcands.length (); i++)
3242 struct iv_cand *cand = data->vcands[i];
3244 if (cand->important)
3245 bitmap_set_bit (data->important_candidates, i);
3248 data->consider_all_candidates = (data->vcands.length ()
3249 <= CONSIDER_ALL_CANDIDATES_BOUND);
3251 /* Add important candidates to groups' related_cands bitmaps. */
3252 for (i = 0; i < data->vgroups.length (); i++)
3254 group = data->vgroups[i];
3255 bitmap_ior_into (group->related_cands, data->important_candidates);
3259 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3260 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3261 we allocate a simple list to every use. */
3263 static void
3264 alloc_use_cost_map (struct ivopts_data *data)
3266 unsigned i, size, s;
3268 for (i = 0; i < data->vgroups.length (); i++)
3270 struct iv_group *group = data->vgroups[i];
3272 if (data->consider_all_candidates)
3273 size = data->vcands.length ();
3274 else
3276 s = bitmap_count_bits (group->related_cands);
3278 /* Round up to a power of two, so that taking the modulo is fast. */
3279 size = s ? (1 << ceil_log2 (s)) : 1;
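/* E.g. 5 related candidates give an 8-entry map, so that the lookup in
get_group_iv_cost can reduce a candidate id with a simple
"id & (size - 1)" instead of a division. */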
3282 group->n_map_members = size;
3283 group->cost_map = XCNEWVEC (struct cost_pair, size);
3287 /* Returns a description of the computation cost of an expression whose
3288 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3290 static comp_cost
3291 new_cost (unsigned runtime, unsigned complexity)
3293 comp_cost cost;
3295 cost.cost = runtime;
3296 cost.complexity = complexity;
3298 return cost;
3301 /* Returns true if COST is infinite. */
3303 static bool
3304 infinite_cost_p (comp_cost cost)
3306 return cost.cost == INFTY;
3309 /* Adds costs COST1 and COST2. */
3311 static comp_cost
3312 add_costs (comp_cost cost1, comp_cost cost2)
3314 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3315 return infinite_cost;
3317 cost1.cost += cost2.cost;
3318 cost1.complexity += cost2.complexity;
3320 return cost1;
3322 /* Subtracts cost COST2 from cost COST1. */
3324 static comp_cost
3325 sub_costs (comp_cost cost1, comp_cost cost2)
3327 cost1.cost -= cost2.cost;
3328 cost1.complexity -= cost2.complexity;
3330 return cost1;
3333 /* Returns a negative number if COST1 < COST2, a positive number if
3334 COST1 > COST2, and 0 if COST1 = COST2. */
3336 static int
3337 compare_costs (comp_cost cost1, comp_cost cost2)
3339 if (cost1.cost == cost2.cost)
3340 return cost1.complexity - cost2.complexity;
3342 return cost1.cost - cost2.cost;
3345 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3346 on invariants DEPENDS_ON and that the value used in expressing it
3347 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3349 static void
3350 set_group_iv_cost (struct ivopts_data *data,
3351 struct iv_group *group, struct iv_cand *cand,
3352 comp_cost cost, bitmap depends_on, tree value,
3353 enum tree_code comp, iv_inv_expr_ent *inv_expr)
3355 unsigned i, s;
3357 if (infinite_cost_p (cost))
3359 BITMAP_FREE (depends_on);
3360 return;
3363 if (data->consider_all_candidates)
3365 group->cost_map[cand->id].cand = cand;
3366 group->cost_map[cand->id].cost = cost;
3367 group->cost_map[cand->id].depends_on = depends_on;
3368 group->cost_map[cand->id].value = value;
3369 group->cost_map[cand->id].comp = comp;
3370 group->cost_map[cand->id].inv_expr = inv_expr;
3371 return;
3374 /* n_map_members is a power of two, so this computes modulo. */
3375 s = cand->id & (group->n_map_members - 1);
3376 for (i = s; i < group->n_map_members; i++)
3377 if (!group->cost_map[i].cand)
3378 goto found;
3379 for (i = 0; i < s; i++)
3380 if (!group->cost_map[i].cand)
3381 goto found;
3383 gcc_unreachable ();
3385 found:
3386 group->cost_map[i].cand = cand;
3387 group->cost_map[i].cost = cost;
3388 group->cost_map[i].depends_on = depends_on;
3389 group->cost_map[i].value = value;
3390 group->cost_map[i].comp = comp;
3391 group->cost_map[i].inv_expr = inv_expr;
3394 /* Gets cost of (GROUP, CAND) pair. */
3396 static struct cost_pair *
3397 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3398 struct iv_cand *cand)
3400 unsigned i, s;
3401 struct cost_pair *ret;
3403 if (!cand)
3404 return NULL;
3406 if (data->consider_all_candidates)
3408 ret = group->cost_map + cand->id;
3409 if (!ret->cand)
3410 return NULL;
3412 return ret;
3415 /* n_map_members is a power of two, so this computes modulo. */
3416 s = cand->id & (group->n_map_members - 1);
3417 for (i = s; i < group->n_map_members; i++)
3418 if (group->cost_map[i].cand == cand)
3419 return group->cost_map + i;
3420 else if (group->cost_map[i].cand == NULL)
3421 return NULL;
3422 for (i = 0; i < s; i++)
3423 if (group->cost_map[i].cand == cand)
3424 return group->cost_map + i;
3425 else if (group->cost_map[i].cand == NULL)
3426 return NULL;
3428 return NULL;
3431 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3432 static rtx
3433 produce_memory_decl_rtl (tree obj, int *regno)
3435 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3436 machine_mode address_mode = targetm.addr_space.address_mode (as);
3437 rtx x;
3439 gcc_assert (obj);
3440 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3442 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3443 x = gen_rtx_SYMBOL_REF (address_mode, name);
3444 SET_SYMBOL_REF_DECL (x, obj);
3445 x = gen_rtx_MEM (DECL_MODE (obj), x);
3446 set_mem_addr_space (x, as);
3447 targetm.encode_section_info (obj, x, true);
3449 else
3451 x = gen_raw_REG (address_mode, (*regno)++);
3452 x = gen_rtx_MEM (DECL_MODE (obj), x);
3453 set_mem_addr_space (x, as);
3456 return x;
3459 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
3460 walk_tree. DATA contains the actual fake register number. */
3462 static tree
3463 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3465 tree obj = NULL_TREE;
3466 rtx x = NULL_RTX;
3467 int *regno = (int *) data;
3469 switch (TREE_CODE (*expr_p))
3471 case ADDR_EXPR:
3472 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3473 handled_component_p (*expr_p);
3474 expr_p = &TREE_OPERAND (*expr_p, 0))
3475 continue;
3476 obj = *expr_p;
3477 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3478 x = produce_memory_decl_rtl (obj, regno);
3479 break;
3481 case SSA_NAME:
3482 *ws = 0;
3483 obj = SSA_NAME_VAR (*expr_p);
3484 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3485 if (!obj)
3486 return NULL_TREE;
3487 if (!DECL_RTL_SET_P (obj))
3488 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3489 break;
3491 case VAR_DECL:
3492 case PARM_DECL:
3493 case RESULT_DECL:
3494 *ws = 0;
3495 obj = *expr_p;
3497 if (DECL_RTL_SET_P (obj))
3498 break;
3500 if (DECL_MODE (obj) == BLKmode)
3501 x = produce_memory_decl_rtl (obj, regno);
3502 else
3503 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3505 break;
3507 default:
3508 break;
3511 if (x)
3513 decl_rtl_to_reset.safe_push (obj);
3514 SET_DECL_RTL (obj, x);
3517 return NULL_TREE;
3520 /* Determines cost of the computation of EXPR. */
3522 static unsigned
3523 computation_cost (tree expr, bool speed)
3525 rtx_insn *seq;
3526 rtx rslt;
3527 tree type = TREE_TYPE (expr);
3528 unsigned cost;
3529 /* Avoid using hard regs in ways which may be unsupported. */
3530 int regno = LAST_VIRTUAL_REGISTER + 1;
3531 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3532 enum node_frequency real_frequency = node->frequency;
3534 node->frequency = NODE_FREQUENCY_NORMAL;
3535 crtl->maybe_hot_insn_p = speed;
3536 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3537 start_sequence ();
3538 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3539 seq = get_insns ();
3540 end_sequence ();
3541 default_rtl_profile ();
3542 node->frequency = real_frequency;
3544 cost = seq_cost (seq, speed);
3545 if (MEM_P (rslt))
3546 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3547 TYPE_ADDR_SPACE (type), speed);
3548 else if (!REG_P (rslt))
3549 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3551 return cost;
3554 /* Returns variable containing the value of candidate CAND at statement AT. */
3556 static tree
3557 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3559 if (stmt_after_increment (loop, cand, stmt))
3560 return cand->var_after;
3561 else
3562 return cand->var_before;
3565 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3566 same precision that is at least as wide as the precision of TYPE, stores
3567 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3568 type of A and B. */
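/* Illustrative example: if *A is (unsigned int) x and *B is
(unsigned int) y, with x and y of type "long" (assumed wider than
"unsigned int" here), then *A and *B are replaced by x and y and "long" is
returned; otherwise the type of *A is returned unchanged. */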
3570 static tree
3571 determine_common_wider_type (tree *a, tree *b)
3573 tree wider_type = NULL;
3574 tree suba, subb;
3575 tree atype = TREE_TYPE (*a);
3577 if (CONVERT_EXPR_P (*a))
3579 suba = TREE_OPERAND (*a, 0);
3580 wider_type = TREE_TYPE (suba);
3581 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3582 return atype;
3584 else
3585 return atype;
3587 if (CONVERT_EXPR_P (*b))
3589 subb = TREE_OPERAND (*b, 0);
3590 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3591 return atype;
3593 else
3594 return atype;
3596 *a = suba;
3597 *b = subb;
3598 return wider_type;
3601 /* Determines the expression by which USE is expressed from induction variable
3602 CAND at statement AT in LOOP. The expression is stored in a decomposed
3603 form into AFF. Returns false if USE cannot be expressed using CAND. */
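/* As a rough example (values are illustrative): for a use with iv
{base: b + 4, step: 4} and a candidate with iv {base: 0, step: 1}, the
ratio is 4 and the affine form built below is (b + 4) + 4 * var, where var
holds the candidate's value at AT. */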
3605 static bool
3606 get_computation_aff (struct loop *loop,
3607 struct iv_use *use, struct iv_cand *cand, gimple *at,
3608 struct aff_tree *aff)
3610 tree ubase = use->iv->base;
3611 tree ustep = use->iv->step;
3612 tree cbase = cand->iv->base;
3613 tree cstep = cand->iv->step, cstep_common;
3614 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3615 tree common_type, var;
3616 tree uutype;
3617 aff_tree cbase_aff, var_aff;
3618 widest_int rat;
3620 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3622 /* We do not have enough precision to express the values of the use. */
3623 return false;
3626 var = var_at_stmt (loop, cand, at);
3627 uutype = unsigned_type_for (utype);
3629 /* If the conversion is not noop, perform it. */
3630 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3632 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3633 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3635 tree inner_base, inner_step, inner_type;
3636 inner_base = TREE_OPERAND (cbase, 0);
3637 if (CONVERT_EXPR_P (cstep))
3638 inner_step = TREE_OPERAND (cstep, 0);
3639 else
3640 inner_step = cstep;
3642 inner_type = TREE_TYPE (inner_base);
3643 /* If candidate is added from a biv whose type is smaller than
3644 ctype, we know both candidate and the biv won't overflow.
3645 In this case, it's safe to skip the conversion in the candidate.
3646 As an example, (unsigned short)((unsigned long)A) equals
3647 (unsigned short)A, if A has a type no larger than short. */
3648 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3650 cbase = inner_base;
3651 cstep = inner_step;
3654 cstep = fold_convert (uutype, cstep);
3655 cbase = fold_convert (uutype, cbase);
3656 var = fold_convert (uutype, var);
3659 /* Ratio is 1 when computing the value of biv cand by itself.
3660 We can't rely on constant_multiple_of in this case because the
3661 use is created after the original biv is selected. The call
3662 could fail because of inconsistent fold behavior. See PR68021
3663 for more information. */
3664 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3666 gcc_assert (is_gimple_assign (use->stmt));
3667 gcc_assert (use->iv->ssa_name == cand->var_after);
3668 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3669 rat = 1;
3671 else if (!constant_multiple_of (ustep, cstep, &rat))
3672 return false;
3674 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3675 type, we achieve better folding by computing their difference in this
3676 wider type, and casting the result to UUTYPE. We do not need to worry about
3677 overflows, as all the arithmetic will in the end be performed in UUTYPE
3678 anyway. */
3679 common_type = determine_common_wider_type (&ubase, &cbase);
3681 /* use = ubase - ratio * cbase + ratio * var. */
3682 tree_to_aff_combination (ubase, common_type, aff);
3683 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3684 tree_to_aff_combination (var, uutype, &var_aff);
3686 /* We need to shift the value if we are after the increment. */
3687 if (stmt_after_increment (loop, cand, at))
3689 aff_tree cstep_aff;
3691 if (common_type != uutype)
3692 cstep_common = fold_convert (common_type, cstep);
3693 else
3694 cstep_common = cstep;
3696 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3697 aff_combination_add (&cbase_aff, &cstep_aff);
3700 aff_combination_scale (&cbase_aff, -rat);
3701 aff_combination_add (aff, &cbase_aff);
3702 if (common_type != uutype)
3703 aff_combination_convert (aff, uutype);
3705 aff_combination_scale (&var_aff, rat);
3706 aff_combination_add (aff, &var_aff);
3708 return true;
3711 /* Return the type of USE. */
3713 static tree
3714 get_use_type (struct iv_use *use)
3716 tree base_type = TREE_TYPE (use->iv->base);
3717 tree type;
3719 if (use->type == USE_ADDRESS)
3721 /* The base_type may be a void pointer. Create a pointer type based on
3722 the mem_ref instead. */
3723 type = build_pointer_type (TREE_TYPE (*use->op_p));
3724 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3725 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3727 else
3728 type = base_type;
3730 return type;
3733 /* Determines the expression by which USE is expressed from induction variable
3734 CAND at statement AT in LOOP. The computation is unshared. */
3736 static tree
3737 get_computation_at (struct loop *loop,
3738 struct iv_use *use, struct iv_cand *cand, gimple *at)
3740 aff_tree aff;
3741 tree type = get_use_type (use);
3743 if (!get_computation_aff (loop, use, cand, at, &aff))
3744 return NULL_TREE;
3745 unshare_aff_combination (&aff);
3746 return fold_convert (type, aff_combination_to_tree (&aff));
3749 /* Determines the expression by which USE is expressed from induction variable
3750 CAND in LOOP. The computation is unshared. */
3752 static tree
3753 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3755 return get_computation_at (loop, use, cand, use->stmt);
3758 /* Adjust the cost COST for being in loop setup rather than loop body.
3759 If we're optimizing for space, the loop setup overhead is constant;
3760 if we're optimizing for speed, amortize it over the per-iteration cost. */
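/* E.g. a setup cost of 20 in a loop with an estimated average of 10
iterations contributes 2 to the per-iteration cost when optimizing for
speed (the numbers are purely illustrative). */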
3761 static unsigned
3762 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3764 if (cost == INFTY)
3765 return cost;
3766 else if (optimize_loop_for_speed_p (data->current_loop))
3767 return cost / avg_loop_niter (data->current_loop);
3768 else
3769 return cost;
3772 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3773 validity for a memory reference accessing memory of mode MODE in
3774 address space AS. */
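/* E.g. asking whether "base + 3 * index" is a valid SImode address would
return false on a target with x86-like scaled addressing, whereas ratios
1, 2, 4 and 8 would return true; the exact set is probed from the backend
and cached below. */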
3777 bool
3778 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3779 addr_space_t as)
3781 #define MAX_RATIO 128
3782 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3783 static vec<sbitmap> valid_mult_list;
3784 sbitmap valid_mult;
3786 if (data_index >= valid_mult_list.length ())
3787 valid_mult_list.safe_grow_cleared (data_index + 1);
3789 valid_mult = valid_mult_list[data_index];
3790 if (!valid_mult)
3792 machine_mode address_mode = targetm.addr_space.address_mode (as);
3793 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3794 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3795 rtx addr, scaled;
3796 HOST_WIDE_INT i;
3798 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3799 bitmap_clear (valid_mult);
3800 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3801 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3802 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3804 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3805 if (memory_address_addr_space_p (mode, addr, as)
3806 || memory_address_addr_space_p (mode, scaled, as))
3807 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3810 if (dump_file && (dump_flags & TDF_DETAILS))
3812 fprintf (dump_file, " allowed multipliers:");
3813 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3814 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3815 fprintf (dump_file, " %d", (int) i);
3816 fprintf (dump_file, "\n");
3817 fprintf (dump_file, "\n");
3820 valid_mult_list[data_index] = valid_mult;
3823 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3824 return false;
3826 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3829 /* Returns the cost of an address of the form symbol + var + OFFSET + RATIO * index.
3830 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3831 the variable is omitted. Compute the cost for a memory reference that accesses
3832 a memory location of mode MEM_MODE in address space AS.
3834 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3835 size of MEM_MODE / RATIO) is available. To make this determination, we
3836 look at the size of the increment to be made, which is given in CSTEP.
3837 CSTEP may be zero if the step is unknown.
3838 STMT_AFTER_INC is true iff the statement we're looking at is after the
3839 increment of the original biv.
3841 TODO -- there must be some better way. This all is quite crude. */
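/* A sketch of what gets cached below (all numbers hypothetical): for each
(address space, memory mode) pair we record the valid offset range, e.g.
[-4096, 4095], the cost of each of the sixteen sym/var/offset/ratio
address shapes, and the cost of the supported auto-increment forms; the
per-use cost computed at the end of the function is then mostly a table
lookup plus small corrections. */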
3843 enum ainc_type
3845 AINC_PRE_INC, /* Pre increment. */
3846 AINC_PRE_DEC, /* Pre decrement. */
3847 AINC_POST_INC, /* Post increment. */
3848 AINC_POST_DEC, /* Post decrement. */
3849 AINC_NONE /* Also the number of auto increment types. */
3852 struct address_cost_data
3854 HOST_WIDE_INT min_offset, max_offset;
3855 unsigned costs[2][2][2][2];
3856 unsigned ainc_costs[AINC_NONE];
3860 static comp_cost
3861 get_address_cost (bool symbol_present, bool var_present,
3862 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3863 HOST_WIDE_INT cstep, machine_mode mem_mode,
3864 addr_space_t as, bool speed,
3865 bool stmt_after_inc, bool *may_autoinc)
3867 machine_mode address_mode = targetm.addr_space.address_mode (as);
3868 static vec<address_cost_data *> address_cost_data_list;
3869 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3870 address_cost_data *data;
3871 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3872 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3873 unsigned cost, acost, complexity;
3874 enum ainc_type autoinc_type;
3875 bool offset_p, ratio_p, autoinc;
3876 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3877 unsigned HOST_WIDE_INT mask;
3878 unsigned bits;
3880 if (data_index >= address_cost_data_list.length ())
3881 address_cost_data_list.safe_grow_cleared (data_index + 1);
3883 data = address_cost_data_list[data_index];
3884 if (!data)
3886 HOST_WIDE_INT i;
3887 HOST_WIDE_INT rat, off = 0;
3888 int old_cse_not_expected, width;
3889 unsigned sym_p, var_p, off_p, rat_p, add_c;
3890 rtx_insn *seq;
3891 rtx addr, base;
3892 rtx reg0, reg1;
3894 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3896 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3898 width = GET_MODE_BITSIZE (address_mode) - 1;
3899 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3900 width = HOST_BITS_PER_WIDE_INT - 1;
3901 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3903 for (i = width; i >= 0; i--)
3905 off = -((unsigned HOST_WIDE_INT) 1 << i);
3906 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3907 if (memory_address_addr_space_p (mem_mode, addr, as))
3908 break;
3910 data->min_offset = (i == -1? 0 : off);
3912 for (i = width; i >= 0; i--)
3914 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3915 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3916 if (memory_address_addr_space_p (mem_mode, addr, as))
3917 break;
3918 /* For some strict-alignment targets, the offset must be naturally
3919 aligned. Try an aligned offset if mem_mode is not QImode. */
3920 off = mem_mode != QImode
3921 ? ((unsigned HOST_WIDE_INT) 1 << i)
3922 - GET_MODE_SIZE (mem_mode)
3923 : 0;
3924 if (off > 0)
3926 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3927 if (memory_address_addr_space_p (mem_mode, addr, as))
3928 break;
3931 if (i == -1)
3932 off = 0;
3933 data->max_offset = off;
3935 if (dump_file && (dump_flags & TDF_DETAILS))
3937 fprintf (dump_file, "get_address_cost:\n");
3938 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3939 GET_MODE_NAME (mem_mode),
3940 data->min_offset);
3941 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3942 GET_MODE_NAME (mem_mode),
3943 data->max_offset);
3946 rat = 1;
3947 for (i = 2; i <= MAX_RATIO; i++)
3948 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3950 rat = i;
3951 break;
3954 /* Compute the cost of various addressing modes. */
3955 acost = 0;
3956 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3957 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3959 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3960 || USE_STORE_PRE_DECREMENT (mem_mode))
3962 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3963 has_predec[mem_mode]
3964 = memory_address_addr_space_p (mem_mode, addr, as);
3966 if (has_predec[mem_mode])
3967 data->ainc_costs[AINC_PRE_DEC]
3968 = address_cost (addr, mem_mode, as, speed);
3970 if (USE_LOAD_POST_DECREMENT (mem_mode)
3971 || USE_STORE_POST_DECREMENT (mem_mode))
3973 addr = gen_rtx_POST_DEC (address_mode, reg0);
3974 has_postdec[mem_mode]
3975 = memory_address_addr_space_p (mem_mode, addr, as);
3977 if (has_postdec[mem_mode])
3978 data->ainc_costs[AINC_POST_DEC]
3979 = address_cost (addr, mem_mode, as, speed);
3981 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3982 || USE_STORE_PRE_INCREMENT (mem_mode))
3984 addr = gen_rtx_PRE_INC (address_mode, reg0);
3985 has_preinc[mem_mode]
3986 = memory_address_addr_space_p (mem_mode, addr, as);
3988 if (has_preinc[mem_mode])
3989 data->ainc_costs[AINC_PRE_INC]
3990 = address_cost (addr, mem_mode, as, speed);
3992 if (USE_LOAD_POST_INCREMENT (mem_mode)
3993 || USE_STORE_POST_INCREMENT (mem_mode))
3995 addr = gen_rtx_POST_INC (address_mode, reg0);
3996 has_postinc[mem_mode]
3997 = memory_address_addr_space_p (mem_mode, addr, as);
3999 if (has_postinc[mem_mode])
4000 data->ainc_costs[AINC_POST_INC]
4001 = address_cost (addr, mem_mode, as, speed);
4003 for (i = 0; i < 16; i++)
4005 sym_p = i & 1;
4006 var_p = (i >> 1) & 1;
4007 off_p = (i >> 2) & 1;
4008 rat_p = (i >> 3) & 1;
4010 addr = reg0;
4011 if (rat_p)
4012 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4013 gen_int_mode (rat, address_mode));
4015 if (var_p)
4016 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4018 if (sym_p)
4020 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4021 /* ??? We can run into trouble with some backends by presenting
4022 it with symbols which haven't been properly passed through
4023 targetm.encode_section_info. By setting the local bit, we
4024 enhance the probability of things working. */
4025 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4027 if (off_p)
4028 base = gen_rtx_fmt_e (CONST, address_mode,
4029 gen_rtx_fmt_ee
4030 (PLUS, address_mode, base,
4031 gen_int_mode (off, address_mode)));
4033 else if (off_p)
4034 base = gen_int_mode (off, address_mode);
4035 else
4036 base = NULL_RTX;
4038 if (base)
4039 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4041 start_sequence ();
4042 /* To avoid splitting addressing modes, pretend that no cse will
4043 follow. */
4044 old_cse_not_expected = cse_not_expected;
4045 cse_not_expected = true;
4046 addr = memory_address_addr_space (mem_mode, addr, as);
4047 cse_not_expected = old_cse_not_expected;
4048 seq = get_insns ();
4049 end_sequence ();
4051 acost = seq_cost (seq, speed);
4052 acost += address_cost (addr, mem_mode, as, speed);
4054 if (!acost)
4055 acost = 1;
4056 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4059 /* On some targets, it is quite expensive to load a symbol into a register,
4060 which makes addresses that contain symbols look much more expensive.
4061 However, the symbol will have to be loaded in any case before the
4062 loop (and quite likely we have it in register already), so it does not
4063 make much sense to penalize them too heavily. So make some final
4064 tweaks for the SYMBOL_PRESENT modes:
4066 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4067 var is cheaper, use this mode with small penalty.
4068 If VAR_PRESENT is true, try whether the mode with
4069 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4070 if this is the case, use it. */
4071 add_c = add_cost (speed, address_mode);
4072 for (i = 0; i < 8; i++)
4074 var_p = i & 1;
4075 off_p = (i >> 1) & 1;
4076 rat_p = (i >> 2) & 1;
4078 acost = data->costs[0][1][off_p][rat_p] + 1;
4079 if (var_p)
4080 acost += add_c;
4082 if (acost < data->costs[1][var_p][off_p][rat_p])
4083 data->costs[1][var_p][off_p][rat_p] = acost;
4086 if (dump_file && (dump_flags & TDF_DETAILS))
4088 fprintf (dump_file, "<Address Costs>:\n");
4090 for (i = 0; i < 16; i++)
4092 sym_p = i & 1;
4093 var_p = (i >> 1) & 1;
4094 off_p = (i >> 2) & 1;
4095 rat_p = (i >> 3) & 1;
4097 fprintf (dump_file, " ");
4098 if (sym_p)
4099 fprintf (dump_file, "sym + ");
4100 if (var_p)
4101 fprintf (dump_file, "var + ");
4102 if (off_p)
4103 fprintf (dump_file, "cst + ");
4104 if (rat_p)
4105 fprintf (dump_file, "rat * ");
4107 acost = data->costs[sym_p][var_p][off_p][rat_p];
4108 fprintf (dump_file, "index costs %d\n", acost);
4110 if (has_predec[mem_mode] || has_postdec[mem_mode]
4111 || has_preinc[mem_mode] || has_postinc[mem_mode])
4112 fprintf (dump_file, " May include autoinc/dec\n");
4113 fprintf (dump_file, "\n");
4116 address_cost_data_list[data_index] = data;
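/* The block below sign-extends OFFSET from the width of the address
   mode.  Example values only: with a 32-bit address_mode, an OFFSET of
   0xfffffff0 ends up as -16 in S_OFFSET.  */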
4119 bits = GET_MODE_BITSIZE (address_mode);
4120 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4121 offset &= mask;
4122 if ((offset >> (bits - 1) & 1))
4123 offset |= ~mask;
4124 s_offset = offset;
4126 autoinc = false;
4127 autoinc_type = AINC_NONE;
4128 msize = GET_MODE_SIZE (mem_mode);
4129 autoinc_offset = offset;
4130 if (stmt_after_inc)
4131 autoinc_offset += ratio * cstep;
4132 if (symbol_present || var_present || ratio != 1)
4133 autoinc = false;
4134 else
4136 if (has_postinc[mem_mode] && autoinc_offset == 0
4137 && msize == cstep)
4138 autoinc_type = AINC_POST_INC;
4139 else if (has_postdec[mem_mode] && autoinc_offset == 0
4140 && msize == -cstep)
4141 autoinc_type = AINC_POST_DEC;
4142 else if (has_preinc[mem_mode] && autoinc_offset == msize
4143 && msize == cstep)
4144 autoinc_type = AINC_PRE_INC;
4145 else if (has_predec[mem_mode] && autoinc_offset == -msize
4146 && msize == -cstep)
4147 autoinc_type = AINC_PRE_DEC;
4149 if (autoinc_type != AINC_NONE)
4150 autoinc = true;
4153 cost = 0;
4154 offset_p = (s_offset != 0
4155 && data->min_offset <= s_offset
4156 && s_offset <= data->max_offset);
4157 ratio_p = (ratio != 1
4158 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4160 if (ratio != 1 && !ratio_p)
4161 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4163 if (s_offset && !offset_p && !symbol_present)
4164 cost += add_cost (speed, address_mode);
4166 if (may_autoinc)
4167 *may_autoinc = autoinc;
4168 if (autoinc)
4169 acost = data->ainc_costs[autoinc_type];
4170 else
4171 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4172 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4173 return new_cost (cost + acost, complexity);
4176 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4177 EXPR operand holding the multiplication that can be done as a shift.
4178 COST0 and COST1 are the costs for calculating the operands of EXPR.
4179 Returns true if successful, and returns the cost in COST. */
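/* A concrete illustration (example expression only): for
   EXPR = a + b * 8 with MULT = b * 8 we have m == 3 and compare

     as_cost = add_cost + shift_cost (m)   -- tmp = b << 3;  a + tmp
     sa_cost = shiftadd_cost (m)           -- a single shift-and-add insn

   and the cheaper of the two, plus the cost of computing a, is
   returned in *COST.  */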
4181 static bool
4182 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4183 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4185 comp_cost res;
4186 tree op1 = TREE_OPERAND (expr, 1);
4187 tree cst = TREE_OPERAND (mult, 1);
4188 tree multop = TREE_OPERAND (mult, 0);
4189 int m = exact_log2 (int_cst_value (cst));
4190 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4191 int as_cost, sa_cost;
4192 bool mult_in_op1;
4194 if (!(m >= 0 && m < maxm))
4195 return false;
4197 STRIP_NOPS (op1);
4198 mult_in_op1 = operand_equal_p (op1, mult, 0);
4200 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4202 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4203 use that in preference to a shift insn followed by an add insn. */
4204 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4205 ? shiftadd_cost (speed, mode, m)
4206 : (mult_in_op1
4207 ? shiftsub1_cost (speed, mode, m)
4208 : shiftsub0_cost (speed, mode, m)));
4210 res = new_cost (MIN (as_cost, sa_cost), 0);
4211 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4213 STRIP_NOPS (multop);
4214 if (!is_gimple_val (multop))
4215 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4217 *cost = res;
4218 return true;
4221 /* Estimates cost of forcing expression EXPR into a variable. */
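/* As a rough illustration (example expression only): forcing a + 4 * b
   costs about an addition plus the cheaper of a multiplication by 4 or
   a shift-and-add, a plain SSA name costs nothing, and integer
   constants, symbols and addresses use the costs precomputed below;
   the total is capped by target_spill_cost.  */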
4223 static comp_cost
4224 force_expr_to_var_cost (tree expr, bool speed)
4226 static bool costs_initialized = false;
4227 static unsigned integer_cost [2];
4228 static unsigned symbol_cost [2];
4229 static unsigned address_cost [2];
4230 tree op0, op1;
4231 comp_cost cost0, cost1, cost;
4232 machine_mode mode;
4234 if (!costs_initialized)
4236 tree type = build_pointer_type (integer_type_node);
4237 tree var, addr;
4238 rtx x;
4239 int i;
4241 var = create_tmp_var_raw (integer_type_node, "test_var");
4242 TREE_STATIC (var) = 1;
4243 x = produce_memory_decl_rtl (var, NULL);
4244 SET_DECL_RTL (var, x);
4246 addr = build1 (ADDR_EXPR, type, var);
4249 for (i = 0; i < 2; i++)
4251 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4252 2000), i);
4254 symbol_cost[i] = computation_cost (addr, i) + 1;
4256 address_cost[i]
4257 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4258 if (dump_file && (dump_flags & TDF_DETAILS))
4260 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4261 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4262 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4263 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4264 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4265 fprintf (dump_file, "\n");
4269 costs_initialized = true;
4272 STRIP_NOPS (expr);
4274 if (SSA_VAR_P (expr))
4275 return no_cost;
4277 if (is_gimple_min_invariant (expr))
4279 if (TREE_CODE (expr) == INTEGER_CST)
4280 return new_cost (integer_cost [speed], 0);
4282 if (TREE_CODE (expr) == ADDR_EXPR)
4284 tree obj = TREE_OPERAND (expr, 0);
4286 if (TREE_CODE (obj) == VAR_DECL
4287 || TREE_CODE (obj) == PARM_DECL
4288 || TREE_CODE (obj) == RESULT_DECL)
4289 return new_cost (symbol_cost [speed], 0);
4292 return new_cost (address_cost [speed], 0);
4295 switch (TREE_CODE (expr))
4297 case POINTER_PLUS_EXPR:
4298 case PLUS_EXPR:
4299 case MINUS_EXPR:
4300 case MULT_EXPR:
4301 op0 = TREE_OPERAND (expr, 0);
4302 op1 = TREE_OPERAND (expr, 1);
4303 STRIP_NOPS (op0);
4304 STRIP_NOPS (op1);
4305 break;
4307 CASE_CONVERT:
4308 case NEGATE_EXPR:
4309 op0 = TREE_OPERAND (expr, 0);
4310 STRIP_NOPS (op0);
4311 op1 = NULL_TREE;
4312 break;
4314 default:
4315 /* Just an arbitrary value, FIXME. */
4316 return new_cost (target_spill_cost[speed], 0);
4319 if (op0 == NULL_TREE
4320 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4321 cost0 = no_cost;
4322 else
4323 cost0 = force_expr_to_var_cost (op0, speed);
4325 if (op1 == NULL_TREE
4326 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4327 cost1 = no_cost;
4328 else
4329 cost1 = force_expr_to_var_cost (op1, speed);
4331 mode = TYPE_MODE (TREE_TYPE (expr));
4332 switch (TREE_CODE (expr))
4334 case POINTER_PLUS_EXPR:
4335 case PLUS_EXPR:
4336 case MINUS_EXPR:
4337 case NEGATE_EXPR:
4338 cost = new_cost (add_cost (speed, mode), 0);
4339 if (TREE_CODE (expr) != NEGATE_EXPR)
4341 tree mult = NULL_TREE;
4342 comp_cost sa_cost;
4343 if (TREE_CODE (op1) == MULT_EXPR)
4344 mult = op1;
4345 else if (TREE_CODE (op0) == MULT_EXPR)
4346 mult = op0;
4348 if (mult != NULL_TREE
4349 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4350 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4351 speed, &sa_cost))
4352 return sa_cost;
4354 break;
4356 CASE_CONVERT:
4358 tree inner_mode, outer_mode;
4359 outer_mode = TREE_TYPE (expr);
4360 inner_mode = TREE_TYPE (op0);
4361 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4362 TYPE_MODE (inner_mode), speed), 0);
4364 break;
4366 case MULT_EXPR:
4367 if (cst_and_fits_in_hwi (op0))
4368 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4369 mode, speed), 0);
4370 else if (cst_and_fits_in_hwi (op1))
4371 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4372 mode, speed), 0);
4373 else
4374 return new_cost (target_spill_cost [speed], 0);
4375 break;
4377 default:
4378 gcc_unreachable ();
4381 cost = add_costs (cost, cost0);
4382 cost = add_costs (cost, cost1);
4384 /* Bound the cost by target_spill_cost. The parts of complicated
4385 computations often are either loop invariant or at least can
4386 be shared between several iv uses, so letting this grow without
4387 limits would not give reasonable results. */
4388 if (cost.cost > (int) target_spill_cost [speed])
4389 cost.cost = target_spill_cost [speed];
4391 return cost;
4394 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4395 invariants the computation depends on. */
4397 static comp_cost
4398 force_var_cost (struct ivopts_data *data,
4399 tree expr, bitmap *depends_on)
4401 if (depends_on)
4403 fd_ivopts_data = data;
4404 walk_tree (&expr, find_depends, depends_on, NULL);
4407 return force_expr_to_var_cost (expr, data->speed);
4410 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4411 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4412 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4413 invariants the computation depends on. */
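/* Illustrative example (made-up declaration): for ADDR == a.f with
   "static struct s a;", the core object is the static VAR_DECL a, so
   *SYMBOL_PRESENT is set, *VAR_PRESENT is cleared and the byte offset
   of field f is added to *OFFSET.  For an automatic variable the
   address counts as a "var" part instead.  */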
4415 static comp_cost
4416 split_address_cost (struct ivopts_data *data,
4417 tree addr, bool *symbol_present, bool *var_present,
4418 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4420 tree core;
4421 HOST_WIDE_INT bitsize;
4422 HOST_WIDE_INT bitpos;
4423 tree toffset;
4424 machine_mode mode;
4425 int unsignedp, reversep, volatilep;
4427 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4428 &unsignedp, &reversep, &volatilep, false);
4430 if (toffset != 0
4431 || bitpos % BITS_PER_UNIT != 0
4432 || reversep
4433 || TREE_CODE (core) != VAR_DECL)
4435 *symbol_present = false;
4436 *var_present = true;
4437 fd_ivopts_data = data;
4438 if (depends_on)
4439 walk_tree (&addr, find_depends, depends_on, NULL);
4441 return new_cost (target_spill_cost[data->speed], 0);
4444 *offset += bitpos / BITS_PER_UNIT;
4445 if (TREE_STATIC (core)
4446 || DECL_EXTERNAL (core))
4448 *symbol_present = true;
4449 *var_present = false;
4450 return no_cost;
4453 *symbol_present = false;
4454 *var_present = true;
4455 return no_cost;
4458 /* Estimates cost of expressing difference of addresses E1 - E2 as
4459 var + symbol + offset. The value of offset is added to OFFSET,
4460 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4461 part is missing. DEPENDS_ON is a set of the invariants the computation
4462 depends on. */
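/* For instance (illustration only), for E1 == &a[4] and E2 == &a[0]
   ptr_difference_const folds the difference at compile time and only
   *OFFSET is adjusted; when the bases do not fold, E1 - E2 is rebuilt
   as an affine combination and costed via force_var_cost.  */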
4464 static comp_cost
4465 ptr_difference_cost (struct ivopts_data *data,
4466 tree e1, tree e2, bool *symbol_present, bool *var_present,
4467 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4469 HOST_WIDE_INT diff = 0;
4470 aff_tree aff_e1, aff_e2;
4471 tree type;
4473 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4475 if (ptr_difference_const (e1, e2, &diff))
4477 *offset += diff;
4478 *symbol_present = false;
4479 *var_present = false;
4480 return no_cost;
4483 if (integer_zerop (e2))
4484 return split_address_cost (data, TREE_OPERAND (e1, 0),
4485 symbol_present, var_present, offset, depends_on);
4487 *symbol_present = false;
4488 *var_present = true;
4490 type = signed_type_for (TREE_TYPE (e1));
4491 tree_to_aff_combination (e1, type, &aff_e1);
4492 tree_to_aff_combination (e2, type, &aff_e2);
4493 aff_combination_scale (&aff_e2, -1);
4494 aff_combination_add (&aff_e1, &aff_e2);
4496 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4499 /* Estimates cost of expressing difference E1 - E2 as
4500 var + symbol + offset. The value of offset is added to OFFSET,
4501 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4502 part is missing. DEPENDS_ON is a set of the invariants the computation
4503 depends on. */
4505 static comp_cost
4506 difference_cost (struct ivopts_data *data,
4507 tree e1, tree e2, bool *symbol_present, bool *var_present,
4508 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4510 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4511 unsigned HOST_WIDE_INT off1, off2;
4512 aff_tree aff_e1, aff_e2;
4513 tree type;
4515 e1 = strip_offset (e1, &off1);
4516 e2 = strip_offset (e2, &off2);
4517 *offset += off1 - off2;
4519 STRIP_NOPS (e1);
4520 STRIP_NOPS (e2);
4522 if (TREE_CODE (e1) == ADDR_EXPR)
4523 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4524 offset, depends_on);
4525 *symbol_present = false;
4527 if (operand_equal_p (e1, e2, 0))
4529 *var_present = false;
4530 return no_cost;
4533 *var_present = true;
4535 if (integer_zerop (e2))
4536 return force_var_cost (data, e1, depends_on);
4538 if (integer_zerop (e1))
4540 comp_cost cost = force_var_cost (data, e2, depends_on);
4541 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4542 return cost;
4545 type = signed_type_for (TREE_TYPE (e1));
4546 tree_to_aff_combination (e1, type, &aff_e1);
4547 tree_to_aff_combination (e2, type, &aff_e2);
4548 aff_combination_scale (&aff_e2, -1);
4549 aff_combination_add (&aff_e1, &aff_e2);
4551 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4554 /* Returns true if AFF1 and AFF2 are identical. */
4556 static bool
4557 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4559 unsigned i;
4561 if (aff1->n != aff2->n)
4562 return false;
4564 for (i = 0; i < aff1->n; i++)
4566 if (aff1->elts[i].coef != aff2->elts[i].coef)
4567 return false;
4569 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4570 return false;
4572 return true;
4575 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
4577 static iv_inv_expr_ent *
4578 record_inv_expr (struct ivopts_data *data, tree expr)
4580 struct iv_inv_expr_ent ent;
4581 struct iv_inv_expr_ent **slot;
4583 ent.expr = expr;
4584 ent.hash = iterative_hash_expr (expr, 0);
4585 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4587 if (!*slot)
4589 *slot = XNEW (struct iv_inv_expr_ent);
4590 (*slot)->expr = expr;
4591 (*slot)->hash = ent.hash;
4592 (*slot)->id = data->max_inv_expr_id++;
4595 return *slot;
4598 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4599 requires a new compiler-generated temporary. Returns NULL otherwise.
4600 ADDRESS_P is a flag indicating if the expression is for address
4601 computation. */
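/* A small illustration (example expressions only): for UBASE == p + n,
   CBASE == p and RATIO == 1, the difference folds to the loop-invariant
   "n", which is recorded and returned; for UBASE identical to CBASE
   nothing new needs to be computed and NULL is returned.  */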
4603 static iv_inv_expr_ent *
4604 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4605 tree cbase, HOST_WIDE_INT ratio,
4606 bool address_p)
4608 aff_tree ubase_aff, cbase_aff;
4609 tree expr, ub, cb;
4611 STRIP_NOPS (ubase);
4612 STRIP_NOPS (cbase);
4613 ub = ubase;
4614 cb = cbase;
4616 if ((TREE_CODE (ubase) == INTEGER_CST)
4617 && (TREE_CODE (cbase) == INTEGER_CST))
4618 return NULL;
4620 /* Strips the constant part. */
4621 if (TREE_CODE (ubase) == PLUS_EXPR
4622 || TREE_CODE (ubase) == MINUS_EXPR
4623 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4625 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4626 ubase = TREE_OPERAND (ubase, 0);
4629 /* Strips the constant part. */
4630 if (TREE_CODE (cbase) == PLUS_EXPR
4631 || TREE_CODE (cbase) == MINUS_EXPR
4632 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4634 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4635 cbase = TREE_OPERAND (cbase, 0);
4638 if (address_p)
4640 if (((TREE_CODE (ubase) == SSA_NAME)
4641 || (TREE_CODE (ubase) == ADDR_EXPR
4642 && is_gimple_min_invariant (ubase)))
4643 && (TREE_CODE (cbase) == INTEGER_CST))
4644 return NULL;
4646 if (((TREE_CODE (cbase) == SSA_NAME)
4647 || (TREE_CODE (cbase) == ADDR_EXPR
4648 && is_gimple_min_invariant (cbase)))
4649 && (TREE_CODE (ubase) == INTEGER_CST))
4650 return NULL;
4653 if (ratio == 1)
4655 if (operand_equal_p (ubase, cbase, 0))
4656 return NULL;
4658 if (TREE_CODE (ubase) == ADDR_EXPR
4659 && TREE_CODE (cbase) == ADDR_EXPR)
4661 tree usym, csym;
4663 usym = TREE_OPERAND (ubase, 0);
4664 csym = TREE_OPERAND (cbase, 0);
4665 if (TREE_CODE (usym) == ARRAY_REF)
4667 tree ind = TREE_OPERAND (usym, 1);
4668 if (TREE_CODE (ind) == INTEGER_CST
4669 && tree_fits_shwi_p (ind)
4670 && tree_to_shwi (ind) == 0)
4671 usym = TREE_OPERAND (usym, 0);
4673 if (TREE_CODE (csym) == ARRAY_REF)
4675 tree ind = TREE_OPERAND (csym, 1);
4676 if (TREE_CODE (ind) == INTEGER_CST
4677 && tree_fits_shwi_p (ind)
4678 && tree_to_shwi (ind) == 0)
4679 csym = TREE_OPERAND (csym, 0);
4681 if (operand_equal_p (usym, csym, 0))
4682 return NULL;
4684 /* Now do a more complex comparison. */
4685 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4686 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4687 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4688 return NULL;
4691 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4692 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4694 aff_combination_scale (&cbase_aff, -1 * ratio);
4695 aff_combination_add (&ubase_aff, &cbase_aff);
4696 expr = aff_combination_to_tree (&ubase_aff);
4697 return record_inv_expr (data, expr);
4702 /* Determines the cost of the computation by which USE is expressed
4703 from induction variable CAND. If ADDRESS_P is true, we just need
4704 to create an address from it, otherwise we want to get it into a
4705 register. A set of invariants we depend on is stored in
4706 DEPENDS_ON. AT is the statement at which the value is computed.
4707 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4708 addressing is likely. */
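/* A rough illustration (invented numbers): expressing the address use
   *(base + 4 * i) by a candidate with the same base and step 4 gives
   ratio == 1, so only the cost of the base difference plus the
   addressing mode is charged; when USTEP is not a constant multiple of
   CSTEP the function returns infinite_cost.  */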
4710 static comp_cost
4711 get_computation_cost_at (struct ivopts_data *data,
4712 struct iv_use *use, struct iv_cand *cand,
4713 bool address_p, bitmap *depends_on, gimple *at,
4714 bool *can_autoinc,
4715 iv_inv_expr_ent **inv_expr)
4717 tree ubase = use->iv->base, ustep = use->iv->step;
4718 tree cbase, cstep;
4719 tree utype = TREE_TYPE (ubase), ctype;
4720 unsigned HOST_WIDE_INT cstepi, offset = 0;
4721 HOST_WIDE_INT ratio, aratio;
4722 bool var_present, symbol_present, stmt_is_after_inc;
4723 comp_cost cost;
4724 widest_int rat;
4725 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4726 machine_mode mem_mode = (address_p
4727 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4728 : VOIDmode);
4730 if (depends_on)
4731 *depends_on = NULL;
4733 /* Only consider real candidates. */
4734 if (!cand->iv)
4735 return infinite_cost;
4737 cbase = cand->iv->base;
4738 cstep = cand->iv->step;
4739 ctype = TREE_TYPE (cbase);
4741 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4743 /* We do not have a precision to express the values of use. */
4744 return infinite_cost;
4747 if (address_p
4748 || (use->iv->base_object
4749 && cand->iv->base_object
4750 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4751 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4753 /* Do not try to express address of an object with computation based
4754 on address of a different object. This may cause problems in rtl
4755 level alias analysis (that does not expect this to be happening,
4756 as this is illegal in C), and would be unlikely to be useful
4757 anyway. */
4758 if (use->iv->base_object
4759 && cand->iv->base_object
4760 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4761 return infinite_cost;
4764 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4766 /* TODO -- add direct handling of this case. */
4767 goto fallback;
4770 /* CSTEPI is removed from the offset in case the statement is after the
4771 increment. If the step is not constant, we use zero instead.
4772 This is a bit imprecise (there is the extra addition), but
4773 redundancy elimination is likely to transform the code so that
4774 it uses the value of the variable before the increment anyway,
4775 so this is not that unrealistic. */
4776 if (cst_and_fits_in_hwi (cstep))
4777 cstepi = int_cst_value (cstep);
4778 else
4779 cstepi = 0;
4781 if (!constant_multiple_of (ustep, cstep, &rat))
4782 return infinite_cost;
4784 if (wi::fits_shwi_p (rat))
4785 ratio = rat.to_shwi ();
4786 else
4787 return infinite_cost;
4789 STRIP_NOPS (cbase);
4790 ctype = TREE_TYPE (cbase);
4792 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4794 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4795 or ratio == 1, it is better to handle this like
4797 ubase - ratio * cbase + ratio * var
4799 (this also holds in the case ratio == -1, TODO). */
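/* For instance (illustrative numbers): with cbase == 16 and ratio == 2,
   the constant part -2 * 16 == -32 goes into OFFSET and only the cost
   of ubase plus the final addition/multiplication is counted below.  */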
4801 if (cst_and_fits_in_hwi (cbase))
4803 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4804 cost = difference_cost (data,
4805 ubase, build_int_cst (utype, 0),
4806 &symbol_present, &var_present, &offset,
4807 depends_on);
4808 cost.cost /= avg_loop_niter (data->current_loop);
4810 else if (ratio == 1)
4812 tree real_cbase = cbase;
4814 /* Check to see if any adjustment is needed. */
4815 if (cstepi == 0 && stmt_is_after_inc)
4817 aff_tree real_cbase_aff;
4818 aff_tree cstep_aff;
4820 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4821 &real_cbase_aff);
4822 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4824 aff_combination_add (&real_cbase_aff, &cstep_aff);
4825 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4828 cost = difference_cost (data,
4829 ubase, real_cbase,
4830 &symbol_present, &var_present, &offset,
4831 depends_on);
4832 cost.cost /= avg_loop_niter (data->current_loop);
4834 else if (address_p
4835 && !POINTER_TYPE_P (ctype)
4836 && multiplier_allowed_in_address_p
4837 (ratio, mem_mode,
4838 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4840 tree real_cbase = cbase;
4842 if (cstepi == 0 && stmt_is_after_inc)
4844 if (POINTER_TYPE_P (ctype))
4845 real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4846 else
4847 real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4849 real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4850 build_int_cst (ctype, ratio));
4851 cost = difference_cost (data,
4852 ubase, real_cbase,
4853 &symbol_present, &var_present, &offset,
4854 depends_on);
4855 cost.cost /= avg_loop_niter (data->current_loop);
4857 else
4859 cost = force_var_cost (data, cbase, depends_on);
4860 cost = add_costs (cost,
4861 difference_cost (data,
4862 ubase, build_int_cst (utype, 0),
4863 &symbol_present, &var_present,
4864 &offset, depends_on));
4865 cost.cost /= avg_loop_niter (data->current_loop);
4866 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4869 /* Record the setup cost in the scratch field. */
4870 cost.scratch = cost.cost;
4872 if (inv_expr && depends_on && *depends_on)
4874 *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4875 address_p);
4876 /* Clear depends on. */
4877 if (*inv_expr != NULL)
4878 bitmap_clear (*depends_on);
4881 /* If we are after the increment, the value of the candidate is higher by
4882 one iteration. */
4883 if (stmt_is_after_inc)
4884 offset -= ratio * cstepi;
4886 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
4887 (the symbol/var1/const parts may be omitted). If we are looking for an
4888 address, find the cost of addressing this. */
4889 if (address_p)
4890 return add_costs (cost,
4891 get_address_cost (symbol_present, var_present,
4892 offset, ratio, cstepi,
4893 mem_mode,
4894 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4895 speed, stmt_is_after_inc,
4896 can_autoinc));
4898 /* Otherwise estimate the costs for computing the expression. */
4899 if (!symbol_present && !var_present && !offset)
4901 if (ratio != 1)
4902 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4903 return cost;
4906 /* Symbol + offset should be compile-time computable so consider that they
4907 are added once to the variable, if present. */
4908 if (var_present && (symbol_present || offset))
4909 cost.cost += adjust_setup_cost (data,
4910 add_cost (speed, TYPE_MODE (ctype)));
4912 /* Having offset does not affect runtime cost in case it is added to
4913 symbol, but it increases complexity. */
4914 if (offset)
4915 cost.complexity++;
4917 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4919 aratio = ratio > 0 ? ratio : -ratio;
4920 if (aratio != 1)
4921 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4922 return cost;
4924 fallback:
4925 if (can_autoinc)
4926 *can_autoinc = false;
4929 /* Just get the expression, expand it and measure the cost. */
4930 tree comp = get_computation_at (data->current_loop, use, cand, at);
4932 if (!comp)
4933 return infinite_cost;
4935 if (address_p)
4936 comp = build_simple_mem_ref (comp);
4938 cost = new_cost (computation_cost (comp, speed), 0);
4939 cost.scratch = 0;
4940 return cost;
4944 /* Determines the cost of the computation by which USE is expressed
4945 from induction variable CAND. If ADDRESS_P is true, we just need
4946 to create an address from it, otherwise we want to get it into a
4947 register. A set of invariants we depend on is stored in
4948 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4949 autoinc addressing is likely. */
4951 static comp_cost
4952 get_computation_cost (struct ivopts_data *data,
4953 struct iv_use *use, struct iv_cand *cand,
4954 bool address_p, bitmap *depends_on,
4955 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4957 return get_computation_cost_at (data,
4958 use, cand, address_p, depends_on, use->stmt,
4959 can_autoinc, inv_expr);
4962 /* Determines cost of computing the use in GROUP with CAND in a generic
4963 expression. */
4965 static bool
4966 determine_group_iv_cost_generic (struct ivopts_data *data,
4967 struct iv_group *group, struct iv_cand *cand)
4969 comp_cost cost;
4970 iv_inv_expr_ent *inv_expr = NULL;
4971 bitmap depends_on = NULL;
4972 struct iv_use *use = group->vuses[0];
4974 /* The simple case first -- if we need to express value of the preserved
4975 original biv, the cost is 0. This also prevents us from counting the
4976 cost of increment twice -- once at this use and once in the cost of
4977 the candidate. */
4978 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4979 cost = no_cost;
4980 else
4981 cost = get_computation_cost (data, use, cand, false,
4982 &depends_on, NULL, &inv_expr);
4984 set_group_iv_cost (data, group, cand, cost, depends_on,
4985 NULL_TREE, ERROR_MARK, inv_expr);
4986 return !infinite_cost_p (cost);
4989 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4991 static bool
4992 determine_group_iv_cost_address (struct ivopts_data *data,
4993 struct iv_group *group, struct iv_cand *cand)
4995 unsigned i;
4996 bitmap depends_on;
4997 bool can_autoinc, first = true;
4998 iv_inv_expr_ent *inv_expr = NULL;
4999 struct iv_use *use = group->vuses[0];
5000 comp_cost sum_cost = no_cost, cost;
5002 cost = get_computation_cost (data, use, cand, true,
5003 &depends_on, &can_autoinc, &inv_expr);
5005 sum_cost = cost;
5006 if (!infinite_cost_p (sum_cost) && cand->ainc_use == use)
5008 if (can_autoinc)
5009 sum_cost.cost -= cand->cost_step;
5010 /* If we generated the candidate solely for exploiting autoincrement
5011 opportunities, and it turns out it can't be used, set the cost to
5012 infinity to make sure we ignore it. */
5013 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5014 sum_cost = infinite_cost;
5017 /* Uses in a group can share setup code, so only add setup cost once. */
5018 cost.cost -= cost.scratch;
5019 /* Compute and add costs for the rest of the uses in this group. */
5020 for (i = 1; i < group->vuses.length () && !infinite_cost_p (sum_cost); i++)
5022 struct iv_use *next = group->vuses[i];
5024 /* Compute the cost for the first use with a different offset from the
5025 main use and add it afterwards. Costs for these uses could be quite
5026 different. Given the following uses in a group:
5027 use 0 : {base + A + offset_0, step}
5028 use 0.1: {base + A + offset_0, step}
5029 use 0.2: {base + A + offset_1, step}
5030 use 0.3: {base + A + offset_2, step}
5031 when we need to compute costs with candidate:
5032 cand 1 : {base + B + offset_0, step}
5034 The first use with a different offset is use 0.2; its cost is larger
5035 than the cost of use 0/0.1 because we need to compute:
5036 A - B + offset_1 - offset_0
5037 rather than:
5038 A - B. */
5039 if (first && next->addr_offset != use->addr_offset)
5041 first = false;
5042 cost = get_computation_cost (data, next, cand, true,
5043 NULL, &can_autoinc, NULL);
5044 /* Remove setup cost. */
5045 if (!infinite_cost_p (cost))
5046 cost.cost -= cost.scratch;
5048 sum_cost = add_costs (sum_cost, cost);
5050 set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5051 NULL_TREE, ERROR_MARK, inv_expr);
5053 return !infinite_cost_p (sum_cost);
5056 /* Computes value of candidate CAND at position AT in iteration NITER, and
5057 stores it in VAL. */
5059 static void
5060 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5061 aff_tree *val)
5063 aff_tree step, delta, nit;
5064 struct iv *iv = cand->iv;
5065 tree type = TREE_TYPE (iv->base);
5066 tree steptype = type;
5067 if (POINTER_TYPE_P (type))
5068 steptype = sizetype;
5069 steptype = unsigned_type_for (type);
5071 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5072 aff_combination_convert (&step, steptype);
5073 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5074 aff_combination_convert (&nit, steptype);
5075 aff_combination_mult (&nit, &step, &delta);
5076 if (stmt_after_increment (loop, cand, at))
5077 aff_combination_add (&delta, &step);
5079 tree_to_aff_combination (iv->base, type, val);
5080 if (!POINTER_TYPE_P (type))
5081 aff_combination_convert (val, steptype);
5082 aff_combination_add (val, &delta);
5085 /* Returns period of induction variable iv. */
5087 static tree
5088 iv_period (struct iv *iv)
5090 tree step = iv->step, period, type;
5091 tree pow2div;
5093 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5095 type = unsigned_type_for (TREE_TYPE (step));
5096 /* The period of the iv is lcm (step, type_range) / step - 1,
5097 i.e., N * type_range / step - 1. Since the type range is a power
5098 of two, N == step >> num_of_ending_zeros_binary (step),
5099 so the final result is
5101 (type_range >> num_of_ending_zeros_binary (step)) - 1
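For example (numbers for illustration only), a 32-bit iv with step 4
has num_of_ending_zeros_binary (step) == 2, so its period is
(2^32 >> 2) - 1 == 0x3fffffff iterations before the value wraps back
to its initial value.  */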
5104 pow2div = num_ending_zeros (step);
5106 period = build_low_bits_mask (type,
5107 (TYPE_PRECISION (type)
5108 - tree_to_uhwi (pow2div)));
5110 return period;
5113 /* Returns the comparison operator used when eliminating the iv USE. */
5115 static enum tree_code
5116 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5118 struct loop *loop = data->current_loop;
5119 basic_block ex_bb;
5120 edge exit;
5122 ex_bb = gimple_bb (use->stmt);
5123 exit = EDGE_SUCC (ex_bb, 0);
5124 if (flow_bb_inside_loop_p (loop, exit->dest))
5125 exit = EDGE_SUCC (ex_bb, 1);
5127 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5130 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5131 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5132 calculation is performed in non-wrapping type.
5134 TODO: More generally, we could test for the situation that
5135 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5136 This would require knowing the sign of OFFSET. */
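/* A made-up example of the case handled: if BASE is defined as
   "base = p_0 + a" and OFFSET == a, the affine expansion of the second
   operand cancels against OFFSET, so BASE - OFFSET == p_0 provably
   cannot overflow.  */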
5138 static bool
5139 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5141 enum tree_code code;
5142 tree e1, e2;
5143 aff_tree aff_e1, aff_e2, aff_offset;
5145 if (!nowrap_type_p (TREE_TYPE (base)))
5146 return false;
5148 base = expand_simple_operations (base);
5150 if (TREE_CODE (base) == SSA_NAME)
5152 gimple *stmt = SSA_NAME_DEF_STMT (base);
5154 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5155 return false;
5157 code = gimple_assign_rhs_code (stmt);
5158 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5159 return false;
5161 e1 = gimple_assign_rhs1 (stmt);
5162 e2 = gimple_assign_rhs2 (stmt);
5164 else
5166 code = TREE_CODE (base);
5167 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5168 return false;
5169 e1 = TREE_OPERAND (base, 0);
5170 e2 = TREE_OPERAND (base, 1);
5173 /* Use affine expansion as deeper inspection to prove the equality. */
5174 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5175 &aff_e2, &data->name_expansion_cache);
5176 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5177 &aff_offset, &data->name_expansion_cache);
5178 aff_combination_scale (&aff_offset, -1);
5179 switch (code)
5181 case PLUS_EXPR:
5182 aff_combination_add (&aff_e2, &aff_offset);
5183 if (aff_combination_zero_p (&aff_e2))
5184 return true;
5186 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5187 &aff_e1, &data->name_expansion_cache);
5188 aff_combination_add (&aff_e1, &aff_offset);
5189 return aff_combination_zero_p (&aff_e1);
5191 case POINTER_PLUS_EXPR:
5192 aff_combination_add (&aff_e2, &aff_offset);
5193 return aff_combination_zero_p (&aff_e2);
5195 default:
5196 return false;
5200 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5201 comparison with CAND. NITER describes the number of iterations of
5202 the loops. If successful, the comparison in COMP_P is altered accordingly.
5204 We aim to handle the following situation:
5206 sometype *base, *p;
5207 int a, b, i;
5209 i = a;
5210 p = p_0 = base + a;
5214 bla (*p);
5215 p++;
5216 i++;
5218 while (i < b);
5220 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5221 We aim to optimize this to
5223 p = p_0 = base + a;
5226 bla (*p);
5227 p++;
5229 while (p < p_0 - a + b);
5231 This preserves the correctness, since the pointer arithmetics does not
5232 overflow. More precisely:
5234 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5235 overflow in computing it or the values of p.
5236 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5237 overflow. To prove this, we use the fact that p_0 = base + a. */
5239 static bool
5240 iv_elimination_compare_lt (struct ivopts_data *data,
5241 struct iv_cand *cand, enum tree_code *comp_p,
5242 struct tree_niter_desc *niter)
5244 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5245 struct aff_tree nit, tmpa, tmpb;
5246 enum tree_code comp;
5247 HOST_WIDE_INT step;
5249 /* We need to know that the candidate induction variable does not overflow.
5250 While more complex analysis may be used to prove this, for now just
5251 check that the variable appears in the original program and that it
5252 is computed in a type that guarantees no overflows. */
5253 cand_type = TREE_TYPE (cand->iv->base);
5254 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5255 return false;
5257 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5258 the calculation of the BOUND could overflow, making the comparison
5259 invalid. */
5260 if (!data->loop_single_exit_p)
5261 return false;
5263 /* We need to be able to decide whether candidate is increasing or decreasing
5264 in order to choose the right comparison operator. */
5265 if (!cst_and_fits_in_hwi (cand->iv->step))
5266 return false;
5267 step = int_cst_value (cand->iv->step);
5269 /* Check that the number of iterations matches the expected pattern:
5270 a + 1 > b ? 0 : b - a - 1. */
5271 mbz = niter->may_be_zero;
5272 if (TREE_CODE (mbz) == GT_EXPR)
5274 /* Handle a + 1 > b. */
5275 tree op0 = TREE_OPERAND (mbz, 0);
5276 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5278 a = TREE_OPERAND (op0, 0);
5279 b = TREE_OPERAND (mbz, 1);
5281 else
5282 return false;
5284 else if (TREE_CODE (mbz) == LT_EXPR)
5286 tree op1 = TREE_OPERAND (mbz, 1);
5288 /* Handle b < a + 1. */
5289 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5291 a = TREE_OPERAND (op1, 0);
5292 b = TREE_OPERAND (mbz, 0);
5294 else
5295 return false;
5297 else
5298 return false;
5300 /* Expected number of iterations is B - A - 1. Check that it matches
5301 the actual number, i.e., that B - A - NITER = 1. */
5302 tree_to_aff_combination (niter->niter, nit_type, &nit);
5303 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5304 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5305 aff_combination_scale (&nit, -1);
5306 aff_combination_scale (&tmpa, -1);
5307 aff_combination_add (&tmpb, &tmpa);
5308 aff_combination_add (&tmpb, &nit);
5309 if (tmpb.n != 0 || tmpb.offset != 1)
5310 return false;
5312 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5313 overflow. */
5314 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5315 cand->iv->step,
5316 fold_convert (TREE_TYPE (cand->iv->step), a));
5317 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5318 return false;
5320 /* Determine the new comparison operator. */
5321 comp = step < 0 ? GT_EXPR : LT_EXPR;
5322 if (*comp_p == NE_EXPR)
5323 *comp_p = comp;
5324 else if (*comp_p == EQ_EXPR)
5325 *comp_p = invert_tree_comparison (comp, false);
5326 else
5327 gcc_unreachable ();
5329 return true;
5332 /* Check whether it is possible to express the condition in USE by a comparison
5333 of candidate CAND. If so, store the value compared against in BOUND, and the
5334 comparison operator in COMP. */
5336 static bool
5337 may_eliminate_iv (struct ivopts_data *data,
5338 struct iv_use *use, struct iv_cand *cand, tree *bound,
5339 enum tree_code *comp)
5341 basic_block ex_bb;
5342 edge exit;
5343 tree period;
5344 struct loop *loop = data->current_loop;
5345 aff_tree bnd;
5346 struct tree_niter_desc *desc = NULL;
5348 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5349 return false;
5351 /* For now this works only for exits that dominate the loop latch.
5352 TODO: extend to other conditions inside loop body. */
5353 ex_bb = gimple_bb (use->stmt);
5354 if (use->stmt != last_stmt (ex_bb)
5355 || gimple_code (use->stmt) != GIMPLE_COND
5356 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5357 return false;
5359 exit = EDGE_SUCC (ex_bb, 0);
5360 if (flow_bb_inside_loop_p (loop, exit->dest))
5361 exit = EDGE_SUCC (ex_bb, 1);
5362 if (flow_bb_inside_loop_p (loop, exit->dest))
5363 return false;
5365 desc = niter_for_exit (data, exit);
5366 if (!desc)
5367 return false;
5369 /* Determine whether we can use the variable to test the exit condition.
5370 This is the case iff the period of the induction variable is greater
5371 than the number of iterations for which the exit condition is true. */
5372 period = iv_period (cand->iv);
5374 /* If the number of iterations is constant, compare against it directly. */
5375 if (TREE_CODE (desc->niter) == INTEGER_CST)
5377 /* See cand_value_at. */
5378 if (stmt_after_increment (loop, cand, use->stmt))
5380 if (!tree_int_cst_lt (desc->niter, period))
5381 return false;
5383 else
5385 if (tree_int_cst_lt (period, desc->niter))
5386 return false;
5390 /* If not, and if this is the only possible exit of the loop, see whether
5391 we can get a conservative estimate on the number of iterations of the
5392 entire loop and compare against that instead. */
5393 else
5395 widest_int period_value, max_niter;
5397 max_niter = desc->max;
5398 if (stmt_after_increment (loop, cand, use->stmt))
5399 max_niter += 1;
5400 period_value = wi::to_widest (period);
5401 if (wi::gtu_p (max_niter, period_value))
5403 /* See if we can take advantage of inferred loop bound
5404 information. */
5405 if (data->loop_single_exit_p)
5407 if (!max_loop_iterations (loop, &max_niter))
5408 return false;
5409 /* The loop bound is already adjusted by adding 1. */
5410 if (wi::gtu_p (max_niter, period_value))
5411 return false;
5413 else
5414 return false;
5418 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5420 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5421 aff_combination_to_tree (&bnd));
5422 *comp = iv_elimination_compare (data, use);
5424 /* It is unlikely that computing the number of iterations using division
5425 would be more profitable than keeping the original induction variable. */
5426 if (expression_expensive_p (*bound))
5427 return false;
5429 /* Sometimes, it is possible to handle the situation that the number of
5430 iterations may be zero unless additional assumptions hold, by using <
5431 instead of != in the exit condition.
5433 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5434 base the exit condition on it. However, that is often too
5435 expensive. */
5436 if (!integer_zerop (desc->may_be_zero))
5437 return iv_elimination_compare_lt (data, cand, comp, desc);
5439 return true;
5442 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5443 be copied, if it is used in the loop body and DATA->body_includes_call. */
5445 static int
5446 parm_decl_cost (struct ivopts_data *data, tree bound)
5448 tree sbound = bound;
5449 STRIP_NOPS (sbound);
5451 if (TREE_CODE (sbound) == SSA_NAME
5452 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5453 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5454 && data->body_includes_call)
5455 return COSTS_N_INSNS (1);
5457 return 0;
5460 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5462 static bool
5463 determine_group_iv_cost_cond (struct ivopts_data *data,
5464 struct iv_group *group, struct iv_cand *cand)
5466 tree bound = NULL_TREE;
5467 struct iv *cmp_iv;
5468 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5469 comp_cost elim_cost, express_cost, cost, bound_cost;
5470 bool ok;
5471 iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5472 tree *control_var, *bound_cst;
5473 enum tree_code comp = ERROR_MARK;
5474 struct iv_use *use = group->vuses[0];
5476 gcc_assert (cand->iv);
5478 /* Try iv elimination. */
5479 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5481 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5482 if (elim_cost.cost == 0)
5483 elim_cost.cost = parm_decl_cost (data, bound);
5484 else if (TREE_CODE (bound) == INTEGER_CST)
5485 elim_cost.cost = 0;
5486 /* If we replace a loop condition 'i < n' with 'p < base + n',
5487 depends_on_elim will have 'base' and 'n' set, which implies
5488 that both 'base' and 'n' will be live during the loop. More likely,
5489 'base + n' will be loop invariant, resulting in only one live value
5490 during the loop. So in that case we clear depends_on_elim and set
5491 elim_inv_expr instead. */
5492 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5494 elim_inv_expr = record_inv_expr (data, bound);
5495 bitmap_clear (depends_on_elim);
5497 /* The bound is a loop invariant, so it will be only computed
5498 once. */
5499 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5501 else
5502 elim_cost = infinite_cost;
5504 /* Try expressing the original giv. If it is compared with an invariant,
5505 note that we cannot get rid of it. */
5506 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5507 NULL, &cmp_iv);
5508 gcc_assert (ok);
5510 /* When the condition is a comparison of the candidate IV against
5511 zero, prefer this IV.
5513 TODO: The constant that we're subtracting from the cost should
5514 be target-dependent. This information should be added to the
5515 target costs for each backend. */
5516 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5517 && integer_zerop (*bound_cst)
5518 && (operand_equal_p (*control_var, cand->var_after, 0)
5519 || operand_equal_p (*control_var, cand->var_before, 0)))
5520 elim_cost.cost -= 1;
5522 express_cost = get_computation_cost (data, use, cand, false,
5523 &depends_on_express, NULL,
5524 &express_inv_expr);
5525 fd_ivopts_data = data;
5526 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5528 /* Count the cost of the original bound as well. */
5529 bound_cost = force_var_cost (data, *bound_cst, NULL);
5530 if (bound_cost.cost == 0)
5531 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5532 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5533 bound_cost.cost = 0;
5534 express_cost.cost += bound_cost.cost;
5536 /* Choose the better approach, preferring the eliminated IV. */
5537 if (compare_costs (elim_cost, express_cost) <= 0)
5539 cost = elim_cost;
5540 depends_on = depends_on_elim;
5541 depends_on_elim = NULL;
5542 inv_expr = elim_inv_expr;
5544 else
5546 cost = express_cost;
5547 depends_on = depends_on_express;
5548 depends_on_express = NULL;
5549 bound = NULL_TREE;
5550 comp = ERROR_MARK;
5551 inv_expr = express_inv_expr;
5554 set_group_iv_cost (data, group, cand, cost,
5555 depends_on, bound, comp, inv_expr);
5557 if (depends_on_elim)
5558 BITMAP_FREE (depends_on_elim);
5559 if (depends_on_express)
5560 BITMAP_FREE (depends_on_express);
5562 return !infinite_cost_p (cost);
5565 /* Determines the cost of computing the uses in GROUP with CAND. Returns
5566 false if the uses in GROUP cannot be represented by CAND. */
5568 static bool
5569 determine_group_iv_cost (struct ivopts_data *data,
5570 struct iv_group *group, struct iv_cand *cand)
5572 switch (group->type)
5574 case USE_NONLINEAR_EXPR:
5575 return determine_group_iv_cost_generic (data, group, cand);
5577 case USE_ADDRESS:
5578 return determine_group_iv_cost_address (data, group, cand);
5580 case USE_COMPARE:
5581 return determine_group_iv_cost_cond (data, group, cand);
5583 default:
5584 gcc_unreachable ();
5588 /* Return true if get_computation_cost indicates that autoincrement is
5589 a possibility for the pair of USE and CAND, false otherwise. */
5591 static bool
5592 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5593 struct iv_cand *cand)
5595 bitmap depends_on;
5596 bool can_autoinc;
5597 comp_cost cost;
5599 if (use->type != USE_ADDRESS)
5600 return false;
5602 cost = get_computation_cost (data, use, cand, true, &depends_on,
5603 &can_autoinc, NULL);
5605 BITMAP_FREE (depends_on);
5607 return !infinite_cost_p (cost) && can_autoinc;
5610 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5611 use that allows autoincrement, and set their AINC_USE if possible. */
5613 static void
5614 set_autoinc_for_original_candidates (struct ivopts_data *data)
5616 unsigned i, j;
5618 for (i = 0; i < data->vcands.length (); i++)
5620 struct iv_cand *cand = data->vcands[i];
5621 struct iv_use *closest_before = NULL;
5622 struct iv_use *closest_after = NULL;
5623 if (cand->pos != IP_ORIGINAL)
5624 continue;
5626 for (j = 0; j < data->vgroups.length (); j++)
5628 struct iv_group *group = data->vgroups[j];
5629 struct iv_use *use = group->vuses[0];
5630 unsigned uid = gimple_uid (use->stmt);
5632 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5633 continue;
5635 if (uid < gimple_uid (cand->incremented_at)
5636 && (closest_before == NULL
5637 || uid > gimple_uid (closest_before->stmt)))
5638 closest_before = use;
5640 if (uid > gimple_uid (cand->incremented_at)
5641 && (closest_after == NULL
5642 || uid < gimple_uid (closest_after->stmt)))
5643 closest_after = use;
5646 if (closest_before != NULL
5647 && autoinc_possible_for_pair (data, closest_before, cand))
5648 cand->ainc_use = closest_before;
5649 else if (closest_after != NULL
5650 && autoinc_possible_for_pair (data, closest_after, cand))
5651 cand->ainc_use = closest_after;
5655 /* Finds the candidates for the induction variables. */
5657 static void
5658 find_iv_candidates (struct ivopts_data *data)
5660 /* Add commonly used ivs. */
5661 add_standard_iv_candidates (data);
5663 /* Add old induction variables. */
5664 add_iv_candidate_for_bivs (data);
5666 /* Add induction variables derived from uses. */
5667 add_iv_candidate_for_groups (data);
5669 set_autoinc_for_original_candidates (data);
5671 /* Record the important candidates. */
5672 record_important_candidates (data);
5674 if (dump_file && (dump_flags & TDF_DETAILS))
5676 unsigned i;
5678 fprintf (dump_file, "\n<Important Candidates>:\t");
5679 for (i = 0; i < data->vcands.length (); i++)
5680 if (data->vcands[i]->important)
5681 fprintf (dump_file, " %d,", data->vcands[i]->id);
5682 fprintf (dump_file, "\n");
5684 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5685 for (i = 0; i < data->vgroups.length (); i++)
5687 struct iv_group *group = data->vgroups[i];
5689 if (group->related_cands)
5691 fprintf (dump_file, " Group %d:\t", group->id);
5692 dump_bitmap (dump_file, group->related_cands);
5695 fprintf (dump_file, "\n");
5699 /* Determines the costs of computing each group of uses with each iv candidate. */
5701 static void
5702 determine_group_iv_costs (struct ivopts_data *data)
5704 unsigned i, j;
5705 struct iv_cand *cand;
5706 struct iv_group *group;
5707 bitmap to_clear = BITMAP_ALLOC (NULL);
5709 alloc_use_cost_map (data);
5711 for (i = 0; i < data->vgroups.length (); i++)
5713 group = data->vgroups[i];
5715 if (data->consider_all_candidates)
5717 for (j = 0; j < data->vcands.length (); j++)
5719 cand = data->vcands[j];
5720 determine_group_iv_cost (data, group, cand);
5723 else
5725 bitmap_iterator bi;
5727 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5729 cand = data->vcands[j];
5730 if (!determine_group_iv_cost (data, group, cand))
5731 bitmap_set_bit (to_clear, j);
5734 /* Remove the candidates for which the cost is infinite from
5735 the list of related candidates. */
5736 bitmap_and_compl_into (group->related_cands, to_clear);
5737 bitmap_clear (to_clear);
5741 BITMAP_FREE (to_clear);
5743 if (dump_file && (dump_flags & TDF_DETAILS))
5745 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5746 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5748 for (hash_table<iv_inv_expr_hasher>::iterator it
5749 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5750 ++it)
5751 list.safe_push (*it);
5753 list.qsort (sort_iv_inv_expr_ent);
5755 for (i = 0; i < list.length (); ++i)
5757 fprintf (dump_file, "inv_expr %d: \t", i);
5758 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5759 fprintf (dump_file, "\n");
5762 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5764 for (i = 0; i < data->vgroups.length (); i++)
5766 group = data->vgroups[i];
5768 fprintf (dump_file, "Group %d:\n", i);
5769 fprintf (dump_file, " cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5770 for (j = 0; j < group->n_map_members; j++)
5772 if (!group->cost_map[j].cand
5773 || infinite_cost_p (group->cost_map[j].cost))
5774 continue;
5776 fprintf (dump_file, " %d\t%d\t%d\t",
5777 group->cost_map[j].cand->id,
5778 group->cost_map[j].cost.cost,
5779 group->cost_map[j].cost.complexity);
5780 if (group->cost_map[j].inv_expr != NULL)
5781 fprintf (dump_file, "%d\t",
5782 group->cost_map[j].inv_expr->id);
5783 else
5784 fprintf (dump_file, "\t");
5785 if (group->cost_map[j].depends_on)
5786 bitmap_print (dump_file,
5787 group->cost_map[j].depends_on, "","");
5788 fprintf (dump_file, "\n");
5791 fprintf (dump_file, "\n");
5793 fprintf (dump_file, "\n");
5797 /* Determines cost of the candidate CAND. */
5799 static void
5800 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5802 comp_cost cost_base;
5803 unsigned cost, cost_step;
5804 tree base;
5806 if (!cand->iv)
5808 cand->cost = 0;
5809 return;
5812 /* There are two costs associated with the candidate -- its increment
5813 and its initialization. The second is almost negligible for any loop
5814 that rolls enough, so we give it only very little weight. */
5816 base = cand->iv->base;
5817 cost_base = force_var_cost (data, base, NULL);
5818 /* It is exceptional for the iv register to happen to be initialized with
5819 the proper value at no cost. In general, there will at least be a regcopy
5820 or a const set. */
5821 if (cost_base.cost == 0)
5822 cost_base.cost = COSTS_N_INSNS (1);
5823 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5825 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5827 /* Prefer the original ivs unless we may gain something by replacing it.
5828 The reason is to make debugging simpler; so this is not relevant for
5829 artificial ivs created by other optimization passes. */
5830 if (cand->pos != IP_ORIGINAL
5831 || !SSA_NAME_VAR (cand->var_before)
5832 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5833 cost++;
5835 /* Prefer not to insert statements into latch unless there are some
5836 already (so that we do not create unnecessary jumps). */
5837 if (cand->pos == IP_END
5838 && empty_block_p (ip_end_pos (data->current_loop)))
5839 cost++;
5841 cand->cost = cost;
5842 cand->cost_step = cost_step;
5845 /* Determines costs of computation of the candidates. */
5847 static void
5848 determine_iv_costs (struct ivopts_data *data)
5850 unsigned i;
5852 if (dump_file && (dump_flags & TDF_DETAILS))
5854 fprintf (dump_file, "<Candidate Costs>:\n");
5855 fprintf (dump_file, " cand\tcost\n");
5858 for (i = 0; i < data->vcands.length (); i++)
5860 struct iv_cand *cand = data->vcands[i];
5862 determine_iv_cost (data, cand);
5864 if (dump_file && (dump_flags & TDF_DETAILS))
5865 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5868 if (dump_file && (dump_flags & TDF_DETAILS))
5869 fprintf (dump_file, "\n");
5872 /* Calculates cost for having SIZE induction variables. */
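/* Purely as an illustration: the result is SIZE plus the target's
   estimate of the extra spill cost caused by SIZE additional live
   registers on top of DATA->regs_used; the "+ SIZE" term biases the
   search towards smaller iv sets even when no spilling occurs.  */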
5874 static unsigned
5875 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5877 /* We add size to the cost, so that we prefer eliminating ivs
5878 if possible. */
5879 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5880 data->body_includes_call);
5883 /* For each size of the induction variable set determine the penalty. */
5885 static void
5886 determine_set_costs (struct ivopts_data *data)
5888 unsigned j, n;
5889 gphi *phi;
5890 gphi_iterator psi;
5891 tree op;
5892 struct loop *loop = data->current_loop;
5893 bitmap_iterator bi;
5895 if (dump_file && (dump_flags & TDF_DETAILS))
5897 fprintf (dump_file, "<Global Costs>:\n");
5898 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5899 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5900 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5901 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5904 n = 0;
5905 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5907 phi = psi.phi ();
5908 op = PHI_RESULT (phi);
5910 if (virtual_operand_p (op))
5911 continue;
5913 if (get_iv (data, op))
5914 continue;
5916 n++;
5919 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5921 struct version_info *info = ver_info (data, j);
5923 if (info->inv_id && info->has_nonlin_use)
5924 n++;
5927 data->regs_used = n;
5928 if (dump_file && (dump_flags & TDF_DETAILS))
5929 fprintf (dump_file, " regs_used %d\n", n);
5931 if (dump_file && (dump_flags & TDF_DETAILS))
5933 fprintf (dump_file, " cost for size:\n");
5934 fprintf (dump_file, " ivs\tcost\n");
5935 for (j = 0; j <= 2 * target_avail_regs; j++)
5936 fprintf (dump_file, " %d\t%d\n", j,
5937 ivopts_global_cost_for_size (data, j));
5938 fprintf (dump_file, "\n");
5942 /* Returns true if A is a cheaper cost pair than B. */
5944 static bool
5945 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5947 int cmp;
5949 if (!a)
5950 return false;
5952 if (!b)
5953 return true;
5955 cmp = compare_costs (a->cost, b->cost);
5956 if (cmp < 0)
5957 return true;
5959 if (cmp > 0)
5960 return false;
5962 /* In case the costs are the same, prefer the cheaper candidate. */
5963 if (a->cand->cost < b->cand->cost)
5964 return true;
5966 return false;
5970 /* Returns the candidate by which GROUP is expressed in IVS. */
5972 static struct cost_pair *
5973 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5975 return ivs->cand_for_group[group->id];
5978 /* Computes the cost field of IVS structure. */
5980 static void
5981 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5983 comp_cost cost = ivs->cand_use_cost;
5985 cost.cost += ivs->cand_cost;
5987 cost.cost += ivopts_global_cost_for_size (data,
5988 ivs->n_regs
5989 + ivs->used_inv_exprs->elements ());
5991 ivs->cost = cost;
5994 /* Remove the invariants in set INVS from set IVS. */
5996 static void
5997 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5999 bitmap_iterator bi;
6000 unsigned iid;
6002 if (!invs)
6003 return;
6005 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6007 ivs->n_invariant_uses[iid]--;
6008 if (ivs->n_invariant_uses[iid] == 0)
6009 ivs->n_regs--;
6013 /* Set GROUP not to be expressed by any candidate in IVS. */
6015 static void
6016 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6017 struct iv_group *group)
6019 unsigned gid = group->id, cid;
6020 struct cost_pair *cp;
6022 cp = ivs->cand_for_group[gid];
6023 if (!cp)
6024 return;
6025 cid = cp->cand->id;
6027 ivs->bad_groups++;
6028 ivs->cand_for_group[gid] = NULL;
6029 ivs->n_cand_uses[cid]--;
6031 if (ivs->n_cand_uses[cid] == 0)
6033 bitmap_clear_bit (ivs->cands, cid);
6034 /* Do not count the pseudocandidates. */
6035 if (cp->cand->iv)
6036 ivs->n_regs--;
6037 ivs->n_cands--;
6038 ivs->cand_cost -= cp->cand->cost;
6040 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6043 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6045 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6047 if (cp->inv_expr != NULL)
6049 unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6050 --(*slot);
6051 if (*slot == 0)
6052 ivs->used_inv_exprs->remove (cp->inv_expr);
6054 iv_ca_recount_cost (data, ivs);
6057 /* Add invariants in set INVS to set IVS. */
6059 static void
6060 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6062 bitmap_iterator bi;
6063 unsigned iid;
6065 if (!invs)
6066 return;
6068 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6070 ivs->n_invariant_uses[iid]++;
6071 if (ivs->n_invariant_uses[iid] == 1)
6072 ivs->n_regs++;
6076 /* Set cost pair for GROUP in set IVS to CP. */
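/* CP may be NULL, in which case GROUP simply becomes unassigned.  A
   previously assigned cost pair is dropped first via iv_ca_set_no_cp;
   when a new pair is installed, the candidate, register and invariant
   bookkeeping is updated and the total cost recomputed.  */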
6078 static void
6079 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6080 struct iv_group *group, struct cost_pair *cp)
6082 unsigned gid = group->id, cid;
6084 if (ivs->cand_for_group[gid] == cp)
6085 return;
6087 if (ivs->cand_for_group[gid])
6088 iv_ca_set_no_cp (data, ivs, group);
6090 if (cp)
6092 cid = cp->cand->id;
6094 ivs->bad_groups--;
6095 ivs->cand_for_group[gid] = cp;
6096 ivs->n_cand_uses[cid]++;
6097 if (ivs->n_cand_uses[cid] == 1)
6099 bitmap_set_bit (ivs->cands, cid);
6100 /* Do not count the pseudocandidates. */
6101 if (cp->cand->iv)
6102 ivs->n_regs++;
6103 ivs->n_cands++;
6104 ivs->cand_cost += cp->cand->cost;
6106 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6109 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6110 iv_ca_set_add_invariants (ivs, cp->depends_on);
6112 if (cp->inv_expr != NULL)
6114 unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6115 ++(*slot);
6117 iv_ca_recount_cost (data, ivs);
6121 /* Extend set IVS by expressing GROUP by some of the candidates in it
6122 if possible. Consider all important candidates if the candidates in
6123 set IVS don't give any result. */
6125 static void
6126 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6127 struct iv_group *group)
6129 struct cost_pair *best_cp = NULL, *cp;
6130 bitmap_iterator bi;
6131 unsigned i;
6132 struct iv_cand *cand;
6134 gcc_assert (ivs->upto >= group->id);
6135 ivs->upto++;
6136 ivs->bad_groups++;
6138 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6140 cand = data->vcands[i];
6141 cp = get_group_iv_cost (data, group, cand);
6142 if (cheaper_cost_pair (cp, best_cp))
6143 best_cp = cp;
6146 if (best_cp == NULL)
6148 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6150 cand = data->vcands[i];
6151 cp = get_group_iv_cost (data, group, cand);
6152 if (cheaper_cost_pair (cp, best_cp))
6153 best_cp = cp;
6157 iv_ca_set_cp (data, ivs, group, best_cp);
6160 /* Get cost for assignment IVS. */
6162 static comp_cost
6163 iv_ca_cost (struct iv_ca *ivs)
6165 /* This was a conditional expression but it triggered a bug in
6166 Sun C 5.5. */
6167 if (ivs->bad_groups)
6168 return infinite_cost;
6169 else
6170 return ivs->cost;
6173 /* Returns true if all dependences of CP are among invariants in IVS. */
6175 static bool
6176 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6178 unsigned i;
6179 bitmap_iterator bi;
6181 if (!cp->depends_on)
6182 return true;
6184 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6186 if (ivs->n_invariant_uses[i] == 0)
6187 return false;
6190 return true;
6193 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6194 it before NEXT. */
6196 static struct iv_ca_delta *
6197 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6198 struct cost_pair *new_cp, struct iv_ca_delta *next)
6200 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6202 change->group = group;
6203 change->old_cp = old_cp;
6204 change->new_cp = new_cp;
6205 change->next = next;
6207 return change;
6210 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6211 are rewritten. */
6213 static struct iv_ca_delta *
6214 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6216 struct iv_ca_delta *last;
6218 if (!l2)
6219 return l1;
6221 if (!l1)
6222 return l2;
6224 for (last = l1; last->next; last = last->next)
6225 continue;
6226 last->next = l2;
6228 return l1;
6231 /* Reverse the list of changes DELTA, forming the inverse to it. */
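/* Besides reversing the chain, old_cp and new_cp are swapped in every
   entry, so committing the returned list undoes the original changes.  */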
6233 static struct iv_ca_delta *
6234 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6236 struct iv_ca_delta *act, *next, *prev = NULL;
6238 for (act = delta; act; act = next)
6240 next = act->next;
6241 act->next = prev;
6242 prev = act;
6244 std::swap (act->old_cp, act->new_cp);
6247 return prev;
6250 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6251 reverted instead. */
6253 static void
6254 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6255 struct iv_ca_delta *delta, bool forward)
6257 struct cost_pair *from, *to;
6258 struct iv_ca_delta *act;
6260 if (!forward)
6261 delta = iv_ca_delta_reverse (delta);
6263 for (act = delta; act; act = act->next)
6265 from = act->old_cp;
6266 to = act->new_cp;
6267 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6268 iv_ca_set_cp (data, ivs, act->group, to);
6271 if (!forward)
6272 iv_ca_delta_reverse (delta);
6275 /* Returns true if CAND is used in IVS. */
6277 static bool
6278 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6280 return ivs->n_cand_uses[cand->id] > 0;
6283 /* Returns number of induction variable candidates in the set IVS. */
6285 static unsigned
6286 iv_ca_n_cands (struct iv_ca *ivs)
6288 return ivs->n_cands;
6291 /* Free the list of changes DELTA. */
6293 static void
6294 iv_ca_delta_free (struct iv_ca_delta **delta)
6296 struct iv_ca_delta *act, *next;
6298 for (act = *delta; act; act = next)
6300 next = act->next;
6301 free (act);
6304 *delta = NULL;
6307 /* Allocates new iv candidates assignment. */
6309 static struct iv_ca *
6310 iv_ca_new (struct ivopts_data *data)
6312 struct iv_ca *nw = XNEW (struct iv_ca);
6314 nw->upto = 0;
6315 nw->bad_groups = 0;
6316 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6317 data->vgroups.length ());
6318 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6319 nw->cands = BITMAP_ALLOC (NULL);
6320 nw->n_cands = 0;
6321 nw->n_regs = 0;
6322 nw->cand_use_cost = no_cost;
6323 nw->cand_cost = 0;
6324 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6325 nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6326 nw->cost = no_cost;
6328 return nw;
6331 /* Free memory occupied by the set IVS. */
6333 static void
6334 iv_ca_free (struct iv_ca **ivs)
6336 free ((*ivs)->cand_for_group);
6337 free ((*ivs)->n_cand_uses);
6338 BITMAP_FREE ((*ivs)->cands);
6339 free ((*ivs)->n_invariant_uses);
6340 delete ((*ivs)->used_inv_exprs);
6341 free (*ivs);
6342 *ivs = NULL;
6345 /* Dumps IVS to FILE. */
6347 static void
6348 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6350 unsigned i;
6351 comp_cost cost = iv_ca_cost (ivs);
6353 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6354 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6355 ivs->cand_cost, ivs->cand_use_cost.cost,
6356 ivs->cand_use_cost.complexity);
6357 bitmap_print (file, ivs->cands, " candidates: ","\n");
6359 for (i = 0; i < ivs->upto; i++)
6361 struct iv_group *group = data->vgroups[i];
6362 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6363 if (cp)
6364 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6365 group->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6366 else
6367 fprintf (file, " group:%d --> ??\n", group->id);
6370 const char *pref = "";
6371 fprintf (file, " invariant variables: ");
6372 for (i = 1; i <= data->max_inv_id; i++)
6373 if (ivs->n_invariant_uses[i])
6375 fprintf (file, "%s%d", pref, i);
6376 pref = ", ";
6379 pref = "";
6380 fprintf (file, "\n invariant expressions: ");
6381 for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6382 = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6384 fprintf (file, "%s%d", pref, (*it).first->id);
6385 pref = ", ";
6388 fprintf (file, "\n\n");
6391 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6392 of the new set, and store the differences in DELTA. The number of induction
6393 variables in the new set is stored in N_IVS. If MIN_NCAND is true, the
6394 function tries to find a solution with a minimal number of iv candidates. */
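/* The resulting delta is committed only temporarily, to measure the cost,
   and is reverted again before returning; the caller decides whether to
   apply it for real via iv_ca_delta_commit.  */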
6396 static comp_cost
6397 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6398 struct iv_cand *cand, struct iv_ca_delta **delta,
6399 unsigned *n_ivs, bool min_ncand)
6401 unsigned i;
6402 comp_cost cost;
6403 struct iv_group *group;
6404 struct cost_pair *old_cp, *new_cp;
6406 *delta = NULL;
6407 for (i = 0; i < ivs->upto; i++)
6409 group = data->vgroups[i];
6410 old_cp = iv_ca_cand_for_group (ivs, group);
6412 if (old_cp
6413 && old_cp->cand == cand)
6414 continue;
6416 new_cp = get_group_iv_cost (data, group, cand);
6417 if (!new_cp)
6418 continue;
6420 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6421 continue;
6423 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6424 continue;
6426 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6429 iv_ca_delta_commit (data, ivs, *delta, true);
6430 cost = iv_ca_cost (ivs);
6431 if (n_ivs)
6432 *n_ivs = iv_ca_n_cands (ivs);
6433 iv_ca_delta_commit (data, ivs, *delta, false);
6435 return cost;
6438 /* Try narrowing set IVS by removing CAND. Return the cost of
6439 the new set and store the differences in DELTA. START is
6440 the candidate with which we start narrowing. */
6442 static comp_cost
6443 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6444 struct iv_cand *cand, struct iv_cand *start,
6445 struct iv_ca_delta **delta)
6447 unsigned i, ci;
6448 struct iv_group *group;
6449 struct cost_pair *old_cp, *new_cp, *cp;
6450 bitmap_iterator bi;
6451 struct iv_cand *cnd;
6452 comp_cost cost, best_cost, acost;
6454 *delta = NULL;
6455 for (i = 0; i < data->vgroups.length (); i++)
6457 group = data->vgroups[i];
6459 old_cp = iv_ca_cand_for_group (ivs, group);
6460 if (old_cp->cand != cand)
6461 continue;
6463 best_cost = iv_ca_cost (ivs);
6464 /* Start narrowing with START. */
6465 new_cp = get_group_iv_cost (data, group, start);
6467 if (data->consider_all_candidates)
6469 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6471 if (ci == cand->id || (start && ci == start->id))
6472 continue;
6474 cnd = data->vcands[ci];
6476 cp = get_group_iv_cost (data, group, cnd);
6477 if (!cp)
6478 continue;
6480 iv_ca_set_cp (data, ivs, group, cp);
6481 acost = iv_ca_cost (ivs);
6483 if (compare_costs (acost, best_cost) < 0)
6485 best_cost = acost;
6486 new_cp = cp;
6490 else
6492 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6494 if (ci == cand->id || (start && ci == start->id))
6495 continue;
6497 cnd = data->vcands[ci];
6499 cp = get_group_iv_cost (data, group, cnd);
6500 if (!cp)
6501 continue;
6503 iv_ca_set_cp (data, ivs, group, cp);
6504 acost = iv_ca_cost (ivs);
6506 if (compare_costs (acost, best_cost) < 0)
6508 best_cost = acost;
6509 new_cp = cp;
6513 /* Restore the old cp for this group. */
6514 iv_ca_set_cp (data, ivs, group, old_cp);
6516 if (!new_cp)
6518 iv_ca_delta_free (delta);
6519 return infinite_cost;
6522 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6525 iv_ca_delta_commit (data, ivs, *delta, true);
6526 cost = iv_ca_cost (ivs);
6527 iv_ca_delta_commit (data, ivs, *delta, false);
6529 return cost;
6532 /* Try optimizing the set of candidates IVS by removing candidates other
6533 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6534 differences in DELTA. */
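/* The function removes one candidate at a time -- the cheapest removal
   found by iv_ca_narrow -- and then recurses on the reduced set until no
   further removal improves the cost; all the removals are accumulated
   in DELTA.  */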
6536 static comp_cost
6537 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6538 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6540 bitmap_iterator bi;
6541 struct iv_ca_delta *act_delta, *best_delta;
6542 unsigned i;
6543 comp_cost best_cost, acost;
6544 struct iv_cand *cand;
6546 best_delta = NULL;
6547 best_cost = iv_ca_cost (ivs);
6549 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6551 cand = data->vcands[i];
6553 if (cand == except_cand)
6554 continue;
6556 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6558 if (compare_costs (acost, best_cost) < 0)
6560 best_cost = acost;
6561 iv_ca_delta_free (&best_delta);
6562 best_delta = act_delta;
6564 else
6565 iv_ca_delta_free (&act_delta);
6568 if (!best_delta)
6570 *delta = NULL;
6571 return best_cost;
6574 /* Recurse to possibly remove other unnecessary ivs. */
6575 iv_ca_delta_commit (data, ivs, best_delta, true);
6576 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6577 iv_ca_delta_commit (data, ivs, best_delta, false);
6578 *delta = iv_ca_delta_join (best_delta, *delta);
6579 return best_cost;
6582 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a
6583 cheaper local cost for GROUP than BEST_CP. If so, return a pointer
6584 to its cost_pair; otherwise just return BEST_CP. */
6586 static struct cost_pair*
6587 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6588 unsigned int cand_idx, struct iv_cand *old_cand,
6589 struct cost_pair *best_cp)
6591 struct iv_cand *cand;
6592 struct cost_pair *cp;
6594 gcc_assert (old_cand != NULL && best_cp != NULL);
6595 if (cand_idx == old_cand->id)
6596 return best_cp;
6598 cand = data->vcands[cand_idx];
6599 cp = get_group_iv_cost (data, group, cand);
6600 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6601 return cp;
6603 return best_cp;
6606 /* Try breaking the locally optimal fixed-point for IVS by replacing
6607 candidates that are used by more than one iv use. For each of those
6608 candidates, this function tries to represent iv uses under that candidate
6609 using other ones with lower local cost, then tries to prune the new set.
6610 If the new set has a lower cost, it returns the new cost after recording
6611 the candidate replacement in list DELTA. */
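/* Candidates that are used by exactly one group, or by more than
   ALWAYS_PRUNE_CAND_SET_BOUND groups, are not considered for replacement.  */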
6613 static comp_cost
6614 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6615 struct iv_ca_delta **delta)
6617 bitmap_iterator bi, bj;
6618 unsigned int i, j, k;
6619 struct iv_cand *cand;
6620 comp_cost orig_cost, acost;
6621 struct iv_ca_delta *act_delta, *tmp_delta;
6622 struct cost_pair *old_cp, *best_cp = NULL;
6624 *delta = NULL;
6625 orig_cost = iv_ca_cost (ivs);
6627 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6629 if (ivs->n_cand_uses[i] == 1
6630 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6631 continue;
6633 cand = data->vcands[i];
6635 act_delta = NULL;
6636 /* Represent uses under current candidate using other ones with
6637 lower local cost. */
6638 for (j = 0; j < ivs->upto; j++)
6640 struct iv_group *group = data->vgroups[j];
6641 old_cp = iv_ca_cand_for_group (ivs, group);
6643 if (old_cp->cand != cand)
6644 continue;
6646 best_cp = old_cp;
6647 if (data->consider_all_candidates)
6648 for (k = 0; k < data->vcands.length (); k++)
6649 best_cp = cheaper_cost_with_cand (data, group, k,
6650 old_cp->cand, best_cp);
6651 else
6652 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6653 best_cp = cheaper_cost_with_cand (data, group, k,
6654 old_cp->cand, best_cp);
6656 if (best_cp == old_cp)
6657 continue;
6659 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6661 /* No need for further pruning. */
6662 if (!act_delta)
6663 continue;
6665 /* Prune the new candidate set. */
6666 iv_ca_delta_commit (data, ivs, act_delta, true);
6667 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6668 iv_ca_delta_commit (data, ivs, act_delta, false);
6669 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6671 if (compare_costs (acost, orig_cost) < 0)
6673 *delta = act_delta;
6674 return acost;
6676 else
6677 iv_ca_delta_free (&act_delta);
6680 return orig_cost;
6683 /* Tries to extend the set IVS in the best possible way in order to
6684 express GROUP. If ORIGINALP is true, prefer candidates from
6685 the original set of IVs, otherwise favor important candidates not
6686 based on any memory object. */
6688 static bool
6689 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6690 struct iv_group *group, bool originalp)
6692 comp_cost best_cost, act_cost;
6693 unsigned i;
6694 bitmap_iterator bi;
6695 struct iv_cand *cand;
6696 struct iv_ca_delta *best_delta = NULL, *act_delta;
6697 struct cost_pair *cp;
6699 iv_ca_add_group (data, ivs, group);
6700 best_cost = iv_ca_cost (ivs);
6701 cp = iv_ca_cand_for_group (ivs, group);
6702 if (cp)
6704 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6705 iv_ca_set_no_cp (data, ivs, group);
6708 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6709 first try important candidates not based on any memory object. Only if
6710 this fails, try the specific ones. Rationale -- in loops with many
6711 variables the best choice often is to use just one generic biv. If we
6712 added here many ivs specific to the uses, the optimization algorithm later
6713 would be likely to get stuck in a local minimum, thus causing us to create
6714 too many ivs. The approach from few ivs to more seems more likely to be
6715 successful -- starting from few ivs, replacing an expensive use by a
6716 specific iv should always be a win. */
6717 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6719 cand = data->vcands[i];
6721 if (originalp && cand->pos != IP_ORIGINAL)
6722 continue;
6724 if (!originalp && cand->iv->base_object != NULL_TREE)
6725 continue;
6727 if (iv_ca_cand_used_p (ivs, cand))
6728 continue;
6730 cp = get_group_iv_cost (data, group, cand);
6731 if (!cp)
6732 continue;
6734 iv_ca_set_cp (data, ivs, group, cp);
6735 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6736 true);
6737 iv_ca_set_no_cp (data, ivs, group);
6738 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6740 if (compare_costs (act_cost, best_cost) < 0)
6742 best_cost = act_cost;
6744 iv_ca_delta_free (&best_delta);
6745 best_delta = act_delta;
6747 else
6748 iv_ca_delta_free (&act_delta);
6751 if (infinite_cost_p (best_cost))
6753 for (i = 0; i < group->n_map_members; i++)
6755 cp = group->cost_map + i;
6756 cand = cp->cand;
6757 if (!cand)
6758 continue;
6760 /* Already tried this. */
6761 if (cand->important)
6763 if (originalp && cand->pos == IP_ORIGINAL)
6764 continue;
6765 if (!originalp && cand->iv->base_object == NULL_TREE)
6766 continue;
6769 if (iv_ca_cand_used_p (ivs, cand))
6770 continue;
6772 act_delta = NULL;
6773 iv_ca_set_cp (data, ivs, group, cp);
6774 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6775 iv_ca_set_no_cp (data, ivs, group);
6776 act_delta = iv_ca_delta_add (group,
6777 iv_ca_cand_for_group (ivs, group),
6778 cp, act_delta);
6780 if (compare_costs (act_cost, best_cost) < 0)
6782 best_cost = act_cost;
6784 if (best_delta)
6785 iv_ca_delta_free (&best_delta);
6786 best_delta = act_delta;
6788 else
6789 iv_ca_delta_free (&act_delta);
6793 iv_ca_delta_commit (data, ivs, best_delta, true);
6794 iv_ca_delta_free (&best_delta);
6796 return !infinite_cost_p (best_cost);
6799 /* Finds an initial assignment of candidates to uses. */
6801 static struct iv_ca *
6802 get_initial_solution (struct ivopts_data *data, bool originalp)
6804 unsigned i;
6805 struct iv_ca *ivs = iv_ca_new (data);
6807 for (i = 0; i < data->vgroups.length (); i++)
6808 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6810 iv_ca_free (&ivs);
6811 return NULL;
6814 return ivs;
6817 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6818 points to a bool variable; if it is true, this function tries to break
6819 the locally optimal fixed-point by replacing candidates in IVS. */
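/* Three strategies are tried in turn: extending the set by a single new
   candidate (pruning afterwards when the resulting set is small enough),
   pruning the current set, and finally -- at most once -- the candidate
   replacement performed by iv_ca_replace.  Returns false when none of
   them lowers the cost.  */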
6821 static bool
6822 try_improve_iv_set (struct ivopts_data *data,
6823 struct iv_ca *ivs, bool *try_replace_p)
6825 unsigned i, n_ivs;
6826 comp_cost acost, best_cost = iv_ca_cost (ivs);
6827 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6828 struct iv_cand *cand;
6830 /* Try extending the set of induction variables by one. */
6831 for (i = 0; i < data->vcands.length (); i++)
6833 cand = data->vcands[i];
6835 if (iv_ca_cand_used_p (ivs, cand))
6836 continue;
6838 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6839 if (!act_delta)
6840 continue;
6842 /* If we successfully added the candidate and the set is small enough,
6843 try optimizing it by removing other candidates. */
6844 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6846 iv_ca_delta_commit (data, ivs, act_delta, true);
6847 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6848 iv_ca_delta_commit (data, ivs, act_delta, false);
6849 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6852 if (compare_costs (acost, best_cost) < 0)
6854 best_cost = acost;
6855 iv_ca_delta_free (&best_delta);
6856 best_delta = act_delta;
6858 else
6859 iv_ca_delta_free (&act_delta);
6862 if (!best_delta)
6864 /* Try removing the candidates from the set instead. */
6865 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6867 if (!best_delta && *try_replace_p)
6869 *try_replace_p = false;
6870 /* So far the candidate-selecting algorithm tends to choose fewer IVs
6871 so that it can handle cases in which loops have many variables
6872 but the best choice is often to use only one general biv. One
6873 weakness is that it can't handle the opposite case, in which different
6874 candidates should be chosen with respect to each use. To solve
6875 the problem, we replace candidates in the manner described in the
6876 comments of iv_ca_replace, thus giving the general algorithm a chance
6877 to break the locally optimal fixed-point in these cases. */
6878 best_cost = iv_ca_replace (data, ivs, &best_delta);
6881 if (!best_delta)
6882 return false;
6885 iv_ca_delta_commit (data, ivs, best_delta, true);
6886 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6887 iv_ca_delta_free (&best_delta);
6888 return true;
6891 /* Attempts to find the optimal set of induction variables. We use a simple
6892 greedy heuristic -- we try to replace at most one candidate in the selected
6893 solution and remove the unused ivs while this improves the cost. */
6895 static struct iv_ca *
6896 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6898 struct iv_ca *set;
6899 bool try_replace_p = true;
6901 /* Get the initial solution. */
6902 set = get_initial_solution (data, originalp);
6903 if (!set)
6905 if (dump_file && (dump_flags & TDF_DETAILS))
6906 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6907 return NULL;
6910 if (dump_file && (dump_flags & TDF_DETAILS))
6912 fprintf (dump_file, "Initial set of candidates:\n");
6913 iv_ca_dump (data, dump_file, set);
6916 while (try_improve_iv_set (data, set, &try_replace_p))
6918 if (dump_file && (dump_flags & TDF_DETAILS))
6920 fprintf (dump_file, "Improved to:\n");
6921 iv_ca_dump (data, dump_file, set);
6925 return set;
6928 static struct iv_ca *
6929 find_optimal_iv_set (struct ivopts_data *data)
6931 unsigned i;
6932 comp_cost cost, origcost;
6933 struct iv_ca *set, *origset;
6935 /* Determine the cost based on a strategy that starts with the original IVs,
6936 then try again using a strategy that prefers candidates not based
6937 on any IVs. */
6938 origset = find_optimal_iv_set_1 (data, true);
6939 set = find_optimal_iv_set_1 (data, false);
6941 if (!origset && !set)
6942 return NULL;
6944 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6945 cost = set ? iv_ca_cost (set) : infinite_cost;
6947 if (dump_file && (dump_flags & TDF_DETAILS))
6949 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6950 origcost.cost, origcost.complexity);
6951 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6952 cost.cost, cost.complexity);
6955 /* Choose the one with the best cost. */
6956 if (compare_costs (origcost, cost) <= 0)
6958 if (set)
6959 iv_ca_free (&set);
6960 set = origset;
6962 else if (origset)
6963 iv_ca_free (&origset);
6965 for (i = 0; i < data->vgroups.length (); i++)
6967 struct iv_group *group = data->vgroups[i];
6968 group->selected = iv_ca_cand_for_group (set, group)->cand;
6971 return set;
6974 /* Creates a new induction variable corresponding to CAND. */
6976 static void
6977 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6979 gimple_stmt_iterator incr_pos;
6980 tree base;
6981 struct iv_use *use;
6982 struct iv_group *group;
6983 bool after = false;
6985 if (!cand->iv)
6986 return;
6988 switch (cand->pos)
6990 case IP_NORMAL:
6991 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6992 break;
6994 case IP_END:
6995 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6996 after = true;
6997 break;
6999 case IP_AFTER_USE:
7000 after = true;
7001 /* fall through */
7002 case IP_BEFORE_USE:
7003 incr_pos = gsi_for_stmt (cand->incremented_at);
7004 break;
7006 case IP_ORIGINAL:
7007 /* Mark that the iv is preserved. */
7008 name_info (data, cand->var_before)->preserve_biv = true;
7009 name_info (data, cand->var_after)->preserve_biv = true;
7011 /* Rewrite the increment so that it uses var_before directly. */
7012 use = find_interesting_uses_op (data, cand->var_after);
7013 group = data->vgroups[use->group_id];
7014 group->selected = cand;
7015 return;
7018 gimple_add_tmp_var (cand->var_before);
7020 base = unshare_expr (cand->iv->base);
7022 create_iv (base, unshare_expr (cand->iv->step),
7023 cand->var_before, data->current_loop,
7024 &incr_pos, after, &cand->var_before, &cand->var_after);
7027 /* Creates new induction variables described in SET. */
7029 static void
7030 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7032 unsigned i;
7033 struct iv_cand *cand;
7034 bitmap_iterator bi;
7036 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7038 cand = data->vcands[i];
7039 create_new_iv (data, cand);
7042 if (dump_file && (dump_flags & TDF_DETAILS))
7044 fprintf (dump_file, "Selected IV set for loop %d",
7045 data->current_loop->num);
7046 if (data->loop_loc != UNKNOWN_LOCATION)
7047 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7048 LOCATION_LINE (data->loop_loc));
7049 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7050 avg_loop_niter (data->current_loop));
7051 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7052 (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7053 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7054 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7056 cand = data->vcands[i];
7057 dump_cand (dump_file, cand);
7059 fprintf (dump_file, "\n");
7063 /* Rewrites USE (definition of iv used in a nonlinear expression)
7064 using candidate CAND. */
7066 static void
7067 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7068 struct iv_use *use, struct iv_cand *cand)
7070 tree comp;
7071 tree op, tgt;
7072 gassign *ass;
7073 gimple_stmt_iterator bsi;
7075 /* An important special case -- if we are asked to express value of
7076 the original iv by itself, just exit; there is no need to
7077 introduce a new computation (that might also need casting the
7078 variable to unsigned and back). */
7079 if (cand->pos == IP_ORIGINAL
7080 && cand->incremented_at == use->stmt)
7082 enum tree_code stmt_code;
7084 gcc_assert (is_gimple_assign (use->stmt));
7085 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7087 /* Check whether we may leave the computation unchanged.
7088 This is the case only if it does not rely on other
7089 computations in the loop -- otherwise, the computation
7090 we rely upon may be removed in remove_unused_ivs,
7091 thus leading to ICE. */
7092 stmt_code = gimple_assign_rhs_code (use->stmt);
7093 if (stmt_code == PLUS_EXPR
7094 || stmt_code == MINUS_EXPR
7095 || stmt_code == POINTER_PLUS_EXPR)
7097 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7098 op = gimple_assign_rhs2 (use->stmt);
7099 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7100 op = gimple_assign_rhs1 (use->stmt);
7101 else
7102 op = NULL_TREE;
7104 else
7105 op = NULL_TREE;
7107 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7108 return;
7111 comp = get_computation (data->current_loop, use, cand);
7112 gcc_assert (comp != NULL_TREE);
7114 switch (gimple_code (use->stmt))
7116 case GIMPLE_PHI:
7117 tgt = PHI_RESULT (use->stmt);
7119 /* If we should keep the biv, do not replace it. */
7120 if (name_info (data, tgt)->preserve_biv)
7121 return;
7123 bsi = gsi_after_labels (gimple_bb (use->stmt));
7124 break;
7126 case GIMPLE_ASSIGN:
7127 tgt = gimple_assign_lhs (use->stmt);
7128 bsi = gsi_for_stmt (use->stmt);
7129 break;
7131 default:
7132 gcc_unreachable ();
7135 if (!valid_gimple_rhs_p (comp)
7136 || (gimple_code (use->stmt) != GIMPLE_PHI
7137 /* We can't allow re-allocating the stmt as it might be pointed
7138 to still. */
7139 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7140 >= gimple_num_ops (gsi_stmt (bsi)))))
7142 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7143 true, GSI_SAME_STMT);
7144 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7146 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7147 /* As this isn't a plain copy we have to reset alignment
7148 information. */
7149 if (SSA_NAME_PTR_INFO (comp))
7150 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7154 if (gimple_code (use->stmt) == GIMPLE_PHI)
7156 ass = gimple_build_assign (tgt, comp);
7157 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7159 bsi = gsi_for_stmt (use->stmt);
7160 remove_phi_node (&bsi, false);
7162 else
7164 gimple_assign_set_rhs_from_tree (&bsi, comp);
7165 use->stmt = gsi_stmt (bsi);
7169 /* Performs a peephole optimization to reorder the iv update statement with
7170 a mem ref to enable instruction combining in later phases. The mem ref uses
7171 the iv value before the update, so the reordering transformation requires
7172 adjustment of the offset. CAND is the selected IV_CAND.
7174 Example:
7176 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7177 iv2 = iv1 + 1;
7179 if (t < val) (1)
7180 goto L;
7181 goto Head;
7184 directly propagating t over to (1) will introduce an overlapping live range,
7185 thus increasing register pressure. This peephole transforms it into:
7188 iv2 = iv1 + 1;
7189 t = MEM_REF (base, iv2, 8, 8);
7190 if (t < val)
7191 goto L;
7192 goto Head;
7195 static void
7196 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7198 tree var_after;
7199 gimple *iv_update, *stmt;
7200 basic_block bb;
7201 gimple_stmt_iterator gsi, gsi_iv;
7203 if (cand->pos != IP_NORMAL)
7204 return;
7206 var_after = cand->var_after;
7207 iv_update = SSA_NAME_DEF_STMT (var_after);
7209 bb = gimple_bb (iv_update);
7210 gsi = gsi_last_nondebug_bb (bb);
7211 stmt = gsi_stmt (gsi);
7213 /* Only handle conditional statement for now. */
7214 if (gimple_code (stmt) != GIMPLE_COND)
7215 return;
7217 gsi_prev_nondebug (&gsi);
7218 stmt = gsi_stmt (gsi);
7219 if (stmt != iv_update)
7220 return;
7222 gsi_prev_nondebug (&gsi);
7223 if (gsi_end_p (gsi))
7224 return;
7226 stmt = gsi_stmt (gsi);
7227 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7228 return;
7230 if (stmt != use->stmt)
7231 return;
7233 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7234 return;
7236 if (dump_file && (dump_flags & TDF_DETAILS))
7238 fprintf (dump_file, "Reordering \n");
7239 print_gimple_stmt (dump_file, iv_update, 0, 0);
7240 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7241 fprintf (dump_file, "\n");
7244 gsi = gsi_for_stmt (use->stmt);
7245 gsi_iv = gsi_for_stmt (iv_update);
7246 gsi_move_before (&gsi_iv, &gsi);
7248 cand->pos = IP_BEFORE_USE;
7249 cand->incremented_at = use->stmt;
7252 /* Rewrites USE (address that is an iv) using candidate CAND. */
7254 static void
7255 rewrite_use_address (struct ivopts_data *data,
7256 struct iv_use *use, struct iv_cand *cand)
7258 aff_tree aff;
7259 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7260 tree base_hint = NULL_TREE;
7261 tree ref, iv;
7262 bool ok;
7264 adjust_iv_update_pos (cand, use);
7265 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7266 gcc_assert (ok);
7267 unshare_aff_combination (&aff);
7269 /* To avoid undefined overflow problems, all IV candidates use unsigned
7270 integer types. The drawback is that this makes it impossible for
7271 create_mem_ref to distinguish an IV that is based on a memory object
7272 from one that represents simply an offset.
7274 To work around this problem, we pass a hint to create_mem_ref that
7275 indicates which variable (if any) in aff is an IV based on a memory
7276 object. Note that we only consider the candidate. If this is not
7277 based on an object, the base of the reference is in some subexpression
7278 of the use -- but these will use pointer types, so they are recognized
7279 by the create_mem_ref heuristics anyway. */
7280 if (cand->iv->base_object)
7281 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7283 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7284 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7285 reference_alias_ptr_type (*use->op_p),
7286 iv, base_hint, data->speed);
7287 copy_ref_info (ref, *use->op_p);
7288 *use->op_p = ref;
7291 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7292 candidate CAND. */
7294 static void
7295 rewrite_use_compare (struct ivopts_data *data,
7296 struct iv_use *use, struct iv_cand *cand)
7298 tree comp, *var_p, op, bound;
7299 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7300 enum tree_code compare;
7301 struct iv_group *group = data->vgroups[use->group_id];
7302 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7303 bool ok;
7305 bound = cp->value;
7306 if (bound)
7308 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7309 tree var_type = TREE_TYPE (var);
7310 gimple_seq stmts;
7312 if (dump_file && (dump_flags & TDF_DETAILS))
7314 fprintf (dump_file, "Replacing exit test: ");
7315 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7317 compare = cp->comp;
7318 bound = unshare_expr (fold_convert (var_type, bound));
7319 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7320 if (stmts)
7321 gsi_insert_seq_on_edge_immediate (
7322 loop_preheader_edge (data->current_loop),
7323 stmts);
7325 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7326 gimple_cond_set_lhs (cond_stmt, var);
7327 gimple_cond_set_code (cond_stmt, compare);
7328 gimple_cond_set_rhs (cond_stmt, op);
7329 return;
7332 /* The induction variable elimination failed; just express the original
7333 giv. */
7334 comp = get_computation (data->current_loop, use, cand);
7335 gcc_assert (comp != NULL_TREE);
7337 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7338 gcc_assert (ok);
7340 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7341 true, GSI_SAME_STMT);
7344 /* Rewrite the groups using the selected induction variables. */
7346 static void
7347 rewrite_groups (struct ivopts_data *data)
7349 unsigned i, j;
7351 for (i = 0; i < data->vgroups.length (); i++)
7353 struct iv_group *group = data->vgroups[i];
7354 struct iv_cand *cand = group->selected;
7356 gcc_assert (cand);
7358 if (group->type == USE_NONLINEAR_EXPR)
7360 for (j = 0; j < group->vuses.length (); j++)
7362 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7363 update_stmt (group->vuses[j]->stmt);
7366 else if (group->type == USE_ADDRESS)
7368 for (j = 0; j < group->vuses.length (); j++)
7370 rewrite_use_address (data, group->vuses[j], cand);
7371 update_stmt (group->vuses[j]->stmt);
7374 else
7376 gcc_assert (group->type == USE_COMPARE);
7378 for (j = 0; j < group->vuses.length (); j++)
7380 rewrite_use_compare (data, group->vuses[j], cand);
7381 update_stmt (group->vuses[j]->stmt);
7387 /* Removes the ivs that are not used after rewriting. */
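/* Before such an iv is released, any debug bind stmts that still refer to
   it are rewritten to an equivalent expression built from the selected
   candidates (going through a DEBUG_EXPR_DECL temporary when the value is
   used more than once), so the debug information stays usable.  */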
7389 static void
7390 remove_unused_ivs (struct ivopts_data *data)
7392 unsigned j;
7393 bitmap_iterator bi;
7394 bitmap toremove = BITMAP_ALLOC (NULL);
7396 /* Figure out an order in which to release SSA DEFs so that we don't
7397 release something that we'd have to propagate into a debug stmt
7398 afterwards. */
7399 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7401 struct version_info *info;
7403 info = ver_info (data, j);
7404 if (info->iv
7405 && !integer_zerop (info->iv->step)
7406 && !info->inv_id
7407 && !info->iv->nonlin_use
7408 && !info->preserve_biv)
7410 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7412 tree def = info->iv->ssa_name;
7414 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7416 imm_use_iterator imm_iter;
7417 use_operand_p use_p;
7418 gimple *stmt;
7419 int count = 0;
7421 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7423 if (!gimple_debug_bind_p (stmt))
7424 continue;
7426 /* We just want to determine whether to do nothing
7427 (count == 0), to substitute the computed
7428 expression into a single use of the SSA DEF by
7429 itself (count == 1), or to use a debug temp
7430 because the SSA DEF is used multiple times or as
7431 part of a larger expression (count > 1). */
7432 count++;
7433 if (gimple_debug_bind_get_value (stmt) != def)
7434 count++;
7436 if (count > 1)
7437 BREAK_FROM_IMM_USE_STMT (imm_iter);
7440 if (!count)
7441 continue;
7443 struct iv_use dummy_use;
7444 struct iv_cand *best_cand = NULL, *cand;
7445 unsigned i, best_pref = 0, cand_pref;
7447 memset (&dummy_use, 0, sizeof (dummy_use));
7448 dummy_use.iv = info->iv;
7449 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7451 cand = data->vgroups[i]->selected;
7452 if (cand == best_cand)
7453 continue;
7454 cand_pref = operand_equal_p (cand->iv->step,
7455 info->iv->step, 0)
7456 ? 4 : 0;
7457 cand_pref
7458 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7459 == TYPE_MODE (TREE_TYPE (info->iv->base))
7460 ? 2 : 0;
7461 cand_pref
7462 += TREE_CODE (cand->iv->base) == INTEGER_CST
7463 ? 1 : 0;
7464 if (best_cand == NULL || best_pref < cand_pref)
7466 best_cand = cand;
7467 best_pref = cand_pref;
7471 if (!best_cand)
7472 continue;
7474 tree comp = get_computation_at (data->current_loop,
7475 &dummy_use, best_cand,
7476 SSA_NAME_DEF_STMT (def));
7477 if (!comp)
7478 continue;
7480 if (count > 1)
7482 tree vexpr = make_node (DEBUG_EXPR_DECL);
7483 DECL_ARTIFICIAL (vexpr) = 1;
7484 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7485 if (SSA_NAME_VAR (def))
7486 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7487 else
7488 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7489 gdebug *def_temp
7490 = gimple_build_debug_bind (vexpr, comp, NULL);
7491 gimple_stmt_iterator gsi;
7493 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7494 gsi = gsi_after_labels (gimple_bb
7495 (SSA_NAME_DEF_STMT (def)));
7496 else
7497 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7499 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7500 comp = vexpr;
7503 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7505 if (!gimple_debug_bind_p (stmt))
7506 continue;
7508 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7509 SET_USE (use_p, comp);
7511 update_stmt (stmt);
7517 release_defs_bitset (toremove);
7519 BITMAP_FREE (toremove);
7522 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7523 for hash_map::traverse. */
7525 bool
7526 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7528 free (value);
7529 return true;
7532 /* Frees data allocated by the optimization of a single loop. */
7534 static void
7535 free_loop_data (struct ivopts_data *data)
7537 unsigned i, j;
7538 bitmap_iterator bi;
7539 tree obj;
7541 if (data->niters)
7543 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7544 delete data->niters;
7545 data->niters = NULL;
7548 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7550 struct version_info *info;
7552 info = ver_info (data, i);
7553 info->iv = NULL;
7554 info->has_nonlin_use = false;
7555 info->preserve_biv = false;
7556 info->inv_id = 0;
7558 bitmap_clear (data->relevant);
7559 bitmap_clear (data->important_candidates);
7561 for (i = 0; i < data->vgroups.length (); i++)
7563 struct iv_group *group = data->vgroups[i];
7565 for (j = 0; j < group->vuses.length (); j++)
7566 free (group->vuses[j]);
7567 group->vuses.release ();
7569 BITMAP_FREE (group->related_cands);
7570 for (j = 0; j < group->n_map_members; j++)
7571 if (group->cost_map[j].depends_on)
7572 BITMAP_FREE (group->cost_map[j].depends_on);
7574 free (group->cost_map);
7575 free (group);
7577 data->vgroups.truncate (0);
7579 for (i = 0; i < data->vcands.length (); i++)
7581 struct iv_cand *cand = data->vcands[i];
7583 if (cand->depends_on)
7584 BITMAP_FREE (cand->depends_on);
7585 free (cand);
7587 data->vcands.truncate (0);
7589 if (data->version_info_size < num_ssa_names)
7591 data->version_info_size = 2 * num_ssa_names;
7592 free (data->version_info);
7593 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7596 data->max_inv_id = 0;
7598 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7599 SET_DECL_RTL (obj, NULL_RTX);
7601 decl_rtl_to_reset.truncate (0);
7603 data->inv_expr_tab->empty ();
7604 data->max_inv_expr_id = 0;
7606 data->iv_common_cand_tab->empty ();
7607 data->iv_common_cands.truncate (0);
7610 /* Finalizes data structures used by the iv optimization pass, freeing
7611 the memory allocated in DATA. */
7613 static void
7614 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7616 free_loop_data (data);
7617 free (data->version_info);
7618 BITMAP_FREE (data->relevant);
7619 BITMAP_FREE (data->important_candidates);
7621 decl_rtl_to_reset.release ();
7622 data->vgroups.release ();
7623 data->vcands.release ();
7624 delete data->inv_expr_tab;
7625 data->inv_expr_tab = NULL;
7626 free_affine_expand_cache (&data->name_expansion_cache);
7627 delete data->iv_common_cand_tab;
7628 data->iv_common_cand_tab = NULL;
7629 data->iv_common_cands.release ();
7630 obstack_free (&data->iv_obstack, NULL);
7633 /* Returns true if the loop body BODY includes any function calls. */
7635 static bool
7636 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7638 gimple_stmt_iterator gsi;
7639 unsigned i;
7641 for (i = 0; i < num_nodes; i++)
7642 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7644 gimple *stmt = gsi_stmt (gsi);
7645 if (is_gimple_call (stmt)
7646 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7647 return true;
7649 return false;
7652 /* Optimizes the LOOP. Returns true if anything changed. */
7654 static bool
7655 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7657 bool changed = false;
7658 struct iv_ca *iv_ca;
7659 edge exit = single_dom_exit (loop);
7660 basic_block *body;
7662 gcc_assert (!data->niters);
7663 data->current_loop = loop;
7664 data->loop_loc = find_loop_location (loop);
7665 data->speed = optimize_loop_for_speed_p (loop);
7667 if (dump_file && (dump_flags & TDF_DETAILS))
7669 fprintf (dump_file, "Processing loop %d", loop->num);
7670 if (data->loop_loc != UNKNOWN_LOCATION)
7671 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7672 LOCATION_LINE (data->loop_loc));
7673 fprintf (dump_file, "\n");
7675 if (exit)
7677 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7678 exit->src->index, exit->dest->index);
7679 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7680 fprintf (dump_file, "\n");
7683 fprintf (dump_file, "\n");
7686 body = get_loop_body (loop);
7687 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7688 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7689 free (body);
7691 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7693 /* For each ssa name determines whether it behaves as an induction variable
7694 in some loop. */
7695 if (!find_induction_variables (data))
7696 goto finish;
7698 /* Finds interesting uses (item 1). */
7699 find_interesting_uses (data);
7700 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7701 goto finish;
7703 /* Finds candidates for the induction variables (item 2). */
7704 find_iv_candidates (data);
7706 /* Calculates the costs (item 3, part 1). */
7707 determine_iv_costs (data);
7708 determine_group_iv_costs (data);
7709 determine_set_costs (data);
7711 /* Find the optimal set of induction variables (item 3, part 2). */
7712 iv_ca = find_optimal_iv_set (data);
7713 if (!iv_ca)
7714 goto finish;
7715 changed = true;
7717 /* Create the new induction variables (item 4, part 1). */
7718 create_new_ivs (data, iv_ca);
7719 iv_ca_free (&iv_ca);
7721 /* Rewrite the uses (item 4, part 2). */
7722 rewrite_groups (data);
7724 /* Remove the ivs that are unused after rewriting. */
7725 remove_unused_ivs (data);
7727 /* We have changed the structure of induction variables; it might happen
7728 that definitions in the scev database refer to some of them that were
7729 eliminated. */
7730 scev_reset ();
7732 finish:
7733 free_loop_data (data);
7735 return changed;
7738 /* Main entry point. Optimizes induction variables in loops. */
7740 void
7741 tree_ssa_iv_optimize (void)
7743 struct loop *loop;
7744 struct ivopts_data data;
7746 tree_ssa_iv_optimize_init (&data);
7748 /* Optimize the loops starting with the innermost ones. */
7749 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7751 if (dump_file && (dump_flags & TDF_DETAILS))
7752 flow_loop_dump (loop, dump_file, NULL, 1);
7754 tree_ssa_iv_optimize_loop (&data, loop);
7757 tree_ssa_iv_optimize_finalize (&data);