/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

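/* As an illustrative sketch of the transformation this pass aims at
   (an addition for exposition; the loop below is hypothetical and not
   part of the original source), strength reduction rewrites

       for (i = 0; i < n; i++)
	 a[i] = 0;

   where each iteration computes the address "&a + i * 4" (assuming
   4-byte elements), into the equivalent

       for (p = &a[0]; p < &a[n]; p++)
	 *p = 0;

   so the multiplication hidden in the array reference is replaced by a
   cheap pointer increment, and "i" can be eliminated entirely if it
   has no other uses.  */
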
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

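/* For instance (a hypothetical illustration, not from the original
   source): if profile feedback estimates that the loop body executes
   100 times, avg_loop_niter returns 100; without any estimate it falls
   back to the AVG_LOOP_NITER default of 5.  */
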
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object that the induction variable
			   points to.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier of the use recorded for it,
			   if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicates whether it's used in any
			   address type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
};

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

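/* A sketch of how these constants are meant to combine (exposition
   only; the concrete numbers are made up): a candidate whose use costs
   {4, 1} is preferred over one costing {5, 1}, while complexity serves
   to discriminate between computations of equal runtime cost, e.g.
   {4, 1} over {4, 3}.  */
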
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			    incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};

/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use *> iv_uses;

  /* The candidates.  */
  vec<iv_cand *> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");

  if (iv->no_overflow)
    fprintf (file, "  iv doesn't overflow wrto loop niter\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}

/* Returns the memory object that EXPR points to.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

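/* For example (an illustrative sketch, not from the original source):
   for EXPR "&a[0] + i * 4" the recursion walks through the
   POINTER_PLUS_EXPR to the ADDR_EXPR operand and returns "&a", while
   for a pointer of unknown provenance, such as a function parameter p,
   the default case returns p itself converted to ptr_type_node.  */
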
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we should stop at any ssa var, but since that is expensive
	 and such cases are unusual, we just do it for the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we should stop at any ssa var, but since that is expensive
     and such cases are unusual, we just do it for the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple *stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Given expression EXPR which computes inductive values with respect
   to the loop recorded in DATA, this function returns the biv from which
   EXPR is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not an ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;

      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}

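/* A sketch of the tracing above (illustrative, not from the original
   source): given a biv "i" and the statements

       j_1 = i_2 * 4;
       p_3 = base_4 + j_1;

   calling find_deriving_biv_for_expr on p_3 walks p_3 -> j_1 -> i_2
   and returns the iv describing the biv "i".  */
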
/* Record that BIV, together with its predecessor and successor, is used
   in address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX We produce for a base of *D42 with iv->base being &x[0]
	 *&x[0], which is not folded and does not trigger the
	 ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }
  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}

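/* For example (an illustrative sketch, not from the original source):
   with TOP = "i * 12 + i * 4" and BOT = "i", the PLUS_EXPR case yields
   the partial multiples 12 and 4, so *MUL becomes 16 and true is
   returned; with TOP = "i * 12 + j" and BOT = "i" the recursion fails
   on the "j" operand and false is returned.  */
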
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

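/* A worked example of the step check above (a sketch, not from the
   original source): for a reference requiring 64-bit alignment and
   STEP = 4, tree_ctz (step) is 2 and (1U << 2) * BITS_PER_UNIT is
   32 < 64 bits, so the address may drift off the required alignment
   after some iterations and true is returned; STEP = 8 passes.  */
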
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use with the same stripped iv base and step,
   this function records this one as a sub use of that one; otherwise
   it records it as a normal use.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}

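/* For example (an illustrative sketch, not from the original source):
   the address uses "a[i]", "a[i + 1]" and "a[i + 2]" share the same
   base object, step and stripped base, so the first one is recorded as
   a normal use and the other two become its sub uses with constant
   offsets 4 and 8 (assuming 4-byte elements), kept sorted by offset.  */
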
2116 /* Finds addresses in *OP_P inside STMT. */
2118 static void
2119 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2120 tree *op_p)
2122 tree base = *op_p, step = size_zero_node;
2123 struct iv *civ;
2124 struct ifs_ivopts_data ifs_ivopts_data;
2126 /* Do not play with volatile memory references. A bit too conservative,
2127 perhaps, but safe. */
2128 if (gimple_has_volatile_ops (stmt))
2129 goto fail;
2131 /* Ignore bitfields for now. Not really something terribly complicated
2132 to handle. TODO. */
2133 if (TREE_CODE (base) == BIT_FIELD_REF)
2134 goto fail;
2136 base = unshare_expr (base);
2138 if (TREE_CODE (base) == TARGET_MEM_REF)
2140 tree type = build_pointer_type (TREE_TYPE (base));
2141 tree astep;
2143 if (TMR_BASE (base)
2144 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2146 civ = get_iv (data, TMR_BASE (base));
2147 if (!civ)
2148 goto fail;
2150 TMR_BASE (base) = civ->base;
2151 step = civ->step;
2153 if (TMR_INDEX2 (base)
2154 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2156 civ = get_iv (data, TMR_INDEX2 (base));
2157 if (!civ)
2158 goto fail;
2160 TMR_INDEX2 (base) = civ->base;
2161 step = civ->step;
2163 if (TMR_INDEX (base)
2164 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2166 civ = get_iv (data, TMR_INDEX (base));
2167 if (!civ)
2168 goto fail;
2170 TMR_INDEX (base) = civ->base;
2171 astep = civ->step;
2173 if (astep)
2175 if (TMR_STEP (base))
2176 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2178 step = fold_build2 (PLUS_EXPR, type, step, astep);
2182 if (integer_zerop (step))
2183 goto fail;
2184 base = tree_mem_ref_addr (type, base);
2186 else
2188 ifs_ivopts_data.ivopts_data = data;
2189 ifs_ivopts_data.stmt = stmt;
2190 ifs_ivopts_data.step = size_zero_node;
2191 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2192 || integer_zerop (ifs_ivopts_data.step))
2193 goto fail;
2194 step = ifs_ivopts_data.step;
2196 /* Check that the base expression is addressable. This needs
2197 to be done after substituting bases of IVs into it. */
2198 if (may_be_nonaddressable_p (base))
2199 goto fail;
2201 /* Moreover, on strict-alignment platforms, check that it is
2202 sufficiently aligned. */
2203 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2204 goto fail;
2206 base = build_fold_addr_expr (base);
2208 /* Substituting bases of IVs into the base expression might
2209 have exposed folding opportunities. */
2210 if (TREE_CODE (base) == ADDR_EXPR)
2212 tree *ref = &TREE_OPERAND (base, 0);
2213 while (handled_component_p (*ref))
2214 ref = &TREE_OPERAND (*ref, 0);
2215 if (TREE_CODE (*ref) == MEM_REF)
2217 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2218 TREE_OPERAND (*ref, 0),
2219 TREE_OPERAND (*ref, 1));
2220 if (tem)
2221 *ref = tem;
2226 civ = alloc_iv (data, base, step);
2227 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2228 return;
2230 fail:
2231 for_each_index (op_p, idx_record_use, data);
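/* A sketch of the TARGET_MEM_REF case above: when both the base and the
   index are induction variables, their steps combine as
     step = base_step + TMR_STEP * index_step,
   so e.g. base_step 0, TMR_STEP 4 and index_step 1 give an address iv
   advancing by 4 bytes per iteration.  */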
2234 /* Finds and records invariants used in STMT. */
2236 static void
2237 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2239 ssa_op_iter iter;
2240 use_operand_p use_p;
2241 tree op;
2243 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2245 op = USE_FROM_PTR (use_p);
2246 record_invariant (data, op, false);
2250 /* Finds interesting uses of induction variables in the statement STMT. */
2252 static void
2253 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2255 struct iv *iv;
2256 tree op, *lhs, *rhs;
2257 ssa_op_iter iter;
2258 use_operand_p use_p;
2259 enum tree_code code;
2261 find_invariants_stmt (data, stmt);
2263 if (gimple_code (stmt) == GIMPLE_COND)
2265 find_interesting_uses_cond (data, stmt);
2266 return;
2269 if (is_gimple_assign (stmt))
2271 lhs = gimple_assign_lhs_ptr (stmt);
2272 rhs = gimple_assign_rhs1_ptr (stmt);
2274 if (TREE_CODE (*lhs) == SSA_NAME)
2276 /* If the statement defines an induction variable, the uses are not
2277 interesting by themselves. */
2279 iv = get_iv (data, *lhs);
2281 if (iv && !integer_zerop (iv->step))
2282 return;
2285 code = gimple_assign_rhs_code (stmt);
2286 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2287 && (REFERENCE_CLASS_P (*rhs)
2288 || is_gimple_val (*rhs)))
2290 if (REFERENCE_CLASS_P (*rhs))
2291 find_interesting_uses_address (data, stmt, rhs);
2292 else
2293 find_interesting_uses_op (data, *rhs);
2295 if (REFERENCE_CLASS_P (*lhs))
2296 find_interesting_uses_address (data, stmt, lhs);
2297 return;
2299 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2301 find_interesting_uses_cond (data, stmt);
2302 return;
2305 /* TODO -- we should also handle address uses of type
2307 memory = call (whatever);
2311 call (memory). */
2314 if (gimple_code (stmt) == GIMPLE_PHI
2315 && gimple_bb (stmt) == data->current_loop->header)
2317 iv = get_iv (data, PHI_RESULT (stmt));
2319 if (iv && !integer_zerop (iv->step))
2320 return;
2323 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2325 op = USE_FROM_PTR (use_p);
2327 if (TREE_CODE (op) != SSA_NAME)
2328 continue;
2330 iv = get_iv (data, op);
2331 if (!iv)
2332 continue;
2334 find_interesting_uses_op (data, op);
2338 /* Finds interesting uses of induction variables outside of loops
2339 on loop exit edge EXIT. */
2341 static void
2342 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2344 gphi *phi;
2345 gphi_iterator psi;
2346 tree def;
2348 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2350 phi = psi.phi ();
2351 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2352 if (!virtual_operand_p (def))
2353 find_interesting_uses_op (data, def);
2357 /* Finds uses of the induction variables that are interesting. */
2359 static void
2360 find_interesting_uses (struct ivopts_data *data)
2362 basic_block bb;
2363 gimple_stmt_iterator bsi;
2364 basic_block *body = get_loop_body (data->current_loop);
2365 unsigned i;
2366 struct version_info *info;
2367 edge e;
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2370 fprintf (dump_file, "Uses:\n\n");
2372 for (i = 0; i < data->current_loop->num_nodes; i++)
2374 edge_iterator ei;
2375 bb = body[i];
2377 FOR_EACH_EDGE (e, ei, bb->succs)
2378 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2379 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2380 find_interesting_uses_outside (data, e);
2382 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2383 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2384 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2385 if (!is_gimple_debug (gsi_stmt (bsi)))
2386 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2391 bitmap_iterator bi;
2393 fprintf (dump_file, "\n");
2395 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2397 info = ver_info (data, i);
2398 if (info->inv_id)
2400 fprintf (dump_file, " ");
2401 print_generic_expr (dump_file, info->name, TDF_SLIM);
2402 fprintf (dump_file, " is invariant (%d)%s\n",
2403 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2407 fprintf (dump_file, "\n");
2410 free (body);
2413 /* Compute the maximum offset of the [base + offset] addressing mode
2414 for the memory reference represented by USE. */
2416 static HOST_WIDE_INT
2417 compute_max_addr_offset (struct iv_use *use)
2419 int width;
2420 rtx reg, addr;
2421 HOST_WIDE_INT i, off;
2422 unsigned list_index, num;
2423 addr_space_t as;
2424 machine_mode mem_mode, addr_mode;
2425 static vec<HOST_WIDE_INT> max_offset_list;
2427 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2428 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2430 num = max_offset_list.length ();
2431 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2432 if (list_index >= num)
2434 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2435 for (; num < max_offset_list.length (); num++)
2436 max_offset_list[num] = -1;
2439 off = max_offset_list[list_index];
2440 if (off != -1)
2441 return off;
2443 addr_mode = targetm.addr_space.address_mode (as);
2444 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2445 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2447 width = GET_MODE_BITSIZE (addr_mode) - 1;
2448 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2449 width = HOST_BITS_PER_WIDE_INT - 1;
2451 for (i = width; i > 0; i--)
2453 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2454 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2455 if (memory_address_addr_space_p (mem_mode, addr, as))
2456 break;
2458 /* For some strict-alignment targets, the offset must be naturally
2459 aligned. Try an aligned offset if mem_mode is not QImode. */
2460 off = ((unsigned HOST_WIDE_INT) 1 << i);
2461 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2463 off -= GET_MODE_SIZE (mem_mode);
2464 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2465 if (memory_address_addr_space_p (mem_mode, addr, as))
2466 break;
2469 if (i == 0)
2470 off = 0;
2472 max_offset_list[list_index] = off;
2473 return off;
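/* For illustration, on a hypothetical target whose [base + offset] form
   accepts a 12-bit unsigned displacement: probing offsets of the form
   2^i - 1 from the widest bit downwards first succeeds at i == 12, so
   4095 is cached in max_offset_list and returned for this mode and
   address space.  */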
2476 /* Check if all small groups should be split. Return true if and
2477 only if:
2479 1) At least one group contains two uses with different offsets.
2480 2) No group contains more than two different offsets.
2482 Return false otherwise. We want to split such groups because:
2484 1) Small groups don't have much benefit and may interfere with
2485 general candidate selection.
2486 2) The problem size with only small groups is usually small, and
2487 the general algorithm can handle it well.
2489 TODO -- The above claim may not hold when auto increment is supported. */
2491 static bool
2492 split_all_small_groups (struct ivopts_data *data)
2494 bool split_p = false;
2495 unsigned int i, n, distinct;
2496 struct iv_use *pre, *use;
2498 n = n_iv_uses (data);
2499 for (i = 0; i < n; i++)
2501 use = iv_use (data, i);
2502 if (!use->next)
2503 continue;
2505 distinct = 1;
2506 gcc_assert (use->type == USE_ADDRESS);
2507 for (pre = use, use = use->next; use; pre = use, use = use->next)
2509 if (pre->addr_offset != use->addr_offset)
2510 distinct++;
2512 if (distinct > 2)
2513 return false;
2515 if (distinct == 2)
2516 split_p = true;
2519 return split_p;
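/* For illustration, with sub uses sorted by offset: a group with offsets
   {0, 4} has distinct == 2 and asks for splitting, a group with offsets
   {0, 0} has distinct == 1 and is ignored, and as soon as some group has
   offsets {0, 4, 8} (distinct == 3), the function returns false and the
   groups are left to the general algorithm.  */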
2522 /* For each group of address type uses, this function further groups
2523 these uses according to the maximum offset supported by the target's
2524 [base + offset] addressing mode. */
2526 static void
2527 group_address_uses (struct ivopts_data *data)
2529 HOST_WIDE_INT max_offset = -1;
2530 unsigned int i, n, sub_id;
2531 struct iv_use *pre, *use;
2532 unsigned HOST_WIDE_INT addr_offset_first;
2534 /* Reset max offset to split all small groups. */
2535 if (split_all_small_groups (data))
2536 max_offset = 0;
2538 n = n_iv_uses (data);
2539 for (i = 0; i < n; i++)
2541 use = iv_use (data, i);
2542 if (!use->next)
2543 continue;
2545 gcc_assert (use->type == USE_ADDRESS);
2546 if (max_offset != 0)
2547 max_offset = compute_max_addr_offset (use);
2549 while (use)
2551 sub_id = 0;
2552 addr_offset_first = use->addr_offset;
2553 /* Only uses whose offset relative to the first use fits in the
2554 offset part of the addressing mode can be grouped together. */
2555 for (pre = use, use = use->next;
2556 use && (use->addr_offset - addr_offset_first
2557 <= (unsigned HOST_WIDE_INT) max_offset);
2558 pre = use, use = use->next)
2560 use->id = pre->id;
2561 use->sub_id = ++sub_id;
2564 /* Break the list and create new group. */
2565 if (use)
2567 pre->next = NULL;
2568 use->id = n_iv_uses (data);
2569 use->related_cands = BITMAP_ALLOC (NULL);
2570 data->iv_uses.safe_push (use);
2575 if (dump_file && (dump_flags & TDF_DETAILS))
2576 dump_uses (dump_file, data);
2579 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2580 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2581 we are at the top-level of the processed address. */
2583 static tree
2584 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2585 HOST_WIDE_INT *offset)
2587 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2588 enum tree_code code;
2589 tree type, orig_type = TREE_TYPE (expr);
2590 HOST_WIDE_INT off0, off1, st;
2591 tree orig_expr = expr;
2593 STRIP_NOPS (expr);
2595 type = TREE_TYPE (expr);
2596 code = TREE_CODE (expr);
2597 *offset = 0;
2599 switch (code)
2601 case INTEGER_CST:
2602 if (!cst_and_fits_in_hwi (expr)
2603 || integer_zerop (expr))
2604 return orig_expr;
2606 *offset = int_cst_value (expr);
2607 return build_int_cst (orig_type, 0);
2609 case POINTER_PLUS_EXPR:
2610 case PLUS_EXPR:
2611 case MINUS_EXPR:
2612 op0 = TREE_OPERAND (expr, 0);
2613 op1 = TREE_OPERAND (expr, 1);
2615 op0 = strip_offset_1 (op0, false, false, &off0);
2616 op1 = strip_offset_1 (op1, false, false, &off1);
2618 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2619 if (op0 == TREE_OPERAND (expr, 0)
2620 && op1 == TREE_OPERAND (expr, 1))
2621 return orig_expr;
2623 if (integer_zerop (op1))
2624 expr = op0;
2625 else if (integer_zerop (op0))
2627 if (code == MINUS_EXPR)
2628 expr = fold_build1 (NEGATE_EXPR, type, op1);
2629 else
2630 expr = op1;
2632 else
2633 expr = fold_build2 (code, type, op0, op1);
2635 return fold_convert (orig_type, expr);
2637 case MULT_EXPR:
2638 op1 = TREE_OPERAND (expr, 1);
2639 if (!cst_and_fits_in_hwi (op1))
2640 return orig_expr;
2642 op0 = TREE_OPERAND (expr, 0);
2643 op0 = strip_offset_1 (op0, false, false, &off0);
2644 if (op0 == TREE_OPERAND (expr, 0))
2645 return orig_expr;
2647 *offset = off0 * int_cst_value (op1);
2648 if (integer_zerop (op0))
2649 expr = op0;
2650 else
2651 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2653 return fold_convert (orig_type, expr);
2655 case ARRAY_REF:
2656 case ARRAY_RANGE_REF:
2657 if (!inside_addr)
2658 return orig_expr;
2660 step = array_ref_element_size (expr);
2661 if (!cst_and_fits_in_hwi (step))
2662 break;
2664 st = int_cst_value (step);
2665 op1 = TREE_OPERAND (expr, 1);
2666 op1 = strip_offset_1 (op1, false, false, &off1);
2667 *offset = off1 * st;
2669 if (top_compref
2670 && integer_zerop (op1))
2672 /* Strip the component reference completely. */
2673 op0 = TREE_OPERAND (expr, 0);
2674 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2675 *offset += off0;
2676 return op0;
2678 break;
2680 case COMPONENT_REF:
2682 tree field;
2684 if (!inside_addr)
2685 return orig_expr;
2687 tmp = component_ref_field_offset (expr);
2688 field = TREE_OPERAND (expr, 1);
2689 if (top_compref
2690 && cst_and_fits_in_hwi (tmp)
2691 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2693 HOST_WIDE_INT boffset, abs_off;
2695 /* Strip the component reference completely. */
2696 op0 = TREE_OPERAND (expr, 0);
2697 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2698 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2699 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2700 if (boffset < 0)
2701 abs_off = -abs_off;
2703 *offset = off0 + int_cst_value (tmp) + abs_off;
2704 return op0;
2707 break;
2709 case ADDR_EXPR:
2710 op0 = TREE_OPERAND (expr, 0);
2711 op0 = strip_offset_1 (op0, true, true, &off0);
2712 *offset += off0;
2714 if (op0 == TREE_OPERAND (expr, 0))
2715 return orig_expr;
2717 expr = build_fold_addr_expr (op0);
2718 return fold_convert (orig_type, expr);
2720 case MEM_REF:
2721 /* ??? Offset operand? */
2722 inside_addr = false;
2723 break;
2725 default:
2726 return orig_expr;
2729 /* Default handling of expressions for which we want to recurse into
2730 the first operand. */
2731 op0 = TREE_OPERAND (expr, 0);
2732 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2733 *offset += off0;
2735 if (op0 == TREE_OPERAND (expr, 0)
2736 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2737 return orig_expr;
2739 expr = copy_node (expr);
2740 TREE_OPERAND (expr, 0) = op0;
2741 if (op1)
2742 TREE_OPERAND (expr, 1) = op1;
2744 /* Inside an address, we might strip the top-level component references,
2745 thus changing the type of the expression. The handling of ADDR_EXPR
2746 will fix that. */
2747 expr = fold_convert (orig_type, expr);
2749 return expr;
2752 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2754 static tree
2755 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2757 HOST_WIDE_INT off;
2758 tree core = strip_offset_1 (expr, false, false, &off);
2759 *offset = off;
2760 return core;
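/* A worked example for strip_offset, assuming 4-byte integers: for
   &a[i + 3], the ARRAY_REF case multiplies the offset 3 stripped from
   the index by the element size 4, so the core &a[i] is returned and
   *OFFSET is set to 12.  The bare reference a[i + 3] outside an address
   is returned unchanged, since the ARRAY_REF case requires
   INSIDE_ADDR.  */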
2763 /* Returns a variant of TYPE that can be used as a base for different uses.
2764 We return an unsigned type with the same precision, which avoids problems
2765 with overflows. */
2767 static tree
2768 generic_type_for (tree type)
2770 if (POINTER_TYPE_P (type))
2771 return unsigned_type_for (type);
2773 if (TYPE_UNSIGNED (type))
2774 return type;
2776 return unsigned_type_for (type);
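/* For example, generic_type_for maps int to unsigned int, returns
   unsigned long unchanged, and maps a pointer type to the unsigned
   integer type of the same precision, so that candidate arithmetic
   wraps instead of invoking undefined behavior on overflow.  */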
2779 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2780 the bitmap in which we should store them. */
2782 static struct ivopts_data *fd_ivopts_data;
2783 static tree
2784 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2786 bitmap *depends_on = (bitmap *) data;
2787 struct version_info *info;
2789 if (TREE_CODE (*expr_p) != SSA_NAME)
2790 return NULL_TREE;
2791 info = name_info (fd_ivopts_data, *expr_p);
2793 if (!info->inv_id || info->has_nonlin_use)
2794 return NULL_TREE;
2796 if (!*depends_on)
2797 *depends_on = BITMAP_ALLOC (NULL);
2798 bitmap_set_bit (*depends_on, info->inv_id);
2800 return NULL_TREE;
2803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2804 position to POS. If USE is not NULL, the candidate is set as related to
2805 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2806 replacement of the final value of the iv by a direct computation. */
2808 static struct iv_cand *
2809 add_candidate_1 (struct ivopts_data *data,
2810 tree base, tree step, bool important, enum iv_position pos,
2811 struct iv_use *use, gimple *incremented_at,
2812 struct iv *orig_iv = NULL)
2814 unsigned i;
2815 struct iv_cand *cand = NULL;
2816 tree type, orig_type;
2818 /* For non-original variables, make sure their values are computed in a type
2819 that does not invoke undefined behavior on overflows (since in general,
2820 we cannot prove that these induction variables are non-wrapping). */
2821 if (pos != IP_ORIGINAL)
2823 orig_type = TREE_TYPE (base);
2824 type = generic_type_for (orig_type);
2825 if (type != orig_type)
2827 base = fold_convert (type, base);
2828 step = fold_convert (type, step);
2832 for (i = 0; i < n_iv_cands (data); i++)
2834 cand = iv_cand (data, i);
2836 if (cand->pos != pos)
2837 continue;
2839 if (cand->incremented_at != incremented_at
2840 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2841 && cand->ainc_use != use))
2842 continue;
2844 if (!cand->iv)
2846 if (!base && !step)
2847 break;
2849 continue;
2852 if (!base && !step)
2853 continue;
2855 if (operand_equal_p (base, cand->iv->base, 0)
2856 && operand_equal_p (step, cand->iv->step, 0)
2857 && (TYPE_PRECISION (TREE_TYPE (base))
2858 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2859 break;
2862 if (i == n_iv_cands (data))
2864 cand = XCNEW (struct iv_cand);
2865 cand->id = i;
2867 if (!base && !step)
2868 cand->iv = NULL;
2869 else
2870 cand->iv = alloc_iv (data, base, step);
2872 cand->pos = pos;
2873 if (pos != IP_ORIGINAL && cand->iv)
2875 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2876 cand->var_after = cand->var_before;
2878 cand->important = important;
2879 cand->incremented_at = incremented_at;
2880 data->iv_candidates.safe_push (cand);
2882 if (step
2883 && TREE_CODE (step) != INTEGER_CST)
2885 fd_ivopts_data = data;
2886 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2889 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2890 cand->ainc_use = use;
2891 else
2892 cand->ainc_use = NULL;
2894 cand->orig_iv = orig_iv;
2895 if (dump_file && (dump_flags & TDF_DETAILS))
2896 dump_cand (dump_file, cand);
2899 if (important && !cand->important)
2901 cand->important = true;
2902 if (dump_file && (dump_flags & TDF_DETAILS))
2903 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2906 if (use)
2908 bitmap_set_bit (use->related_cands, i);
2909 if (dump_file && (dump_flags & TDF_DETAILS))
2910 fprintf (dump_file, "Candidate %d is related to use %d\n",
2911 cand->id, use->id);
2914 return cand;
2917 /* Returns true if incrementing the induction variable at the end of the LOOP
2918 is allowed.
2920 The purpose is to avoid splitting the latch edge with a biv increment,
2921 thus creating a jump, possibly confusing other optimization passes and
2922 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2923 IP_NORMAL_POS is not available (so we do not have a better alternative),
2924 or if the latch edge is already nonempty. */
2926 static bool
2927 allow_ip_end_pos_p (struct loop *loop)
2929 if (!ip_normal_pos (loop))
2930 return true;
2932 if (!empty_block_p (ip_end_pos (loop)))
2933 return true;
2935 return false;
2938 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2939 Important field is set to IMPORTANT. */
2941 static void
2942 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2943 bool important, struct iv_use *use)
2945 basic_block use_bb = gimple_bb (use->stmt);
2946 machine_mode mem_mode;
2947 unsigned HOST_WIDE_INT cstepi;
2949 /* If we insert the increment in any position other than the standard
2950 ones, we must ensure that it is incremented once per iteration.
2951 It must not be in an inner nested loop, or one side of an if
2952 statement. */
2953 if (use_bb->loop_father != data->current_loop
2954 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2955 || stmt_could_throw_p (use->stmt)
2956 || !cst_and_fits_in_hwi (step))
2957 return;
2959 cstepi = int_cst_value (step);
2961 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2962 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2963 || USE_STORE_PRE_INCREMENT (mem_mode))
2964 && GET_MODE_SIZE (mem_mode) == cstepi)
2965 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2966 || USE_STORE_PRE_DECREMENT (mem_mode))
2967 && GET_MODE_SIZE (mem_mode) == -cstepi))
2969 enum tree_code code = MINUS_EXPR;
2970 tree new_base;
2971 tree new_step = step;
2973 if (POINTER_TYPE_P (TREE_TYPE (base)))
2975 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2976 code = POINTER_PLUS_EXPR;
2978 else
2979 new_step = fold_convert (TREE_TYPE (base), new_step);
2980 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2981 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2982 use->stmt);
2984 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2985 || USE_STORE_POST_INCREMENT (mem_mode))
2986 && GET_MODE_SIZE (mem_mode) == cstepi)
2987 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2988 || USE_STORE_POST_DECREMENT (mem_mode))
2989 && GET_MODE_SIZE (mem_mode) == -cstepi))
2991 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2992 use->stmt);
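/* A sketch of the pre-increment case above, assuming a 4-byte MEM_MODE
   and step 4: the IP_BEFORE_USE candidate is built with
     new_base = base - 4,
   so the increment executed just before the use reproduces the original
   address, matching *(++p); the IP_AFTER_USE candidate keeps BASE
   unchanged, matching *(p++).  */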
2996 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2997 position to POS. If USE is not NULL, the candidate is set as related to
2998 it. The candidate computation is scheduled both before the exit
2999 condition and at the end of the loop. */
3001 static void
3002 add_candidate (struct ivopts_data *data,
3003 tree base, tree step, bool important, struct iv_use *use,
3004 struct iv *orig_iv = NULL)
3006 gcc_assert (use == NULL || use->sub_id == 0);
3008 if (ip_normal_pos (data->current_loop))
3009 add_candidate_1 (data, base, step, important,
3010 IP_NORMAL, use, NULL, orig_iv);
3011 if (ip_end_pos (data->current_loop)
3012 && allow_ip_end_pos_p (data->current_loop))
3013 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3016 /* Adds standard iv candidates. */
3018 static void
3019 add_standard_iv_candidates (struct ivopts_data *data)
3021 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3023 /* The same for the double-width long type if it is still fast enough. */
3024 if (TYPE_PRECISION
3025 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3026 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3027 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3028 build_int_cst (long_integer_type_node, 1), true, NULL);
3030 /* And again for the double-width long long type if it is still fast enough. */
3031 if (TYPE_PRECISION
3032 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3033 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3034 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3035 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3039 /* Adds candidates based on the old induction variable IV. */
3041 static void
3042 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3044 gimple *phi;
3045 tree def;
3046 struct iv_cand *cand;
3048 /* Check if this biv is used in address type use. */
3049 if (iv->no_overflow && iv->have_address_use
3050 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3051 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3053 tree base = fold_convert (sizetype, iv->base);
3054 tree step = fold_convert (sizetype, iv->step);
3056 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3057 add_candidate (data, base, step, true, NULL, iv);
3058 /* Add iv cand of the original type only if it has nonlinear use. */
3059 if (iv->have_use_for)
3060 add_candidate (data, iv->base, iv->step, true, NULL);
3062 else
3063 add_candidate (data, iv->base, iv->step, true, NULL);
3065 /* The same, but with initial value zero. */
3066 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3067 add_candidate (data, size_int (0), iv->step, true, NULL);
3068 else
3069 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3070 iv->step, true, NULL);
3072 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3073 if (gimple_code (phi) == GIMPLE_PHI)
3075 /* Additionally record the possibility of leaving the original iv
3076 untouched. */
3077 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3078 /* Don't add the candidate if it comes from another PHI node, because
3079 then it is an affine iv appearing in the form of PEELED_CHREC. */
3080 phi = SSA_NAME_DEF_STMT (def);
3081 if (gimple_code (phi) != GIMPLE_PHI)
3083 cand = add_candidate_1 (data,
3084 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3085 SSA_NAME_DEF_STMT (def));
3086 cand->var_before = iv->ssa_name;
3087 cand->var_after = def;
3089 else
3090 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3094 /* Adds candidates based on the old induction variables. */
3096 static void
3097 add_iv_candidate_for_bivs (struct ivopts_data *data)
3099 unsigned i;
3100 struct iv *iv;
3101 bitmap_iterator bi;
3103 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3105 iv = ver_info (data, i)->iv;
3106 if (iv && iv->biv_p && !integer_zerop (iv->step))
3107 add_iv_candidate_for_biv (data, iv);
3111 /* Record the common candidate {BASE, STEP} derived from USE in the hash table. */
3113 static void
3114 record_common_cand (struct ivopts_data *data, tree base,
3115 tree step, struct iv_use *use)
3117 struct iv_common_cand ent;
3118 struct iv_common_cand **slot;
3120 gcc_assert (use != NULL);
3122 ent.base = base;
3123 ent.step = step;
3124 ent.hash = iterative_hash_expr (base, 0);
3125 ent.hash = iterative_hash_expr (step, ent.hash);
3127 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3128 if (*slot == NULL)
3130 *slot = new iv_common_cand ();
3131 (*slot)->base = base;
3132 (*slot)->step = step;
3133 (*slot)->uses.create (8);
3134 (*slot)->hash = ent.hash;
3135 data->iv_common_cands.safe_push ((*slot));
3137 (*slot)->uses.safe_push (use);
3138 return;
3141 /* Comparison function used to sort common candidates by decreasing number of uses. */
3143 static int
3144 common_cand_cmp (const void *p1, const void *p2)
3146 unsigned n1, n2;
3147 const struct iv_common_cand *const *const ccand1
3148 = (const struct iv_common_cand *const *)p1;
3149 const struct iv_common_cand *const *const ccand2
3150 = (const struct iv_common_cand *const *)p2;
3152 n1 = (*ccand1)->uses.length ();
3153 n2 = (*ccand2)->uses.length ();
3154 return n2 - n1;
3157 /* Adds IV candidates based on the common candidates recorded. */
3159 static void
3160 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3162 unsigned i, j;
3163 struct iv_cand *cand_1, *cand_2;
3165 data->iv_common_cands.qsort (common_cand_cmp);
3166 for (i = 0; i < data->iv_common_cands.length (); i++)
3168 struct iv_common_cand *ptr = data->iv_common_cands[i];
3170 /* Only add IV candidate if it's derived from multiple uses. */
3171 if (ptr->uses.length () <= 1)
3172 break;
3174 cand_1 = NULL;
3175 cand_2 = NULL;
3176 if (ip_normal_pos (data->current_loop))
3177 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3178 false, IP_NORMAL, NULL, NULL);
3180 if (ip_end_pos (data->current_loop)
3181 && allow_ip_end_pos_p (data->current_loop))
3182 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3183 false, IP_END, NULL, NULL);
3185 /* Bind deriving uses and the new candidates. */
3186 for (j = 0; j < ptr->uses.length (); j++)
3188 struct iv_use *use = ptr->uses[j];
3189 if (cand_1)
3190 bitmap_set_bit (use->related_cands, cand_1->id);
3191 if (cand_2)
3192 bitmap_set_bit (use->related_cands, cand_2->id);
3196 /* Release data since it is useless from this point. */
3197 data->iv_common_cand_tab->empty ();
3198 data->iv_common_cands.truncate (0);
3201 /* Adds candidates based on the value of USE's iv. */
3203 static void
3204 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3206 unsigned HOST_WIDE_INT offset;
3207 tree base;
3208 tree basetype;
3209 struct iv *iv = use->iv;
3211 add_candidate (data, iv->base, iv->step, false, use);
3213 /* Record common candidate for use in case it can be shared by others. */
3214 record_common_cand (data, iv->base, iv->step, use);
3216 /* Record common candidate with initial value zero. */
3217 basetype = TREE_TYPE (iv->base);
3218 if (POINTER_TYPE_P (basetype))
3219 basetype = sizetype;
3220 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3222 /* Record common candidate with constant offset stripped in base. */
3224 base = strip_offset (iv->base, &offset);
3225 if (offset || base != iv->base)
3226 record_common_cand (data, base, iv->step, use);
3229 /* Record common candidate with base_object removed in base. */
3230 if (iv->base_object != NULL)
3232 unsigned i;
3233 aff_tree aff_base;
3234 tree step, base_object = iv->base_object;
3236 base = iv->base;
3237 step = iv->step;
3238 STRIP_NOPS (base);
3239 STRIP_NOPS (step);
3240 STRIP_NOPS (base_object);
3241 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3242 for (i = 0; i < aff_base.n; i++)
3244 if (aff_base.elts[i].coef != 1)
3245 continue;
3247 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3248 break;
3250 if (i < aff_base.n)
3252 aff_combination_remove_elt (&aff_base, i);
3253 base = aff_combination_to_tree (&aff_base);
3254 basetype = TREE_TYPE (base);
3255 if (POINTER_TYPE_P (basetype))
3256 basetype = sizetype;
3258 step = fold_convert (basetype, step);
3259 record_common_cand (data, base, step, use);
3260 /* Also record common candidate with offset stripped. */
3261 base = strip_offset (base, &offset);
3262 if (offset)
3263 record_common_cand (data, base, step, use);
3267 /* Finally, add auto-increment candidates. Make such variables
3268 important, since other iv uses with the same base object may be
3269 based on them. */
3270 if (use != NULL && use->type == USE_ADDRESS)
3271 add_autoinc_candidates (data, iv->base, iv->step, true, use);
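/* For illustration, for a use on p + 16 + 4 * i with base object p, the
   candidates recorded above are roughly: the iv itself {p + 16 + 4i, 4},
   the zero-based {0, 4}, the offset-stripped {p + 4i, 4} and, with the
   base object removed, {16 + 4i, 4} and {4i, 4}, so that uses differing
   only in p or in a constant offset can share one candidate.  */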
3274 /* Adds candidates based on the uses. */
3276 static void
3277 add_iv_candidate_for_uses (struct ivopts_data *data)
3279 unsigned i;
3281 for (i = 0; i < n_iv_uses (data); i++)
3283 struct iv_use *use = iv_use (data, i);
3285 if (!use)
3286 continue;
3288 switch (use->type)
3290 case USE_NONLINEAR_EXPR:
3291 case USE_COMPARE:
3292 case USE_ADDRESS:
3293 /* Just add the ivs based on the value of the iv used here. */
3294 add_iv_candidate_for_use (data, use);
3295 break;
3297 default:
3298 gcc_unreachable ();
3301 add_iv_candidate_derived_from_uses (data);
3304 /* Record important candidates and add them to related_cands bitmaps. */
3306 static void
3307 record_important_candidates (struct ivopts_data *data)
3309 unsigned i;
3310 struct iv_use *use;
3312 for (i = 0; i < n_iv_cands (data); i++)
3314 struct iv_cand *cand = iv_cand (data, i);
3316 if (cand->important)
3317 bitmap_set_bit (data->important_candidates, i);
3320 data->consider_all_candidates = (n_iv_cands (data)
3321 <= CONSIDER_ALL_CANDIDATES_BOUND);
3323 /* Add important candidates to uses' related_cands bitmaps. */
3324 for (i = 0; i < n_iv_uses (data); i++)
3326 use = iv_use (data, i);
3327 bitmap_ior_into (use->related_cands, data->important_candidates);
3331 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3332 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3333 we allocate a simple list for every use. */
3335 static void
3336 alloc_use_cost_map (struct ivopts_data *data)
3338 unsigned i, size, s;
3340 for (i = 0; i < n_iv_uses (data); i++)
3342 struct iv_use *use = iv_use (data, i);
3344 if (data->consider_all_candidates)
3345 size = n_iv_cands (data);
3346 else
3348 s = bitmap_count_bits (use->related_cands);
3350 /* Round up to a power of two, so that computing the modulo by it is fast. */
3351 size = s ? (1 << ceil_log2 (s)) : 1;
3354 use->n_map_members = size;
3355 use->cost_map = XCNEWVEC (struct cost_pair, size);
3359 /* Returns a description of the computation cost of an expression whose
3360 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3362 static comp_cost
3363 new_cost (unsigned runtime, unsigned complexity)
3365 comp_cost cost;
3367 cost.cost = runtime;
3368 cost.complexity = complexity;
3370 return cost;
3373 /* Returns true if COST is infinite. */
3375 static bool
3376 infinite_cost_p (comp_cost cost)
3378 return cost.cost == INFTY;
3381 /* Adds costs COST1 and COST2. */
3383 static comp_cost
3384 add_costs (comp_cost cost1, comp_cost cost2)
3386 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3387 return infinite_cost;
3389 cost1.cost += cost2.cost;
3390 cost1.complexity += cost2.complexity;
3392 return cost1;
3394 /* Subtracts cost COST2 from cost COST1. */
3396 static comp_cost
3397 sub_costs (comp_cost cost1, comp_cost cost2)
3399 cost1.cost -= cost2.cost;
3400 cost1.complexity -= cost2.complexity;
3402 return cost1;
3405 /* Returns a negative number if COST1 < COST2, a positive number if
3406 COST1 > COST2, and 0 if COST1 = COST2. */
3408 static int
3409 compare_costs (comp_cost cost1, comp_cost cost2)
3411 if (cost1.cost == cost2.cost)
3412 return cost1.complexity - cost2.complexity;
3414 return cost1.cost - cost2.cost;
3417 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3418 depends on the invariants DEPENDS_ON, that the value used in expressing it
3419 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
3421 static void
3422 set_use_iv_cost (struct ivopts_data *data,
3423 struct iv_use *use, struct iv_cand *cand,
3424 comp_cost cost, bitmap depends_on, tree value,
3425 enum tree_code comp, int inv_expr_id)
3427 unsigned i, s;
3429 if (infinite_cost_p (cost))
3431 BITMAP_FREE (depends_on);
3432 return;
3435 if (data->consider_all_candidates)
3437 use->cost_map[cand->id].cand = cand;
3438 use->cost_map[cand->id].cost = cost;
3439 use->cost_map[cand->id].depends_on = depends_on;
3440 use->cost_map[cand->id].value = value;
3441 use->cost_map[cand->id].comp = comp;
3442 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3443 return;
3446 /* n_map_members is a power of two, so this computes modulo. */
3447 s = cand->id & (use->n_map_members - 1);
3448 for (i = s; i < use->n_map_members; i++)
3449 if (!use->cost_map[i].cand)
3450 goto found;
3451 for (i = 0; i < s; i++)
3452 if (!use->cost_map[i].cand)
3453 goto found;
3455 gcc_unreachable ();
3457 found:
3458 use->cost_map[i].cand = cand;
3459 use->cost_map[i].cost = cost;
3460 use->cost_map[i].depends_on = depends_on;
3461 use->cost_map[i].value = value;
3462 use->cost_map[i].comp = comp;
3463 use->cost_map[i].inv_expr_id = inv_expr_id;
3466 /* Gets cost of (USE, CANDIDATE) pair. */
3468 static struct cost_pair *
3469 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3470 struct iv_cand *cand)
3472 unsigned i, s;
3473 struct cost_pair *ret;
3475 if (!cand)
3476 return NULL;
3478 if (data->consider_all_candidates)
3480 ret = use->cost_map + cand->id;
3481 if (!ret->cand)
3482 return NULL;
3484 return ret;
3487 /* n_map_members is a power of two, so this computes modulo. */
3488 s = cand->id & (use->n_map_members - 1);
3489 for (i = s; i < use->n_map_members; i++)
3490 if (use->cost_map[i].cand == cand)
3491 return use->cost_map + i;
3492 else if (use->cost_map[i].cand == NULL)
3493 return NULL;
3494 for (i = 0; i < s; i++)
3495 if (use->cost_map[i].cand == cand)
3496 return use->cost_map + i;
3497 else if (use->cost_map[i].cand == NULL)
3498 return NULL;
3500 return NULL;
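/* A sketch of the open-addressing scheme used by the two functions
   above, assuming a use with n_map_members == 4: a candidate with id 6
   starts at slot 6 & 3 == 2 and probes slots 2, 3, then wraps to 0 and
   1, stopping at the first empty slot.  Since infinite costs are never
   stored, a missing entry means the pair was deemed infeasible.  */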
3503 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3504 static rtx
3505 produce_memory_decl_rtl (tree obj, int *regno)
3507 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3508 machine_mode address_mode = targetm.addr_space.address_mode (as);
3509 rtx x;
3511 gcc_assert (obj);
3512 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3514 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3515 x = gen_rtx_SYMBOL_REF (address_mode, name);
3516 SET_SYMBOL_REF_DECL (x, obj);
3517 x = gen_rtx_MEM (DECL_MODE (obj), x);
3518 set_mem_addr_space (x, as);
3519 targetm.encode_section_info (obj, x, true);
3521 else
3523 x = gen_raw_REG (address_mode, (*regno)++);
3524 x = gen_rtx_MEM (DECL_MODE (obj), x);
3525 set_mem_addr_space (x, as);
3528 return x;
3531 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3532 walk_tree. DATA contains the actual fake register number. */
3534 static tree
3535 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3537 tree obj = NULL_TREE;
3538 rtx x = NULL_RTX;
3539 int *regno = (int *) data;
3541 switch (TREE_CODE (*expr_p))
3543 case ADDR_EXPR:
3544 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3545 handled_component_p (*expr_p);
3546 expr_p = &TREE_OPERAND (*expr_p, 0))
3547 continue;
3548 obj = *expr_p;
3549 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3550 x = produce_memory_decl_rtl (obj, regno);
3551 break;
3553 case SSA_NAME:
3554 *ws = 0;
3555 obj = SSA_NAME_VAR (*expr_p);
3556 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3557 if (!obj)
3558 return NULL_TREE;
3559 if (!DECL_RTL_SET_P (obj))
3560 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3561 break;
3563 case VAR_DECL:
3564 case PARM_DECL:
3565 case RESULT_DECL:
3566 *ws = 0;
3567 obj = *expr_p;
3569 if (DECL_RTL_SET_P (obj))
3570 break;
3572 if (DECL_MODE (obj) == BLKmode)
3573 x = produce_memory_decl_rtl (obj, regno);
3574 else
3575 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3577 break;
3579 default:
3580 break;
3583 if (x)
3585 decl_rtl_to_reset.safe_push (obj);
3586 SET_DECL_RTL (obj, x);
3589 return NULL_TREE;
3592 /* Determines cost of the computation of EXPR. */
3594 static unsigned
3595 computation_cost (tree expr, bool speed)
3597 rtx_insn *seq;
3598 rtx rslt;
3599 tree type = TREE_TYPE (expr);
3600 unsigned cost;
3601 /* Avoid using hard regs in ways which may be unsupported. */
3602 int regno = LAST_VIRTUAL_REGISTER + 1;
3603 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3604 enum node_frequency real_frequency = node->frequency;
3606 node->frequency = NODE_FREQUENCY_NORMAL;
3607 crtl->maybe_hot_insn_p = speed;
3608 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3609 start_sequence ();
3610 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3611 seq = get_insns ();
3612 end_sequence ();
3613 default_rtl_profile ();
3614 node->frequency = real_frequency;
3616 cost = seq_cost (seq, speed);
3617 if (MEM_P (rslt))
3618 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3619 TYPE_ADDR_SPACE (type), speed);
3620 else if (!REG_P (rslt))
3621 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3623 return cost;
3626 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3628 static tree
3629 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3631 if (stmt_after_increment (loop, cand, stmt))
3632 return cand->var_after;
3633 else
3634 return cand->var_before;
3637 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3638 same precision, which is at least as wide as the precision of TYPE, stores
3639 BA to *A and BB to *B, and returns the type of BA. Otherwise, returns
3640 the type of *A and *B unchanged. */
3642 static tree
3643 determine_common_wider_type (tree *a, tree *b)
3645 tree wider_type = NULL;
3646 tree suba, subb;
3647 tree atype = TREE_TYPE (*a);
3649 if (CONVERT_EXPR_P (*a))
3651 suba = TREE_OPERAND (*a, 0);
3652 wider_type = TREE_TYPE (suba);
3653 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3654 return atype;
3656 else
3657 return atype;
3659 if (CONVERT_EXPR_P (*b))
3661 subb = TREE_OPERAND (*b, 0);
3662 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3663 return atype;
3665 else
3666 return atype;
3668 *a = suba;
3669 *b = subb;
3670 return wider_type;
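/* For example: if *A is (unsigned int) a64 and *B is (unsigned int) b64,
   where a64 and b64 both have type unsigned long, the conversions are
   peeled off, *A and *B are set to a64 and b64, and unsigned long is
   returned; if only one of the operands is such a conversion, the
   original type is kept.  */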
3673 /* Determines the expression by which USE is expressed from induction variable
3674 CAND at statement AT in LOOP. The expression is stored in a decomposed
3675 form into AFF. Returns false if USE cannot be expressed using CAND. */
3677 static bool
3678 get_computation_aff (struct loop *loop,
3679 struct iv_use *use, struct iv_cand *cand, gimple *at,
3680 struct aff_tree *aff)
3682 tree ubase = use->iv->base;
3683 tree ustep = use->iv->step;
3684 tree cbase = cand->iv->base;
3685 tree cstep = cand->iv->step, cstep_common;
3686 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3687 tree common_type, var;
3688 tree uutype;
3689 aff_tree cbase_aff, var_aff;
3690 widest_int rat;
3692 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3694 /* We do not have enough precision to express the values of the use. */
3695 return false;
3698 var = var_at_stmt (loop, cand, at);
3699 uutype = unsigned_type_for (utype);
3701 /* If the conversion is not a no-op, perform it. */
3702 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3704 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3705 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3707 tree inner_base, inner_step, inner_type;
3708 inner_base = TREE_OPERAND (cbase, 0);
3709 if (CONVERT_EXPR_P (cstep))
3710 inner_step = TREE_OPERAND (cstep, 0);
3711 else
3712 inner_step = cstep;
3714 inner_type = TREE_TYPE (inner_base);
3715 /* If the candidate is added from a biv whose type is smaller than
3716 ctype, we know both the candidate and the biv won't overflow.
3717 In this case, it's safe to skip the conversion in the candidate.
3718 As an example, (unsigned short)((unsigned long)A) equals
3719 (unsigned short)A, if A has a type no larger than short. */
3720 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3722 cbase = inner_base;
3723 cstep = inner_step;
3726 cstep = fold_convert (uutype, cstep);
3727 cbase = fold_convert (uutype, cbase);
3728 var = fold_convert (uutype, var);
3731 if (!constant_multiple_of (ustep, cstep, &rat))
3732 return false;
3734 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3735 type, we achieve better folding by computing their difference in this
3736 wider type, and casting the result to UUTYPE. We do not need to worry
3737 about overflows, as all the arithmetic will in the end be performed in
3738 UUTYPE anyway. */
3739 common_type = determine_common_wider_type (&ubase, &cbase);
3741 /* use = ubase - ratio * cbase + ratio * var. */
3742 tree_to_aff_combination (ubase, common_type, aff);
3743 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3744 tree_to_aff_combination (var, uutype, &var_aff);
3746 /* We need to shift the value if we are after the increment. */
3747 if (stmt_after_increment (loop, cand, at))
3749 aff_tree cstep_aff;
3751 if (common_type != uutype)
3752 cstep_common = fold_convert (common_type, cstep);
3753 else
3754 cstep_common = cstep;
3756 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3757 aff_combination_add (&cbase_aff, &cstep_aff);
3760 aff_combination_scale (&cbase_aff, -rat);
3761 aff_combination_add (aff, &cbase_aff);
3762 if (common_type != uutype)
3763 aff_combination_convert (aff, uutype);
3765 aff_combination_scale (&var_aff, rat);
3766 aff_combination_add (aff, &var_aff);
3768 return true;
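/* A small worked instance of use = ubase - ratio * cbase + ratio * var:
   for a use with base 16 and step 8 expressed by a candidate with base 0
   and step 4, constant_multiple_of yields ratio 2, and the resulting
   combination is 16 - 2 * 0 + 2 * var, i.e. the use is recomputed as
   2 * var + 16.  */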
3771 /* Return the type of USE. */
3773 static tree
3774 get_use_type (struct iv_use *use)
3776 tree base_type = TREE_TYPE (use->iv->base);
3777 tree type;
3779 if (use->type == USE_ADDRESS)
3781 /* The base_type may be a void pointer. Create a pointer type based on
3782 the mem_ref instead. */
3783 type = build_pointer_type (TREE_TYPE (*use->op_p));
3784 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3785 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3787 else
3788 type = base_type;
3790 return type;
3793 /* Determines the expression by which USE is expressed from induction variable
3794 CAND at statement AT in LOOP. The computation is unshared. */
3796 static tree
3797 get_computation_at (struct loop *loop,
3798 struct iv_use *use, struct iv_cand *cand, gimple *at)
3800 aff_tree aff;
3801 tree type = get_use_type (use);
3803 if (!get_computation_aff (loop, use, cand, at, &aff))
3804 return NULL_TREE;
3805 unshare_aff_combination (&aff);
3806 return fold_convert (type, aff_combination_to_tree (&aff));
3809 /* Determines the expression by which USE is expressed from induction variable
3810 CAND in LOOP. The computation is unshared. */
3812 static tree
3813 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3815 return get_computation_at (loop, use, cand, use->stmt);
3818 /* Adjust the cost COST for being in loop setup rather than loop body.
3819 If we're optimizing for space, the loop setup overhead is constant;
3820 if we're optimizing for speed, amortize it over the per-iteration cost. */
3821 static unsigned
3822 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3824 if (cost == INFTY)
3825 return cost;
3826 else if (optimize_loop_for_speed_p (data->current_loop))
3827 return cost / avg_loop_niter (data->current_loop);
3828 else
3829 return cost;
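/* For instance, a setup cost of 10 in a loop that iterates 5 times on
   average is charged as 10 / 5 == 2 by the function above when
   optimizing for speed, whereas for size it stays 10, since the setup
   code contributes its full size exactly once regardless of the
   iteration count.  */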
3832 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3833 validity for a memory reference accessing memory of mode MODE in
3834 address space AS. */
3837 bool
3838 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3839 addr_space_t as)
3841 #define MAX_RATIO 128
3842 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3843 static vec<sbitmap> valid_mult_list;
3844 sbitmap valid_mult;
3846 if (data_index >= valid_mult_list.length ())
3847 valid_mult_list.safe_grow_cleared (data_index + 1);
3849 valid_mult = valid_mult_list[data_index];
3850 if (!valid_mult)
3852 machine_mode address_mode = targetm.addr_space.address_mode (as);
3853 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3854 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3855 rtx addr, scaled;
3856 HOST_WIDE_INT i;
3858 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3859 bitmap_clear (valid_mult);
3860 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3861 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3862 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3864 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3865 if (memory_address_addr_space_p (mode, addr, as)
3866 || memory_address_addr_space_p (mode, scaled, as))
3867 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3870 if (dump_file && (dump_flags & TDF_DETAILS))
3872 fprintf (dump_file, " allowed multipliers:");
3873 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3874 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3875 fprintf (dump_file, " %d", (int) i);
3876 fprintf (dump_file, "\n");
3877 fprintf (dump_file, "\n");
3880 valid_mult_list[data_index] = valid_mult;
3883 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3884 return false;
3886 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
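/* For illustration: on x86, the scaled-index address forms typically
   accept the ratios 1, 2, 4 and 8, so the cached bitmap has exactly
   those bits set (biased by MAX_RATIO), and e.g. ratio 3 makes the
   function above return false for such a mode and address space.  */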
3889 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3890 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3891 variable is omitted. Compute the cost for a memory reference that accesses
3892 a memory location of mode MEM_MODE in address space AS.
3894 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3895 size of MEM_MODE / RATIO) is available. To make this determination, we
3896 look at the size of the increment to be made, which is given in CSTEP.
3897 CSTEP may be zero if the step is unknown.
3898 STMT_AFTER_INC is true iff the statement we're looking at is after the
3899 increment of the original biv.
3901 TODO -- there must be some better way. This all is quite crude. */
3903 enum ainc_type
3905 AINC_PRE_INC, /* Pre increment. */
3906 AINC_PRE_DEC, /* Pre decrement. */
3907 AINC_POST_INC, /* Post increment. */
3908 AINC_POST_DEC, /* Post decrement. */
3909 AINC_NONE /* Also the number of auto increment types. */
3912 struct address_cost_data
3914 HOST_WIDE_INT min_offset, max_offset;
3915 unsigned costs[2][2][2][2];
3916 unsigned ainc_costs[AINC_NONE];
3920 static comp_cost
3921 get_address_cost (bool symbol_present, bool var_present,
3922 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3923 HOST_WIDE_INT cstep, machine_mode mem_mode,
3924 addr_space_t as, bool speed,
3925 bool stmt_after_inc, bool *may_autoinc)
3927 machine_mode address_mode = targetm.addr_space.address_mode (as);
3928 static vec<address_cost_data *> address_cost_data_list;
3929 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3930 address_cost_data *data;
3931 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3932 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3933 unsigned cost, acost, complexity;
3934 enum ainc_type autoinc_type;
3935 bool offset_p, ratio_p, autoinc;
3936 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3937 unsigned HOST_WIDE_INT mask;
3938 unsigned bits;
3940 if (data_index >= address_cost_data_list.length ())
3941 address_cost_data_list.safe_grow_cleared (data_index + 1);
3943 data = address_cost_data_list[data_index];
3944 if (!data)
3946 HOST_WIDE_INT i;
3947 HOST_WIDE_INT rat, off = 0;
3948 int old_cse_not_expected, width;
3949 unsigned sym_p, var_p, off_p, rat_p, add_c;
3950 rtx_insn *seq;
3951 rtx addr, base;
3952 rtx reg0, reg1;
3954 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3956 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3958 width = GET_MODE_BITSIZE (address_mode) - 1;
3959 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3960 width = HOST_BITS_PER_WIDE_INT - 1;
3961 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3963 for (i = width; i >= 0; i--)
3965 off = -((unsigned HOST_WIDE_INT) 1 << i);
3966 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3967 if (memory_address_addr_space_p (mem_mode, addr, as))
3968 break;
3970 data->min_offset = (i == -1? 0 : off);
3972 for (i = width; i >= 0; i--)
3974 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3975 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3976 if (memory_address_addr_space_p (mem_mode, addr, as))
3977 break;
3978 /* For some strict-alignment targets, the offset must be naturally
3979 aligned. Try an aligned offset if mem_mode is not QImode. */
3980 off = mem_mode != QImode
3981 ? ((unsigned HOST_WIDE_INT) 1 << i)
3982 - GET_MODE_SIZE (mem_mode)
3983 : 0;
3984 if (off > 0)
3986 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3987 if (memory_address_addr_space_p (mem_mode, addr, as))
3988 break;
3991 if (i == -1)
3992 off = 0;
3993 data->max_offset = off;
3995 if (dump_file && (dump_flags & TDF_DETAILS))
3997 fprintf (dump_file, "get_address_cost:\n");
3998 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3999 GET_MODE_NAME (mem_mode),
4000 data->min_offset);
4001 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4002 GET_MODE_NAME (mem_mode),
4003 data->max_offset);
4006 rat = 1;
4007 for (i = 2; i <= MAX_RATIO; i++)
4008 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4010 rat = i;
4011 break;
4014 /* Compute the cost of various addressing modes. */
4015 acost = 0;
4016 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4017 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4019 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4020 || USE_STORE_PRE_DECREMENT (mem_mode))
4022 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4023 has_predec[mem_mode]
4024 = memory_address_addr_space_p (mem_mode, addr, as);
4026 if (has_predec[mem_mode])
4027 data->ainc_costs[AINC_PRE_DEC]
4028 = address_cost (addr, mem_mode, as, speed);
4030 if (USE_LOAD_POST_DECREMENT (mem_mode)
4031 || USE_STORE_POST_DECREMENT (mem_mode))
4033 addr = gen_rtx_POST_DEC (address_mode, reg0);
4034 has_postdec[mem_mode]
4035 = memory_address_addr_space_p (mem_mode, addr, as);
4037 if (has_postdec[mem_mode])
4038 data->ainc_costs[AINC_POST_DEC]
4039 = address_cost (addr, mem_mode, as, speed);
4041 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4042 || USE_STORE_PRE_INCREMENT (mem_mode))
4044 addr = gen_rtx_PRE_INC (address_mode, reg0);
4045 has_preinc[mem_mode]
4046 = memory_address_addr_space_p (mem_mode, addr, as);
4048 if (has_preinc[mem_mode])
4049 data->ainc_costs[AINC_PRE_INC]
4050 = address_cost (addr, mem_mode, as, speed);
4052 if (USE_LOAD_POST_INCREMENT (mem_mode)
4053 || USE_STORE_POST_INCREMENT (mem_mode))
4055 addr = gen_rtx_POST_INC (address_mode, reg0);
4056 has_postinc[mem_mode]
4057 = memory_address_addr_space_p (mem_mode, addr, as);
4059 if (has_postinc[mem_mode])
4060 data->ainc_costs[AINC_POST_INC]
4061 = address_cost (addr, mem_mode, as, speed);
4063 for (i = 0; i < 16; i++)
4065 sym_p = i & 1;
4066 var_p = (i >> 1) & 1;
4067 off_p = (i >> 2) & 1;
4068 rat_p = (i >> 3) & 1;
4070 addr = reg0;
4071 if (rat_p)
4072 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4073 gen_int_mode (rat, address_mode));
4075 if (var_p)
4076 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4078 if (sym_p)
4080 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4081 /* ??? We can run into trouble with some backends by presenting
4082 it with symbols which haven't been properly passed through
4083 targetm.encode_section_info. By setting the local bit, we
4084 enhance the probability of things working. */
4085 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4087 if (off_p)
4088 base = gen_rtx_fmt_e (CONST, address_mode,
4089 gen_rtx_fmt_ee
4090 (PLUS, address_mode, base,
4091 gen_int_mode (off, address_mode)));
4093 else if (off_p)
4094 base = gen_int_mode (off, address_mode);
4095 else
4096 base = NULL_RTX;
4098 if (base)
4099 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4101 start_sequence ();
4102 /* To avoid splitting addressing modes, pretend that no cse will
4103 follow. */
4104 old_cse_not_expected = cse_not_expected;
4105 cse_not_expected = true;
4106 addr = memory_address_addr_space (mem_mode, addr, as);
4107 cse_not_expected = old_cse_not_expected;
4108 seq = get_insns ();
4109 end_sequence ();
4111 acost = seq_cost (seq, speed);
4112 acost += address_cost (addr, mem_mode, as, speed);
4114 if (!acost)
4115 acost = 1;
4116 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4119 /* On some targets, it is quite expensive to load a symbol into a register,
4120 which makes addresses that contain symbols look much more expensive.
4121 However, the symbol will have to be loaded in any case before the
4122 loop (and quite likely we have it in a register already), so it does not
4123 make much sense to penalize them too heavily. So make some final
4124 tweaks for the SYMBOL_PRESENT modes:
4126 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4127 var is cheaper, use this mode with small penalty.
4128 If VAR_PRESENT is true, try whether the mode with
4129 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4130 if this is the case, use it. */
4131 add_c = add_cost (speed, address_mode);
4132 for (i = 0; i < 8; i++)
4134 var_p = i & 1;
4135 off_p = (i >> 1) & 1;
4136 rat_p = (i >> 2) & 1;
4138 acost = data->costs[0][1][off_p][rat_p] + 1;
4139 if (var_p)
4140 acost += add_c;
4142 if (acost < data->costs[1][var_p][off_p][rat_p])
4143 data->costs[1][var_p][off_p][rat_p] = acost;
4146 if (dump_file && (dump_flags & TDF_DETAILS))
4148 fprintf (dump_file, "Address costs:\n");
4150 for (i = 0; i < 16; i++)
4152 sym_p = i & 1;
4153 var_p = (i >> 1) & 1;
4154 off_p = (i >> 2) & 1;
4155 rat_p = (i >> 3) & 1;
4157 fprintf (dump_file, " ");
4158 if (sym_p)
4159 fprintf (dump_file, "sym + ");
4160 if (var_p)
4161 fprintf (dump_file, "var + ");
4162 if (off_p)
4163 fprintf (dump_file, "cst + ");
4164 if (rat_p)
4165 fprintf (dump_file, "rat * ");
4167 acost = data->costs[sym_p][var_p][off_p][rat_p];
4168 fprintf (dump_file, "index costs %d\n", acost);
4170 if (has_predec[mem_mode] || has_postdec[mem_mode]
4171 || has_preinc[mem_mode] || has_postinc[mem_mode])
4172 fprintf (dump_file, " May include autoinc/dec\n");
4173 fprintf (dump_file, "\n");
4176 address_cost_data_list[data_index] = data;
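/* Reduce OFFSET modulo the width of the address mode and sign-extend the
   result, so that negative offsets are represented canonically in the
   HOST_WIDE_INT computations below.  */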
4179 bits = GET_MODE_BITSIZE (address_mode);
4180 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4181 offset &= mask;
4182 if ((offset >> (bits - 1) & 1))
4183 offset |= ~mask;
4184 s_offset = offset;
4186 autoinc = false;
4187 autoinc_type = AINC_NONE;
4188 msize = GET_MODE_SIZE (mem_mode);
4189 autoinc_offset = offset;
4190 if (stmt_after_inc)
4191 autoinc_offset += ratio * cstep;
4192 if (symbol_present || var_present || ratio != 1)
4193 autoinc = false;
4194 else
4196 if (has_postinc[mem_mode] && autoinc_offset == 0
4197 && msize == cstep)
4198 autoinc_type = AINC_POST_INC;
4199 else if (has_postdec[mem_mode] && autoinc_offset == 0
4200 && msize == -cstep)
4201 autoinc_type = AINC_POST_DEC;
4202 else if (has_preinc[mem_mode] && autoinc_offset == msize
4203 && msize == cstep)
4204 autoinc_type = AINC_PRE_INC;
4205 else if (has_predec[mem_mode] && autoinc_offset == -msize
4206 && msize == -cstep)
4207 autoinc_type = AINC_PRE_DEC;
4209 if (autoinc_type != AINC_NONE)
4210 autoinc = true;
4213 cost = 0;
4214 offset_p = (s_offset != 0
4215 && data->min_offset <= s_offset
4216 && s_offset <= data->max_offset);
4217 ratio_p = (ratio != 1
4218 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4220 if (ratio != 1 && !ratio_p)
4221 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4223 if (s_offset && !offset_p && !symbol_present)
4224 cost += add_cost (speed, address_mode);
4226 if (may_autoinc)
4227 *may_autoinc = autoinc;
4228 if (autoinc)
4229 acost = data->ainc_costs[autoinc_type];
4230 else
4231 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4232 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4233 return new_cost (cost + acost, complexity);
4236 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
4237 the EXPR operand holding the shift. COST0 and COST1 are the costs for
4238 calculating the operands of EXPR. Returns true if successful, and returns
4239 the cost in COST. */
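/* For example, for EXPR == a + b * 4 the constant gives m == 2, so the
   alternatives compared below are a shift of b by 2 followed by an add
   (AS_COST) versus a single shift-and-add instruction (SA_COST), when the
   target provides one.  For a MINUS_EXPR, which operand holds the
   multiplication decides between shiftsub0 and shiftsub1.  */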
4241 static bool
4242 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4243 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4245 comp_cost res;
4246 tree op1 = TREE_OPERAND (expr, 1);
4247 tree cst = TREE_OPERAND (mult, 1);
4248 tree multop = TREE_OPERAND (mult, 0);
4249 int m = exact_log2 (int_cst_value (cst));
4250 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4251 int as_cost, sa_cost;
4252 bool mult_in_op1;
4254 if (!(m >= 0 && m < maxm))
4255 return false;
4257 STRIP_NOPS (op1);
4258 mult_in_op1 = operand_equal_p (op1, mult, 0);
4260 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4262 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4263 use that in preference to a shift insn followed by an add insn. */
4264 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4265 ? shiftadd_cost (speed, mode, m)
4266 : (mult_in_op1
4267 ? shiftsub1_cost (speed, mode, m)
4268 : shiftsub0_cost (speed, mode, m)));
4270 res = new_cost (MIN (as_cost, sa_cost), 0);
4271 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4273 STRIP_NOPS (multop);
4274 if (!is_gimple_val (multop))
4275 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4277 *cost = res;
4278 return true;
4281 /* Estimates cost of forcing expression EXPR into a variable. */
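/* The first invocation measures, once per compilation, the cost of three
   representative expressions -- an integer constant, the address of a
   symbol, and a symbol address plus a constant offset -- for both the
   size (i == 0) and speed (i == 1) metrics.  These serve as building
   blocks for the estimates below.  */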
4283 static comp_cost
4284 force_expr_to_var_cost (tree expr, bool speed)
4286 static bool costs_initialized = false;
4287 static unsigned integer_cost [2];
4288 static unsigned symbol_cost [2];
4289 static unsigned address_cost [2];
4290 tree op0, op1;
4291 comp_cost cost0, cost1, cost;
4292 machine_mode mode;
4294 if (!costs_initialized)
4296 tree type = build_pointer_type (integer_type_node);
4297 tree var, addr;
4298 rtx x;
4299 int i;
4301 var = create_tmp_var_raw (integer_type_node, "test_var");
4302 TREE_STATIC (var) = 1;
4303 x = produce_memory_decl_rtl (var, NULL);
4304 SET_DECL_RTL (var, x);
4306 addr = build1 (ADDR_EXPR, type, var);
4309 for (i = 0; i < 2; i++)
4311 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4312 2000), i);
4314 symbol_cost[i] = computation_cost (addr, i) + 1;
4316 address_cost[i]
4317 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4318 if (dump_file && (dump_flags & TDF_DETAILS))
4320 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4321 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4322 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4323 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4324 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4325 fprintf (dump_file, "\n");
4329 costs_initialized = true;
4332 STRIP_NOPS (expr);
4334 if (SSA_VAR_P (expr))
4335 return no_cost;
4337 if (is_gimple_min_invariant (expr))
4339 if (TREE_CODE (expr) == INTEGER_CST)
4340 return new_cost (integer_cost [speed], 0);
4342 if (TREE_CODE (expr) == ADDR_EXPR)
4344 tree obj = TREE_OPERAND (expr, 0);
4346 if (TREE_CODE (obj) == VAR_DECL
4347 || TREE_CODE (obj) == PARM_DECL
4348 || TREE_CODE (obj) == RESULT_DECL)
4349 return new_cost (symbol_cost [speed], 0);
4352 return new_cost (address_cost [speed], 0);
4355 switch (TREE_CODE (expr))
4357 case POINTER_PLUS_EXPR:
4358 case PLUS_EXPR:
4359 case MINUS_EXPR:
4360 case MULT_EXPR:
4361 op0 = TREE_OPERAND (expr, 0);
4362 op1 = TREE_OPERAND (expr, 1);
4363 STRIP_NOPS (op0);
4364 STRIP_NOPS (op1);
4365 break;
4367 CASE_CONVERT:
4368 case NEGATE_EXPR:
4369 op0 = TREE_OPERAND (expr, 0);
4370 STRIP_NOPS (op0);
4371 op1 = NULL_TREE;
4372 break;
4374 default:
4375 /* Just an arbitrary value, FIXME. */
4376 return new_cost (target_spill_cost[speed], 0);
4379 if (op0 == NULL_TREE
4380 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4381 cost0 = no_cost;
4382 else
4383 cost0 = force_expr_to_var_cost (op0, speed);
4385 if (op1 == NULL_TREE
4386 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4387 cost1 = no_cost;
4388 else
4389 cost1 = force_expr_to_var_cost (op1, speed);
4391 mode = TYPE_MODE (TREE_TYPE (expr));
4392 switch (TREE_CODE (expr))
4394 case POINTER_PLUS_EXPR:
4395 case PLUS_EXPR:
4396 case MINUS_EXPR:
4397 case NEGATE_EXPR:
4398 cost = new_cost (add_cost (speed, mode), 0);
4399 if (TREE_CODE (expr) != NEGATE_EXPR)
4401 tree mult = NULL_TREE;
4402 comp_cost sa_cost;
4403 if (TREE_CODE (op1) == MULT_EXPR)
4404 mult = op1;
4405 else if (TREE_CODE (op0) == MULT_EXPR)
4406 mult = op0;
4408 if (mult != NULL_TREE
4409 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4410 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4411 speed, &sa_cost))
4412 return sa_cost;
4414 break;
4416 CASE_CONVERT:
4418 tree inner_mode, outer_mode;
4419 outer_mode = TREE_TYPE (expr);
4420 inner_mode = TREE_TYPE (op0);
4421 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4422 TYPE_MODE (inner_mode), speed), 0);
4424 break;
4426 case MULT_EXPR:
4427 if (cst_and_fits_in_hwi (op0))
4428 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4429 mode, speed), 0);
4430 else if (cst_and_fits_in_hwi (op1))
4431 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4432 mode, speed), 0);
4433 else
4434 return new_cost (target_spill_cost [speed], 0);
4435 break;
4437 default:
4438 gcc_unreachable ();
4441 cost = add_costs (cost, cost0);
4442 cost = add_costs (cost, cost1);
4444 /* Bound the cost by target_spill_cost. The parts of complicated
4445 computations often are either loop invariant or at least can
4446 be shared between several iv uses, so letting this grow without
4447 limits would not give reasonable results. */
4448 if (cost.cost > (int) target_spill_cost [speed])
4449 cost.cost = target_spill_cost [speed];
4451 return cost;
4454 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4455 invariants the computation depends on. */
4457 static comp_cost
4458 force_var_cost (struct ivopts_data *data,
4459 tree expr, bitmap *depends_on)
4461 if (depends_on)
4463 fd_ivopts_data = data;
4464 walk_tree (&expr, find_depends, depends_on, NULL);
4467 return force_expr_to_var_cost (expr, data->speed);
4470 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4471 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4472 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4473 invariants the computation depends on. */
4475 static comp_cost
4476 split_address_cost (struct ivopts_data *data,
4477 tree addr, bool *symbol_present, bool *var_present,
4478 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4480 tree core;
4481 HOST_WIDE_INT bitsize;
4482 HOST_WIDE_INT bitpos;
4483 tree toffset;
4484 machine_mode mode;
4485 int unsignedp, reversep, volatilep;
4487 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4488 &unsignedp, &reversep, &volatilep, false);
4490 if (toffset != 0
4491 || bitpos % BITS_PER_UNIT != 0
4492 || reversep
4493 || TREE_CODE (core) != VAR_DECL)
4495 *symbol_present = false;
4496 *var_present = true;
4497 fd_ivopts_data = data;
4498 if (depends_on)
4499 walk_tree (&addr, find_depends, depends_on, NULL);
4501 return new_cost (target_spill_cost[data->speed], 0);
4504 *offset += bitpos / BITS_PER_UNIT;
4505 if (TREE_STATIC (core)
4506 || DECL_EXTERNAL (core))
4508 *symbol_present = true;
4509 *var_present = false;
4510 return no_cost;
4513 *symbol_present = false;
4514 *var_present = true;
4515 return no_cost;
4518 /* Estimates cost of expressing difference of addresses E1 - E2 as
4519 var + symbol + offset. The value of offset is added to OFFSET,
4520 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4521 part is missing. DEPENDS_ON is a set of the invariants the computation
4522 depends on. */
4524 static comp_cost
4525 ptr_difference_cost (struct ivopts_data *data,
4526 tree e1, tree e2, bool *symbol_present, bool *var_present,
4527 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4529 HOST_WIDE_INT diff = 0;
4530 aff_tree aff_e1, aff_e2;
4531 tree type;
4533 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4535 if (ptr_difference_const (e1, e2, &diff))
4537 *offset += diff;
4538 *symbol_present = false;
4539 *var_present = false;
4540 return no_cost;
4543 if (integer_zerop (e2))
4544 return split_address_cost (data, TREE_OPERAND (e1, 0),
4545 symbol_present, var_present, offset, depends_on);
4547 *symbol_present = false;
4548 *var_present = true;
4550 type = signed_type_for (TREE_TYPE (e1));
4551 tree_to_aff_combination (e1, type, &aff_e1);
4552 tree_to_aff_combination (e2, type, &aff_e2);
4553 aff_combination_scale (&aff_e2, -1);
4554 aff_combination_add (&aff_e1, &aff_e2);
4556 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4559 /* Estimates cost of expressing difference E1 - E2 as
4560 var + symbol + offset. The value of offset is added to OFFSET,
4561 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4562 part is missing. DEPENDS_ON is a set of the invariants the computation
4563 depends on. */
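/* For instance, for E1 == &a[4] and E2 == &a[0] the whole difference is
   folded into OFFSET and the cost is zero, whereas for two unrelated
   pointers the difference has to be computed into a register before the
   loop, which is what force_var_cost accounts for.  */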
4565 static comp_cost
4566 difference_cost (struct ivopts_data *data,
4567 tree e1, tree e2, bool *symbol_present, bool *var_present,
4568 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4570 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4571 unsigned HOST_WIDE_INT off1, off2;
4572 aff_tree aff_e1, aff_e2;
4573 tree type;
4575 e1 = strip_offset (e1, &off1);
4576 e2 = strip_offset (e2, &off2);
4577 *offset += off1 - off2;
4579 STRIP_NOPS (e1);
4580 STRIP_NOPS (e2);
4582 if (TREE_CODE (e1) == ADDR_EXPR)
4583 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4584 offset, depends_on);
4585 *symbol_present = false;
4587 if (operand_equal_p (e1, e2, 0))
4589 *var_present = false;
4590 return no_cost;
4593 *var_present = true;
4595 if (integer_zerop (e2))
4596 return force_var_cost (data, e1, depends_on);
4598 if (integer_zerop (e1))
4600 comp_cost cost = force_var_cost (data, e2, depends_on);
4601 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4602 return cost;
4605 type = signed_type_for (TREE_TYPE (e1));
4606 tree_to_aff_combination (e1, type, &aff_e1);
4607 tree_to_aff_combination (e2, type, &aff_e2);
4608 aff_combination_scale (&aff_e2, -1);
4609 aff_combination_add (&aff_e1, &aff_e2);
4611 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4614 /* Returns true if AFF1 and AFF2 are identical. */
4616 static bool
4617 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4619 unsigned i;
4621 if (aff1->n != aff2->n)
4622 return false;
4624 for (i = 0; i < aff1->n; i++)
4626 if (aff1->elts[i].coef != aff2->elts[i].coef)
4627 return false;
4629 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4630 return false;
4632 return true;
4635 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4637 static int
4638 get_expr_id (struct ivopts_data *data, tree expr)
4640 struct iv_inv_expr_ent ent;
4641 struct iv_inv_expr_ent **slot;
4643 ent.expr = expr;
4644 ent.hash = iterative_hash_expr (expr, 0);
4645 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4646 if (*slot)
4647 return (*slot)->id;
4649 *slot = XNEW (struct iv_inv_expr_ent);
4650 (*slot)->expr = expr;
4651 (*slot)->hash = ent.hash;
4652 (*slot)->id = data->inv_expr_id++;
4653 return (*slot)->id;
4656 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4657 requires a new compiler generated temporary. Returns -1 otherwise.
4658 ADDRESS_P is a flag indicating if the expression is for address
4659 computation. */
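/* For example, UBASE == &a[0] + 4 and CBASE == &a[0] with RATIO == 1
   agree once the constant part is stripped, so no temporary is needed
   (-1 is returned); UBASE == p and CBASE == q, on the other hand,
   generally require materializing p - RATIO * q as a new loop
   invariant.  */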
4661 static int
4662 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4663 tree cbase, HOST_WIDE_INT ratio,
4664 bool address_p)
4666 aff_tree ubase_aff, cbase_aff;
4667 tree expr, ub, cb;
4669 STRIP_NOPS (ubase);
4670 STRIP_NOPS (cbase);
4671 ub = ubase;
4672 cb = cbase;
4674 if ((TREE_CODE (ubase) == INTEGER_CST)
4675 && (TREE_CODE (cbase) == INTEGER_CST))
4676 return -1;
4678 /* Strips the constant part. */
4679 if (TREE_CODE (ubase) == PLUS_EXPR
4680 || TREE_CODE (ubase) == MINUS_EXPR
4681 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4683 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4684 ubase = TREE_OPERAND (ubase, 0);
4687 /* Strips the constant part. */
4688 if (TREE_CODE (cbase) == PLUS_EXPR
4689 || TREE_CODE (cbase) == MINUS_EXPR
4690 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4692 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4693 cbase = TREE_OPERAND (cbase, 0);
4696 if (address_p)
4698 if (((TREE_CODE (ubase) == SSA_NAME)
4699 || (TREE_CODE (ubase) == ADDR_EXPR
4700 && is_gimple_min_invariant (ubase)))
4701 && (TREE_CODE (cbase) == INTEGER_CST))
4702 return -1;
4704 if (((TREE_CODE (cbase) == SSA_NAME)
4705 || (TREE_CODE (cbase) == ADDR_EXPR
4706 && is_gimple_min_invariant (cbase)))
4707 && (TREE_CODE (ubase) == INTEGER_CST))
4708 return -1;
4711 if (ratio == 1)
4713 if (operand_equal_p (ubase, cbase, 0))
4714 return -1;
4716 if (TREE_CODE (ubase) == ADDR_EXPR
4717 && TREE_CODE (cbase) == ADDR_EXPR)
4719 tree usym, csym;
4721 usym = TREE_OPERAND (ubase, 0);
4722 csym = TREE_OPERAND (cbase, 0);
4723 if (TREE_CODE (usym) == ARRAY_REF)
4725 tree ind = TREE_OPERAND (usym, 1);
4726 if (TREE_CODE (ind) == INTEGER_CST
4727 && tree_fits_shwi_p (ind)
4728 && tree_to_shwi (ind) == 0)
4729 usym = TREE_OPERAND (usym, 0);
4731 if (TREE_CODE (csym) == ARRAY_REF)
4733 tree ind = TREE_OPERAND (csym, 1);
4734 if (TREE_CODE (ind) == INTEGER_CST
4735 && tree_fits_shwi_p (ind)
4736 && tree_to_shwi (ind) == 0)
4737 csym = TREE_OPERAND (csym, 0);
4739 if (operand_equal_p (usym, csym, 0))
4740 return -1;
4742 /* Now do a more complex comparison. */
4743 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4744 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4745 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4746 return -1;
4749 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4750 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4752 aff_combination_scale (&cbase_aff, -1 * ratio);
4753 aff_combination_add (&ubase_aff, &cbase_aff);
4754 expr = aff_combination_to_tree (&ubase_aff);
4755 return get_expr_id (data, expr);
4760 /* Determines the cost of the computation by which USE is expressed
4761 from induction variable CAND. If ADDRESS_P is true, we just need
4762 to create an address from it, otherwise we want to get it into a
4763 register. A set of invariants we depend on is stored in
4764 DEPENDS_ON. AT is the statement at which the value is computed.
4765 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4766 addressing is likely. */
4768 static comp_cost
4769 get_computation_cost_at (struct ivopts_data *data,
4770 struct iv_use *use, struct iv_cand *cand,
4771 bool address_p, bitmap *depends_on, gimple *at,
4772 bool *can_autoinc,
4773 int *inv_expr_id)
4775 tree ubase = use->iv->base, ustep = use->iv->step;
4776 tree cbase, cstep;
4777 tree utype = TREE_TYPE (ubase), ctype;
4778 unsigned HOST_WIDE_INT cstepi, offset = 0;
4779 HOST_WIDE_INT ratio, aratio;
4780 bool var_present, symbol_present, stmt_is_after_inc;
4781 comp_cost cost;
4782 widest_int rat;
4783 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4784 machine_mode mem_mode = (address_p
4785 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4786 : VOIDmode);
4788 if (depends_on)
4789 *depends_on = NULL;
4791 /* Only consider real candidates. */
4792 if (!cand->iv)
4793 return infinite_cost;
4795 cbase = cand->iv->base;
4796 cstep = cand->iv->step;
4797 ctype = TREE_TYPE (cbase);
4799 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4801 /* We do not have enough precision to express the values of USE. */
4802 return infinite_cost;
4805 if (address_p
4806 || (use->iv->base_object
4807 && cand->iv->base_object
4808 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4809 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4811 /* Do not try to express address of an object with computation based
4812 on address of a different object. This may cause problems in rtl
4813 level alias analysis (that does not expect this to be happening,
4814 as this is illegal in C), and would be unlikely to be useful
4815 anyway. */
4816 if (use->iv->base_object
4817 && cand->iv->base_object
4818 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4819 return infinite_cost;
4822 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4824 /* TODO -- add direct handling of this case. */
4825 goto fallback;
4828 /* CSTEPI is removed from the offset in case the statement is after the
4829 increment. If the step is not constant, we use zero instead.
4830 This is a bit imprecise (there is the extra addition), but
4831 redundancy elimination is likely to transform the code so that
4832 it uses the value of the variable before the increment anyway,
4833 so this is not all that unrealistic. */
4834 if (cst_and_fits_in_hwi (cstep))
4835 cstepi = int_cst_value (cstep);
4836 else
4837 cstepi = 0;
4839 if (!constant_multiple_of (ustep, cstep, &rat))
4840 return infinite_cost;
4842 if (wi::fits_shwi_p (rat))
4843 ratio = rat.to_shwi ();
4844 else
4845 return infinite_cost;
4847 STRIP_NOPS (cbase);
4848 ctype = TREE_TYPE (cbase);
4850 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4852 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4853 or ratio == 1, it is better to handle this like
4855 ubase - ratio * cbase + ratio * var
4857 (this also holds in the case ratio == -1, TODO). */
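/* When CBASE is a constant, the whole term - ratio * cbase simply
   becomes part of the offset, and only UBASE remains to be
   decomposed.  */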
4859 if (cst_and_fits_in_hwi (cbase))
4861 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4862 cost = difference_cost (data,
4863 ubase, build_int_cst (utype, 0),
4864 &symbol_present, &var_present, &offset,
4865 depends_on);
4866 cost.cost /= avg_loop_niter (data->current_loop);
4868 else if (ratio == 1)
4870 tree real_cbase = cbase;
4872 /* Check to see if any adjustment is needed. */
4873 if (cstepi == 0 && stmt_is_after_inc)
4875 aff_tree real_cbase_aff;
4876 aff_tree cstep_aff;
4878 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4879 &real_cbase_aff);
4880 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4882 aff_combination_add (&real_cbase_aff, &cstep_aff);
4883 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4886 cost = difference_cost (data,
4887 ubase, real_cbase,
4888 &symbol_present, &var_present, &offset,
4889 depends_on);
4890 cost.cost /= avg_loop_niter (data->current_loop);
4892 else if (address_p
4893 && !POINTER_TYPE_P (ctype)
4894 && multiplier_allowed_in_address_p
4895 (ratio, mem_mode,
4896 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4898 if (cstepi == 0 && stmt_is_after_inc)
4900 if (POINTER_TYPE_P (ctype))
4901 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4902 else
4903 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4905 cbase
4906 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4907 cost = difference_cost (data,
4908 ubase, cbase,
4909 &symbol_present, &var_present, &offset,
4910 depends_on);
4911 cost.cost /= avg_loop_niter (data->current_loop);
4913 else
4915 cost = force_var_cost (data, cbase, depends_on);
4916 cost = add_costs (cost,
4917 difference_cost (data,
4918 ubase, build_int_cst (utype, 0),
4919 &symbol_present, &var_present,
4920 &offset, depends_on));
4921 cost.cost /= avg_loop_niter (data->current_loop);
4922 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4925 /* The set of invariants depended on by a sub-use has already been
4926 computed for the first use in the group. */
4927 if (use->sub_id)
4929 cost.cost = 0;
4930 if (depends_on && *depends_on)
4931 bitmap_clear (*depends_on);
4933 else if (inv_expr_id)
4935 *inv_expr_id =
4936 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4937 /* Clear depends on. */
4938 if (*inv_expr_id != -1 && depends_on && *depends_on)
4939 bitmap_clear (*depends_on);
4942 /* If we are after the increment, the value of the candidate is higher by
4943 one iteration. */
4944 if (stmt_is_after_inc)
4945 offset -= ratio * cstepi;
4947 /* Now the computation is of the shape symbol + var1 + const + ratio * var2
4948 (the symbol/var1/const parts may be omitted). If we are looking for an
4949 address, find the cost of addressing this. */
4950 if (address_p)
4951 return add_costs (cost,
4952 get_address_cost (symbol_present, var_present,
4953 offset, ratio, cstepi,
4954 mem_mode,
4955 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4956 speed, stmt_is_after_inc,
4957 can_autoinc));
4959 /* Otherwise estimate the costs for computing the expression. */
4960 if (!symbol_present && !var_present && !offset)
4962 if (ratio != 1)
4963 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4964 return cost;
4967 /* Symbol + offset should be compile-time computable so consider that they
4968 are added once to the variable, if present. */
4969 if (var_present && (symbol_present || offset))
4970 cost.cost += adjust_setup_cost (data,
4971 add_cost (speed, TYPE_MODE (ctype)));
4973 /* Having offset does not affect runtime cost in case it is added to
4974 symbol, but it increases complexity. */
4975 if (offset)
4976 cost.complexity++;
4978 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4980 aratio = ratio > 0 ? ratio : -ratio;
4981 if (aratio != 1)
4982 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4983 return cost;
4985 fallback:
4986 if (can_autoinc)
4987 *can_autoinc = false;
4990 /* Just get the expression, expand it and measure the cost. */
4991 tree comp = get_computation_at (data->current_loop, use, cand, at);
4993 if (!comp)
4994 return infinite_cost;
4996 if (address_p)
4997 comp = build_simple_mem_ref (comp);
4999 return new_cost (computation_cost (comp, speed), 0);
5003 /* Determines the cost of the computation by which USE is expressed
5004 from induction variable CAND. If ADDRESS_P is true, we just need
5005 to create an address from it, otherwise we want to get it into a
5006 register. A set of invariants we depend on is stored in
5007 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5008 autoinc addressing is likely. */
5010 static comp_cost
5011 get_computation_cost (struct ivopts_data *data,
5012 struct iv_use *use, struct iv_cand *cand,
5013 bool address_p, bitmap *depends_on,
5014 bool *can_autoinc, int *inv_expr_id)
5016 return get_computation_cost_at (data,
5017 use, cand, address_p, depends_on, use->stmt,
5018 can_autoinc, inv_expr_id);
5021 /* Determines cost of basing replacement of USE on CAND in a generic
5022 expression. */
5024 static bool
5025 determine_use_iv_cost_generic (struct ivopts_data *data,
5026 struct iv_use *use, struct iv_cand *cand)
5028 bitmap depends_on;
5029 comp_cost cost;
5030 int inv_expr_id = -1;
5032 /* The simple case first -- if we need to express the value of the
5033 preserved original biv, the cost is 0. This also prevents us from
5034 counting the cost of the increment twice -- once at this use and once
5035 in the cost of the candidate. */
5036 if (cand->pos == IP_ORIGINAL
5037 && cand->incremented_at == use->stmt)
5039 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5040 ERROR_MARK, -1);
5041 return true;
5044 cost = get_computation_cost (data, use, cand, false, &depends_on,
5045 NULL, &inv_expr_id);
5047 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5048 inv_expr_id);
5050 return !infinite_cost_p (cost);
5053 /* Determines cost of basing replacement of USE on CAND in an address. */
5055 static bool
5056 determine_use_iv_cost_address (struct ivopts_data *data,
5057 struct iv_use *use, struct iv_cand *cand)
5059 bitmap depends_on;
5060 bool can_autoinc;
5061 int inv_expr_id = -1;
5062 struct iv_use *sub_use;
5063 comp_cost sub_cost;
5064 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
5065 &can_autoinc, &inv_expr_id);
5067 if (cand->ainc_use == use)
5069 if (can_autoinc)
5070 cost.cost -= cand->cost_step;
5071 /* If we generated the candidate solely for exploiting autoincrement
5072 opportunities, and it turns out it can't be used, set the cost to
5073 infinity to make sure we ignore it. */
5074 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5075 cost = infinite_cost;
5077 for (sub_use = use->next;
5078 sub_use && !infinite_cost_p (cost);
5079 sub_use = sub_use->next)
5081 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
5082 &can_autoinc, NULL);
5083 cost = add_costs (cost, sub_cost);
5086 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5087 inv_expr_id);
5089 return !infinite_cost_p (cost);
5092 /* Computes value of candidate CAND at position AT in iteration NITER, and
5093 stores it to VAL. */
5095 static void
5096 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5097 aff_tree *val)
5099 aff_tree step, delta, nit;
5100 struct iv *iv = cand->iv;
5101 tree type = TREE_TYPE (iv->base);
5102 tree steptype = unsigned_type_for (type);
5107 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5108 aff_combination_convert (&step, steptype);
5109 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5110 aff_combination_convert (&nit, steptype);
5111 aff_combination_mult (&nit, &step, &delta);
5112 if (stmt_after_increment (loop, cand, at))
5113 aff_combination_add (&delta, &step);
5115 tree_to_aff_combination (iv->base, type, val);
5116 if (!POINTER_TYPE_P (type))
5117 aff_combination_convert (val, steptype);
5118 aff_combination_add (val, &delta);
5121 /* Returns the period of induction variable IV. */
5123 static tree
5124 iv_period (struct iv *iv)
5126 tree step = iv->step, period, type;
5127 tree pow2div;
5129 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5131 type = unsigned_type_for (TREE_TYPE (step));
5132 /* The period of the iv is lcm (step, type_range) / step - 1,
5133 i.e., N * type_range / step - 1. Since the type range is a power
5134 of two, N == step >> num_of_ending_zeros_binary (step),
5135 so the final result is
5137 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
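/* For example, for a 32-bit unsigned iv with step 4 we have
   num_of_ending_zeros_binary (4) == 2, so the period is
   (2^32 >> 2) - 1 == 0x3fffffff: the first 2^30 values of the iv are
   pairwise distinct, and only after that many increments can it wrap
   back to its initial value.  */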
5140 pow2div = num_ending_zeros (step);
5142 period = build_low_bits_mask (type,
5143 (TYPE_PRECISION (type)
5144 - tree_to_uhwi (pow2div)));
5146 return period;
5149 /* Returns the comparison operator used when eliminating the iv USE. */
5151 static enum tree_code
5152 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5154 struct loop *loop = data->current_loop;
5155 basic_block ex_bb;
5156 edge exit;
5158 ex_bb = gimple_bb (use->stmt);
5159 exit = EDGE_SUCC (ex_bb, 0);
5160 if (flow_bb_inside_loop_p (loop, exit->dest))
5161 exit = EDGE_SUCC (ex_bb, 1);
5163 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5166 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5167 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5168 calculation is performed in a non-wrapping type.
5170 TODO: More generally, we could test for the situation that
5171 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5172 This would require knowing the sign of OFFSET. */
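/* For example, if BASE was computed as p + N and OFFSET expands to the
   same N, then BASE - OFFSET is just p and trivially cannot overflow;
   the affine expansions below are what detect this equality.  */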
5174 static bool
5175 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5177 enum tree_code code;
5178 tree e1, e2;
5179 aff_tree aff_e1, aff_e2, aff_offset;
5181 if (!nowrap_type_p (TREE_TYPE (base)))
5182 return false;
5184 base = expand_simple_operations (base);
5186 if (TREE_CODE (base) == SSA_NAME)
5188 gimple *stmt = SSA_NAME_DEF_STMT (base);
5190 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5191 return false;
5193 code = gimple_assign_rhs_code (stmt);
5194 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5195 return false;
5197 e1 = gimple_assign_rhs1 (stmt);
5198 e2 = gimple_assign_rhs2 (stmt);
5200 else
5202 code = TREE_CODE (base);
5203 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5204 return false;
5205 e1 = TREE_OPERAND (base, 0);
5206 e2 = TREE_OPERAND (base, 1);
5209 /* Use affine expansion as deeper inspection to prove the equality. */
5210 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5211 &aff_e2, &data->name_expansion_cache);
5212 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5213 &aff_offset, &data->name_expansion_cache);
5214 aff_combination_scale (&aff_offset, -1);
5215 switch (code)
5217 case PLUS_EXPR:
5218 aff_combination_add (&aff_e2, &aff_offset);
5219 if (aff_combination_zero_p (&aff_e2))
5220 return true;
5222 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5223 &aff_e1, &data->name_expansion_cache);
5224 aff_combination_add (&aff_e1, &aff_offset);
5225 return aff_combination_zero_p (&aff_e1);
5227 case POINTER_PLUS_EXPR:
5228 aff_combination_add (&aff_e2, &aff_offset);
5229 return aff_combination_zero_p (&aff_e2);
5231 default:
5232 return false;
5236 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5237 comparison with CAND. NITER describes the number of iterations of
5238 the loops. If successful, the comparison in COMP_P is altered accordingly.
5240 We aim to handle the following situation:
5242 sometype *base, *p;
5243 int a, b, i;
5245 i = a;
5246 p = p_0 = base + a;
5248 do
5249 {
5250 bla (*p);
5251 p++;
5252 i++;
5253 }
5254 while (i < b);
5256 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5257 We aim to optimize this to
5259 p = p_0 = base + a;
5260 do
5261 {
5262 bla (*p);
5263 p++;
5264 }
5265 while (p < p_0 - a + b);
5267 This preserves the correctness, since the pointer arithmetics does not
5268 overflow. More precisely:
5270 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5271 overflow in computing it or the values of p.
5272 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5273 overflow. To prove this, we use the fact that p_0 = base + a. */
5275 static bool
5276 iv_elimination_compare_lt (struct ivopts_data *data,
5277 struct iv_cand *cand, enum tree_code *comp_p,
5278 struct tree_niter_desc *niter)
5280 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5281 struct aff_tree nit, tmpa, tmpb;
5282 enum tree_code comp;
5283 HOST_WIDE_INT step;
5285 /* We need to know that the candidate induction variable does not overflow.
5286 While more complex analysis may be used to prove this, for now just
5287 check that the variable appears in the original program and that it
5288 is computed in a type that guarantees no overflows. */
5289 cand_type = TREE_TYPE (cand->iv->base);
5290 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5291 return false;
5293 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5294 the calculation of the BOUND could overflow, making the comparison
5295 invalid. */
5296 if (!data->loop_single_exit_p)
5297 return false;
5299 /* We need to be able to decide whether candidate is increasing or decreasing
5300 in order to choose the right comparison operator. */
5301 if (!cst_and_fits_in_hwi (cand->iv->step))
5302 return false;
5303 step = int_cst_value (cand->iv->step);
5305 /* Check that the number of iterations matches the expected pattern:
5306 a + 1 > b ? 0 : b - a - 1. */
5307 mbz = niter->may_be_zero;
5308 if (TREE_CODE (mbz) == GT_EXPR)
5310 /* Handle a + 1 > b. */
5311 tree op0 = TREE_OPERAND (mbz, 0);
5312 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5314 a = TREE_OPERAND (op0, 0);
5315 b = TREE_OPERAND (mbz, 1);
5317 else
5318 return false;
5320 else if (TREE_CODE (mbz) == LT_EXPR)
5322 tree op1 = TREE_OPERAND (mbz, 1);
5324 /* Handle b < a + 1. */
5325 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5327 a = TREE_OPERAND (op1, 0);
5328 b = TREE_OPERAND (mbz, 0);
5330 else
5331 return false;
5333 else
5334 return false;
5336 /* Expected number of iterations is B - A - 1. Check that it matches
5337 the actual number, i.e., that B - A - NITER = 1. */
5338 tree_to_aff_combination (niter->niter, nit_type, &nit);
5339 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5340 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5341 aff_combination_scale (&nit, -1);
5342 aff_combination_scale (&tmpa, -1);
5343 aff_combination_add (&tmpb, &tmpa);
5344 aff_combination_add (&tmpb, &nit);
5345 if (tmpb.n != 0 || tmpb.offset != 1)
5346 return false;
5348 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5349 overflow. */
5350 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5351 cand->iv->step,
5352 fold_convert (TREE_TYPE (cand->iv->step), a));
5353 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5354 return false;
5356 /* Determine the new comparison operator. */
5357 comp = step < 0 ? GT_EXPR : LT_EXPR;
5358 if (*comp_p == NE_EXPR)
5359 *comp_p = comp;
5360 else if (*comp_p == EQ_EXPR)
5361 *comp_p = invert_tree_comparison (comp, false);
5362 else
5363 gcc_unreachable ();
5365 return true;
5368 /* Check whether it is possible to express the condition in USE by comparison
5369 of candidate CAND. If so, store the value compared with to BOUND, and the
5370 comparison operator to COMP. */
5372 static bool
5373 may_eliminate_iv (struct ivopts_data *data,
5374 struct iv_use *use, struct iv_cand *cand, tree *bound,
5375 enum tree_code *comp)
5377 basic_block ex_bb;
5378 edge exit;
5379 tree period;
5380 struct loop *loop = data->current_loop;
5381 aff_tree bnd;
5382 struct tree_niter_desc *desc = NULL;
5384 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5385 return false;
5387 /* For now works only for exits that dominate the loop latch.
5388 TODO: extend to other conditions inside loop body. */
5389 ex_bb = gimple_bb (use->stmt);
5390 if (use->stmt != last_stmt (ex_bb)
5391 || gimple_code (use->stmt) != GIMPLE_COND
5392 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5393 return false;
5395 exit = EDGE_SUCC (ex_bb, 0);
5396 if (flow_bb_inside_loop_p (loop, exit->dest))
5397 exit = EDGE_SUCC (ex_bb, 1);
5398 if (flow_bb_inside_loop_p (loop, exit->dest))
5399 return false;
5401 desc = niter_for_exit (data, exit);
5402 if (!desc)
5403 return false;
5405 /* Determine whether we can use the variable to test the exit condition.
5406 This is the case iff the period of the induction variable is greater
5407 than the number of iterations for which the exit condition is true. */
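/* E.g. an unsigned char iv with step 1 has period 255, so it can
   replace the exit test of a loop known to run at most 200 iterations,
   but not of one that may run 300 -- the candidate could wrap before
   the computed bound is reached.  */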
5408 period = iv_period (cand->iv);
5410 /* If the number of iterations is constant, compare against it directly. */
5411 if (TREE_CODE (desc->niter) == INTEGER_CST)
5413 /* See cand_value_at. */
5414 if (stmt_after_increment (loop, cand, use->stmt))
5416 if (!tree_int_cst_lt (desc->niter, period))
5417 return false;
5419 else
5421 if (tree_int_cst_lt (period, desc->niter))
5422 return false;
5426 /* If not, and if this is the only possible exit of the loop, see whether
5427 we can get a conservative estimate on the number of iterations of the
5428 entire loop and compare against that instead. */
5429 else
5431 widest_int period_value, max_niter;
5433 max_niter = desc->max;
5434 if (stmt_after_increment (loop, cand, use->stmt))
5435 max_niter += 1;
5436 period_value = wi::to_widest (period);
5437 if (wi::gtu_p (max_niter, period_value))
5439 /* See if we can take advantage of inferred loop bound information. */
5440 if (data->loop_single_exit_p)
5442 if (!max_loop_iterations (loop, &max_niter))
5443 return false;
5444 /* The loop bound is already adjusted by adding 1. */
5445 if (wi::gtu_p (max_niter, period_value))
5446 return false;
5448 else
5449 return false;
5453 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5455 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5456 aff_combination_to_tree (&bnd));
5457 *comp = iv_elimination_compare (data, use);
5459 /* It is unlikely that computing the number of iterations using division
5460 would be more profitable than keeping the original induction variable. */
5461 if (expression_expensive_p (*bound))
5462 return false;
5464 /* Sometimes it is possible to handle the situation that the number of
5465 iterations may be zero unless additional assumptions hold, by using <
5466 instead of != in the exit condition.
5468 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5469 base the exit condition on it. However, that is often too
5470 expensive. */
5471 if (!integer_zerop (desc->may_be_zero))
5472 return iv_elimination_compare_lt (data, cand, comp, desc);
5474 return true;
5477 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5478 be copied, if it is used in the loop body and DATA->body_includes_call. */
5480 static int
5481 parm_decl_cost (struct ivopts_data *data, tree bound)
5483 tree sbound = bound;
5484 STRIP_NOPS (sbound);
5486 if (TREE_CODE (sbound) == SSA_NAME
5487 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5488 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5489 && data->body_includes_call)
5490 return COSTS_N_INSNS (1);
5492 return 0;
5495 /* Determines cost of basing replacement of USE on CAND in a condition. */
5497 static bool
5498 determine_use_iv_cost_condition (struct ivopts_data *data,
5499 struct iv_use *use, struct iv_cand *cand)
5501 tree bound = NULL_TREE;
5502 struct iv *cmp_iv;
5503 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5504 comp_cost elim_cost, express_cost, cost, bound_cost;
5505 bool ok;
5506 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5507 tree *control_var, *bound_cst;
5508 enum tree_code comp = ERROR_MARK;
5510 /* Only consider real candidates. */
5511 if (!cand->iv)
5513 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5514 ERROR_MARK, -1);
5515 return false;
5518 /* Try iv elimination. */
5519 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5521 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5522 if (elim_cost.cost == 0)
5523 elim_cost.cost = parm_decl_cost (data, bound);
5524 else if (TREE_CODE (bound) == INTEGER_CST)
5525 elim_cost.cost = 0;
5526 /* If we replace a loop condition 'i < n' with 'p < base + n',
5527 depends_on_elim will have 'base' and 'n' set, which implies
5528 that both 'base' and 'n' will be live during the loop. More likely,
5529 'base + n' will be loop invariant, resulting in only one live value
5530 during the loop. So in that case we clear depends_on_elim and set
5531 elim_inv_expr_id instead. */
5532 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5534 elim_inv_expr_id = get_expr_id (data, bound);
5535 bitmap_clear (depends_on_elim);
5537 /* The bound is a loop invariant, so it will be only computed
5538 once. */
5539 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5541 else
5542 elim_cost = infinite_cost;
5544 /* Try expressing the original giv. If it is compared with an invariant,
5545 note that we cannot get rid of it. */
5546 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5547 NULL, &cmp_iv);
5548 gcc_assert (ok);
5550 /* When the condition is a comparison of the candidate IV against
5551 zero, prefer this IV.
5553 TODO: The constant that we're subtracting from the cost should
5554 be target-dependent. This information should be added to the
5555 target costs for each backend. */
5556 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5557 && integer_zerop (*bound_cst)
5558 && (operand_equal_p (*control_var, cand->var_after, 0)
5559 || operand_equal_p (*control_var, cand->var_before, 0)))
5560 elim_cost.cost -= 1;
5562 express_cost = get_computation_cost (data, use, cand, false,
5563 &depends_on_express, NULL,
5564 &express_inv_expr_id);
5565 fd_ivopts_data = data;
5566 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5568 /* Count the cost of the original bound as well. */
5569 bound_cost = force_var_cost (data, *bound_cst, NULL);
5570 if (bound_cost.cost == 0)
5571 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5572 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5573 bound_cost.cost = 0;
5574 express_cost.cost += bound_cost.cost;
5576 /* Choose the better approach, preferring the eliminated IV. */
5577 if (compare_costs (elim_cost, express_cost) <= 0)
5579 cost = elim_cost;
5580 depends_on = depends_on_elim;
5581 depends_on_elim = NULL;
5582 inv_expr_id = elim_inv_expr_id;
5584 else
5586 cost = express_cost;
5587 depends_on = depends_on_express;
5588 depends_on_express = NULL;
5589 bound = NULL_TREE;
5590 comp = ERROR_MARK;
5591 inv_expr_id = express_inv_expr_id;
5594 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5596 if (depends_on_elim)
5597 BITMAP_FREE (depends_on_elim);
5598 if (depends_on_express)
5599 BITMAP_FREE (depends_on_express);
5601 return !infinite_cost_p (cost);
5604 /* Determines cost of basing replacement of USE on CAND. Returns false
5605 if USE cannot be based on CAND. */
5607 static bool
5608 determine_use_iv_cost (struct ivopts_data *data,
5609 struct iv_use *use, struct iv_cand *cand)
5611 switch (use->type)
5613 case USE_NONLINEAR_EXPR:
5614 return determine_use_iv_cost_generic (data, use, cand);
5616 case USE_ADDRESS:
5617 return determine_use_iv_cost_address (data, use, cand);
5619 case USE_COMPARE:
5620 return determine_use_iv_cost_condition (data, use, cand);
5622 default:
5623 gcc_unreachable ();
5627 /* Return true if get_computation_cost indicates that autoincrement is
5628 a possibility for the pair of USE and CAND, false otherwise. */
5630 static bool
5631 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5632 struct iv_cand *cand)
5634 bitmap depends_on;
5635 bool can_autoinc;
5636 comp_cost cost;
5638 if (use->type != USE_ADDRESS)
5639 return false;
5641 cost = get_computation_cost (data, use, cand, true, &depends_on,
5642 &can_autoinc, NULL);
5644 BITMAP_FREE (depends_on);
5646 return !infinite_cost_p (cost) && can_autoinc;
5649 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5650 use that allows autoincrement, and set their AINC_USE if possible. */
5652 static void
5653 set_autoinc_for_original_candidates (struct ivopts_data *data)
5655 unsigned i, j;
5657 for (i = 0; i < n_iv_cands (data); i++)
5659 struct iv_cand *cand = iv_cand (data, i);
5660 struct iv_use *closest_before = NULL;
5661 struct iv_use *closest_after = NULL;
5662 if (cand->pos != IP_ORIGINAL)
5663 continue;
5665 for (j = 0; j < n_iv_uses (data); j++)
5667 struct iv_use *use = iv_use (data, j);
5668 unsigned uid = gimple_uid (use->stmt);
5670 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5671 continue;
5673 if (uid < gimple_uid (cand->incremented_at)
5674 && (closest_before == NULL
5675 || uid > gimple_uid (closest_before->stmt)))
5676 closest_before = use;
5678 if (uid > gimple_uid (cand->incremented_at)
5679 && (closest_after == NULL
5680 || uid < gimple_uid (closest_after->stmt)))
5681 closest_after = use;
5684 if (closest_before != NULL
5685 && autoinc_possible_for_pair (data, closest_before, cand))
5686 cand->ainc_use = closest_before;
5687 else if (closest_after != NULL
5688 && autoinc_possible_for_pair (data, closest_after, cand))
5689 cand->ainc_use = closest_after;
5693 /* Finds the candidates for the induction variables. */
5695 static void
5696 find_iv_candidates (struct ivopts_data *data)
5698 /* Add commonly used ivs. */
5699 add_standard_iv_candidates (data);
5701 /* Add old induction variables. */
5702 add_iv_candidate_for_bivs (data);
5704 /* Add induction variables derived from uses. */
5705 add_iv_candidate_for_uses (data);
5707 set_autoinc_for_original_candidates (data);
5709 /* Record the important candidates. */
5710 record_important_candidates (data);
5713 /* Determines costs of basing the use of the iv on an iv candidate. */
5715 static void
5716 determine_use_iv_costs (struct ivopts_data *data)
5718 unsigned i, j;
5719 struct iv_use *use;
5720 struct iv_cand *cand;
5721 bitmap to_clear = BITMAP_ALLOC (NULL);
5723 alloc_use_cost_map (data);
5725 for (i = 0; i < n_iv_uses (data); i++)
5727 use = iv_use (data, i);
5729 if (data->consider_all_candidates)
5731 for (j = 0; j < n_iv_cands (data); j++)
5733 cand = iv_cand (data, j);
5734 determine_use_iv_cost (data, use, cand);
5737 else
5739 bitmap_iterator bi;
5741 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5743 cand = iv_cand (data, j);
5744 if (!determine_use_iv_cost (data, use, cand))
5745 bitmap_set_bit (to_clear, j);
5748 /* Remove the candidates for which the cost is infinite from
5749 the list of related candidates. */
5750 bitmap_and_compl_into (use->related_cands, to_clear);
5751 bitmap_clear (to_clear);
5755 BITMAP_FREE (to_clear);
5757 if (dump_file && (dump_flags & TDF_DETAILS))
5759 fprintf (dump_file, "Use-candidate costs:\n");
5761 for (i = 0; i < n_iv_uses (data); i++)
5763 use = iv_use (data, i);
5765 fprintf (dump_file, "Use %d:\n", i);
5766 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5767 for (j = 0; j < use->n_map_members; j++)
5769 if (!use->cost_map[j].cand
5770 || infinite_cost_p (use->cost_map[j].cost))
5771 continue;
5773 fprintf (dump_file, " %d\t%d\t%d\t",
5774 use->cost_map[j].cand->id,
5775 use->cost_map[j].cost.cost,
5776 use->cost_map[j].cost.complexity);
5777 if (use->cost_map[j].depends_on)
5778 bitmap_print (dump_file,
5779 use->cost_map[j].depends_on, "","");
5780 if (use->cost_map[j].inv_expr_id != -1)
5781 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5782 fprintf (dump_file, "\n");
5785 fprintf (dump_file, "\n");
5787 fprintf (dump_file, "\n");
5791 /* Determines cost of the candidate CAND. */
5793 static void
5794 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5796 comp_cost cost_base;
5797 unsigned cost, cost_step;
5798 tree base;
5800 if (!cand->iv)
5802 cand->cost = 0;
5803 return;
5806 /* There are two costs associated with the candidate -- its increment
5807 and its initialization. The second is almost negligible for any loop
5808 that rolls enough, so we take it into account only a little. */
5810 base = cand->iv->base;
5811 cost_base = force_var_cost (data, base, NULL);
5812 /* It will be exceptional that the iv register happens to be initialized with
5813 the proper value at no cost. In general, there will at least be a regcopy
5814 or a const set. */
5815 if (cost_base.cost == 0)
5816 cost_base.cost = COSTS_N_INSNS (1);
5817 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5819 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5821 /* Prefer the original ivs unless we may gain something by replacing them.
5822 The reason is to make debugging simpler; this is therefore not relevant
5823 for artificial ivs created by other optimization passes. */
5824 if (cand->pos != IP_ORIGINAL
5825 || !SSA_NAME_VAR (cand->var_before)
5826 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5827 cost++;
5829 /* Prefer not to insert statements into latch unless there are some
5830 already (so that we do not create unnecessary jumps). */
5831 if (cand->pos == IP_END
5832 && empty_block_p (ip_end_pos (data->current_loop)))
5833 cost++;
5835 cand->cost = cost;
5836 cand->cost_step = cost_step;
5839 /* Determines costs of computation of the candidates. */
5841 static void
5842 determine_iv_costs (struct ivopts_data *data)
5844 unsigned i;
5846 if (dump_file && (dump_flags & TDF_DETAILS))
5848 fprintf (dump_file, "Candidate costs:\n");
5849 fprintf (dump_file, " cand\tcost\n");
5852 for (i = 0; i < n_iv_cands (data); i++)
5854 struct iv_cand *cand = iv_cand (data, i);
5856 determine_iv_cost (data, cand);
5858 if (dump_file && (dump_flags & TDF_DETAILS))
5859 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5862 if (dump_file && (dump_flags & TDF_DETAILS))
5863 fprintf (dump_file, "\n");
5866 /* Calculates the cost of having SIZE induction variables. */
5868 static unsigned
5869 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5871 /* We add size to the cost, so that we prefer eliminating ivs
5872 if possible. */
5873 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5874 data->body_includes_call);
5877 /* For each size of the induction variable set determine the penalty. */
5879 static void
5880 determine_set_costs (struct ivopts_data *data)
5882 unsigned j, n;
5883 gphi *phi;
5884 gphi_iterator psi;
5885 tree op;
5886 struct loop *loop = data->current_loop;
5887 bitmap_iterator bi;
5889 if (dump_file && (dump_flags & TDF_DETAILS))
5891 fprintf (dump_file, "Global costs:\n");
5892 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5893 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5894 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5895 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5898 n = 0;
5899 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5901 phi = psi.phi ();
5902 op = PHI_RESULT (phi);
5904 if (virtual_operand_p (op))
5905 continue;
5907 if (get_iv (data, op))
5908 continue;
5910 n++;
5913 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5915 struct version_info *info = ver_info (data, j);
5917 if (info->inv_id && info->has_nonlin_use)
5918 n++;
5921 data->regs_used = n;
5922 if (dump_file && (dump_flags & TDF_DETAILS))
5923 fprintf (dump_file, " regs_used %d\n", n);
5925 if (dump_file && (dump_flags & TDF_DETAILS))
5927 fprintf (dump_file, " cost for size:\n");
5928 fprintf (dump_file, " ivs\tcost\n");
5929 for (j = 0; j <= 2 * target_avail_regs; j++)
5930 fprintf (dump_file, " %d\t%d\n", j,
5931 ivopts_global_cost_for_size (data, j));
5932 fprintf (dump_file, "\n");
5936 /* Returns true if A is a cheaper cost pair than B. */
5938 static bool
5939 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5941 int cmp;
5943 if (!a)
5944 return false;
5946 if (!b)
5947 return true;
5949 cmp = compare_costs (a->cost, b->cost);
5950 if (cmp < 0)
5951 return true;
5953 if (cmp > 0)
5954 return false;
5956 /* In case the costs are the same, prefer the cheaper candidate. */
5957 if (a->cand->cost < b->cand->cost)
5958 return true;
5960 return false;
5964 /* Returns the candidate by which USE is expressed in IVS. */
5966 static struct cost_pair *
5967 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5969 return ivs->cand_for_use[use->id];
5972 /* Computes the cost field of the IVS structure. */
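/* That is, the sum of the per-use costs, the candidate increment costs,
   and the register-pressure estimate, in which each distinct invariant
   expression in use counts as one more live value.  */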
5974 static void
5975 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5977 comp_cost cost = ivs->cand_use_cost;
5979 cost.cost += ivs->cand_cost;
5981 cost.cost += ivopts_global_cost_for_size (data,
5982 ivs->n_regs + ivs->num_used_inv_expr);
5984 ivs->cost = cost;
5987 /* Remove the invariants in set INVS from set IVS. */
5989 static void
5990 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5992 bitmap_iterator bi;
5993 unsigned iid;
5995 if (!invs)
5996 return;
5998 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6000 ivs->n_invariant_uses[iid]--;
6001 if (ivs->n_invariant_uses[iid] == 0)
6002 ivs->n_regs--;
6006 /* Set USE not to be expressed by any candidate in IVS. */
6008 static void
6009 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6010 struct iv_use *use)
6012 unsigned uid = use->id, cid;
6013 struct cost_pair *cp;
6015 cp = ivs->cand_for_use[uid];
6016 if (!cp)
6017 return;
6018 cid = cp->cand->id;
6020 ivs->bad_uses++;
6021 ivs->cand_for_use[uid] = NULL;
6022 ivs->n_cand_uses[cid]--;
6024 if (ivs->n_cand_uses[cid] == 0)
6026 bitmap_clear_bit (ivs->cands, cid);
6027 /* Do not count the pseudocandidates. */
6028 if (cp->cand->iv)
6029 ivs->n_regs--;
6030 ivs->n_cands--;
6031 ivs->cand_cost -= cp->cand->cost;
6033 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6036 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6038 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6040 if (cp->inv_expr_id != -1)
6042 ivs->used_inv_expr[cp->inv_expr_id]--;
6043 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
6044 ivs->num_used_inv_expr--;
6046 iv_ca_recount_cost (data, ivs);
6049 /* Add invariants in set INVS to set IVS. */
6051 static void
6052 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6054 bitmap_iterator bi;
6055 unsigned iid;
6057 if (!invs)
6058 return;
6060 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6062 ivs->n_invariant_uses[iid]++;
6063 if (ivs->n_invariant_uses[iid] == 1)
6064 ivs->n_regs++;
6068 /* Set cost pair for USE in set IVS to CP. */
6070 static void
6071 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6072 struct iv_use *use, struct cost_pair *cp)
6074 unsigned uid = use->id, cid;
6076 if (ivs->cand_for_use[uid] == cp)
6077 return;
6079 if (ivs->cand_for_use[uid])
6080 iv_ca_set_no_cp (data, ivs, use);
6082 if (cp)
6084 cid = cp->cand->id;
6086 ivs->bad_uses--;
6087 ivs->cand_for_use[uid] = cp;
6088 ivs->n_cand_uses[cid]++;
6089 if (ivs->n_cand_uses[cid] == 1)
6091 bitmap_set_bit (ivs->cands, cid);
6092 /* Do not count the pseudocandidates. */
6093 if (cp->cand->iv)
6094 ivs->n_regs++;
6095 ivs->n_cands++;
6096 ivs->cand_cost += cp->cand->cost;
6098 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6101 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6102 iv_ca_set_add_invariants (ivs, cp->depends_on);
6104 if (cp->inv_expr_id != -1)
6106 ivs->used_inv_expr[cp->inv_expr_id]++;
6107 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
6108 ivs->num_used_inv_expr++;
6110 iv_ca_recount_cost (data, ivs);
6114 /* Extend set IVS by expressing USE by some of the candidates in it
6115 if possible. Consider all important candidates if candidates in
6116 set IVS don't give any result. */
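/* If even the important candidates offer no finite-cost pairing for
   USE, the BAD_USES counter incremented below stays nonzero, which
   makes iv_ca_cost () of the whole assignment infinite.  */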
6118 static void
6119 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6120 struct iv_use *use)
6122 struct cost_pair *best_cp = NULL, *cp;
6123 bitmap_iterator bi;
6124 unsigned i;
6125 struct iv_cand *cand;
6127 gcc_assert (ivs->upto >= use->id);
6128 ivs->upto++;
6129 ivs->bad_uses++;
6131 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6133 cand = iv_cand (data, i);
6134 cp = get_use_iv_cost (data, use, cand);
6135 if (cheaper_cost_pair (cp, best_cp))
6136 best_cp = cp;
6139 if (best_cp == NULL)
6141 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6143 cand = iv_cand (data, i);
6144 cp = get_use_iv_cost (data, use, cand);
6145 if (cheaper_cost_pair (cp, best_cp))
6146 best_cp = cp;
6150 iv_ca_set_cp (data, ivs, use, best_cp);
6153 /* Get cost for assignment IVS. */
6155 static comp_cost
6156 iv_ca_cost (struct iv_ca *ivs)
6158 /* This was a conditional expression but it triggered a bug in
6159 Sun C 5.5. */
6160 if (ivs->bad_uses)
6161 return infinite_cost;
6162 else
6163 return ivs->cost;
6166 /* Returns true if all dependences of CP are among invariants in IVS. */
6168 static bool
6169 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6171 unsigned i;
6172 bitmap_iterator bi;
6174 if (!cp->depends_on)
6175 return true;
6177 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6179 if (ivs->n_invariant_uses[i] == 0)
6180 return false;
6183 return true;
6186 /* Creates a change record that expresses USE by NEW_CP instead of OLD_CP,
6187 and chains it before NEXT_CHANGE. */
6189 static struct iv_ca_delta *
6190 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6191 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6193 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6195 change->use = use;
6196 change->old_cp = old_cp;
6197 change->new_cp = new_cp;
6198 change->next_change = next_change;
6200 return change;
6203 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6204 are rewritten. */
6206 static struct iv_ca_delta *
6207 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6209 struct iv_ca_delta *last;
6211 if (!l2)
6212 return l1;
6214 if (!l1)
6215 return l2;
6217 for (last = l1; last->next_change; last = last->next_change)
6218 continue;
6219 last->next_change = l2;
6221 return l1;
6224 /* Reverse the list of changes DELTA, forming its inverse. */
6226 static struct iv_ca_delta *
6227 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6229 struct iv_ca_delta *act, *next, *prev = NULL;
6231 for (act = delta; act; act = next)
6233 next = act->next_change;
6234 act->next_change = prev;
6235 prev = act;
6237 std::swap (act->old_cp, act->new_cp);
6240 return prev;
6243 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6244 reverted instead. */
6246 static void
6247 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6248 struct iv_ca_delta *delta, bool forward)
6250 struct cost_pair *from, *to;
6251 struct iv_ca_delta *act;
6253 if (!forward)
6254 delta = iv_ca_delta_reverse (delta);
6256 for (act = delta; act; act = act->next_change)
6258 from = act->old_cp;
6259 to = act->new_cp;
6260 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6261 iv_ca_set_cp (data, ivs, act->use, to);
6264 if (!forward)
6265 iv_ca_delta_reverse (delta);
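/* Editorial note: a minimal sketch of how the delta machinery above is
   typically used; u1, cp_old and cp_new are hypothetical names, not
   entities from this file:

     struct iv_ca_delta *delta = NULL;
     delta = iv_ca_delta_add (u1, cp_old, cp_new, delta);
     iv_ca_delta_commit (data, ivs, delta, true);   // apply tentatively
     comp_cost c = iv_ca_cost (ivs);                // measure the result
     iv_ca_delta_commit (data, ivs, delta, false);  // roll back
     iv_ca_delta_free (&delta);

   Committing with FORWARD == false reverses the list (swapping old_cp and
   new_cp in each record), applies it, and reverses back, which is why a
   commit followed by a rollback restores IVS exactly. */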
6268 /* Returns true if CAND is used in IVS. */
6270 static bool
6271 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6273 return ivs->n_cand_uses[cand->id] > 0;
6276 /* Returns the number of induction variable candidates in the set IVS. */
6278 static unsigned
6279 iv_ca_n_cands (struct iv_ca *ivs)
6281 return ivs->n_cands;
6284 /* Free the list of changes DELTA. */
6286 static void
6287 iv_ca_delta_free (struct iv_ca_delta **delta)
6289 struct iv_ca_delta *act, *next;
6291 for (act = *delta; act; act = next)
6293 next = act->next_change;
6294 free (act);
6297 *delta = NULL;
6300 /* Allocates a new iv candidate assignment. */
6302 static struct iv_ca *
6303 iv_ca_new (struct ivopts_data *data)
6305 struct iv_ca *nw = XNEW (struct iv_ca);
6307 nw->upto = 0;
6308 nw->bad_uses = 0;
6309 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6310 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6311 nw->cands = BITMAP_ALLOC (NULL);
6312 nw->n_cands = 0;
6313 nw->n_regs = 0;
6314 nw->cand_use_cost = no_cost;
6315 nw->cand_cost = 0;
6316 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6317 nw->cost = no_cost;
6318 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6319 nw->num_used_inv_expr = 0;
6321 return nw;
6324 /* Free memory occupied by the set IVS. */
6326 static void
6327 iv_ca_free (struct iv_ca **ivs)
6329 free ((*ivs)->cand_for_use);
6330 free ((*ivs)->n_cand_uses);
6331 BITMAP_FREE ((*ivs)->cands);
6332 free ((*ivs)->n_invariant_uses);
6333 free ((*ivs)->used_inv_expr);
6334 free (*ivs);
6335 *ivs = NULL;
6338 /* Dumps IVS to FILE. */
6340 static void
6341 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6343 const char *pref = " invariants ";
6344 unsigned i;
6345 comp_cost cost = iv_ca_cost (ivs);
6347 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6348 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6349 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6350 bitmap_print (file, ivs->cands, " candidates: ","\n");
6352 for (i = 0; i < ivs->upto; i++)
6354 struct iv_use *use = iv_use (data, i);
6355 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6356 if (cp)
6357 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6358 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6359 else
6360 fprintf (file, " use:%d --> ??\n", use->id);
6363 for (i = 1; i <= data->max_inv_id; i++)
6364 if (ivs->n_invariant_uses[i])
6366 fprintf (file, "%s%d", pref, i);
6367 pref = ", ";
6369 fprintf (file, "\n\n");
6372 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6373 of the new set, and store the differences in DELTA. The number of induction
6374 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
6375 is true, the function tries to find a solution with minimal iv candidates. */
6377 static comp_cost
6378 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6379 struct iv_cand *cand, struct iv_ca_delta **delta,
6380 unsigned *n_ivs, bool min_ncand)
6382 unsigned i;
6383 comp_cost cost;
6384 struct iv_use *use;
6385 struct cost_pair *old_cp, *new_cp;
6387 *delta = NULL;
6388 for (i = 0; i < ivs->upto; i++)
6390 use = iv_use (data, i);
6391 old_cp = iv_ca_cand_for_use (ivs, use);
6393 if (old_cp
6394 && old_cp->cand == cand)
6395 continue;
6397 new_cp = get_use_iv_cost (data, use, cand);
6398 if (!new_cp)
6399 continue;
6401 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6402 continue;
6404 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6405 continue;
6407 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6410 iv_ca_delta_commit (data, ivs, *delta, true);
6411 cost = iv_ca_cost (ivs);
6412 if (n_ivs)
6413 *n_ivs = iv_ca_n_cands (ivs);
6414 iv_ca_delta_commit (data, ivs, *delta, false);
6416 return cost;
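/* Editorial note: iv_ca_extend evaluates CAND without permanently
   changing IVS -- it commits *DELTA, reads the resulting cost, and
   immediately rolls the commit back. Callers that like the returned
   cost re-commit the same delta themselves (see try_improve_iv_set). */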
6419 /* Try narrowing set IVS by removing CAND. Return the cost of
6420 the new set and store the differences in DELTA. START is
6421 the candidate with which we start narrowing. */
6423 static comp_cost
6424 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6425 struct iv_cand *cand, struct iv_cand *start,
6426 struct iv_ca_delta **delta)
6428 unsigned i, ci;
6429 struct iv_use *use;
6430 struct cost_pair *old_cp, *new_cp, *cp;
6431 bitmap_iterator bi;
6432 struct iv_cand *cnd;
6433 comp_cost cost, best_cost, acost;
6435 *delta = NULL;
6436 for (i = 0; i < n_iv_uses (data); i++)
6438 use = iv_use (data, i);
6440 old_cp = iv_ca_cand_for_use (ivs, use);
6441 if (old_cp->cand != cand)
6442 continue;
6444 best_cost = iv_ca_cost (ivs);
6445 /* Start narrowing with START. */
6446 new_cp = get_use_iv_cost (data, use, start);
6448 if (data->consider_all_candidates)
6450 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6452 if (ci == cand->id || (start && ci == start->id))
6453 continue;
6455 cnd = iv_cand (data, ci);
6457 cp = get_use_iv_cost (data, use, cnd);
6458 if (!cp)
6459 continue;
6461 iv_ca_set_cp (data, ivs, use, cp);
6462 acost = iv_ca_cost (ivs);
6464 if (compare_costs (acost, best_cost) < 0)
6466 best_cost = acost;
6467 new_cp = cp;
6471 else
6473 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6475 if (ci == cand->id || (start && ci == start->id))
6476 continue;
6478 cnd = iv_cand (data, ci);
6480 cp = get_use_iv_cost (data, use, cnd);
6481 if (!cp)
6482 continue;
6484 iv_ca_set_cp (data, ivs, use, cp);
6485 acost = iv_ca_cost (ivs);
6487 if (compare_costs (acost, best_cost) < 0)
6489 best_cost = acost;
6490 new_cp = cp;
6494 /* Restore the old cp for this use. */
6495 iv_ca_set_cp (data, ivs, use, old_cp);
6497 if (!new_cp)
6499 iv_ca_delta_free (delta);
6500 return infinite_cost;
6503 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6506 iv_ca_delta_commit (data, ivs, *delta, true);
6507 cost = iv_ca_cost (ivs);
6508 iv_ca_delta_commit (data, ivs, *delta, false);
6510 return cost;
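/* Editorial note: in iv_ca_narrow, "narrowing" means removing CAND from
   the set: every use currently expressed by CAND must be re-expressed by
   some other candidate in IVS (START is tried first), and the cheapest
   such reassignment is accumulated in *DELTA. If any use cannot be moved
   off CAND, the narrowing fails and infinite_cost is returned. */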
6513 /* Try optimizing the set of candidates IVS by removing candidates other
6514 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6515 differences in DELTA. */
6517 static comp_cost
6518 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6519 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6521 bitmap_iterator bi;
6522 struct iv_ca_delta *act_delta, *best_delta;
6523 unsigned i;
6524 comp_cost best_cost, acost;
6525 struct iv_cand *cand;
6527 best_delta = NULL;
6528 best_cost = iv_ca_cost (ivs);
6530 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6532 cand = iv_cand (data, i);
6534 if (cand == except_cand)
6535 continue;
6537 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6539 if (compare_costs (acost, best_cost) < 0)
6541 best_cost = acost;
6542 iv_ca_delta_free (&best_delta);
6543 best_delta = act_delta;
6545 else
6546 iv_ca_delta_free (&act_delta);
6549 if (!best_delta)
6551 *delta = NULL;
6552 return best_cost;
6555 /* Recurse to possibly remove other unnecessary ivs. */
6556 iv_ca_delta_commit (data, ivs, best_delta, true);
6557 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6558 iv_ca_delta_commit (data, ivs, best_delta, false);
6559 *delta = iv_ca_delta_join (best_delta, *delta);
6560 return best_cost;
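/* Editorial note: iv_ca_prune repeats iv_ca_narrow greedily -- it removes
   the single candidate whose removal helps most, commits that change, and
   recurses to try removing further candidates, joining the partial deltas
   into one list for the caller. For instance, a hypothetical set
   {c1, c2, c3} may shrink to {c1} in two recursive steps if both removals
   lower the cost. */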
6563 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6564 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6565 its cost_pair; otherwise just return BEST_CP. */
6567 static struct cost_pair*
6568 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6569 unsigned int cand_idx, struct iv_cand *old_cand,
6570 struct cost_pair *best_cp)
6572 struct iv_cand *cand;
6573 struct cost_pair *cp;
6575 gcc_assert (old_cand != NULL && best_cp != NULL);
6576 if (cand_idx == old_cand->id)
6577 return best_cp;
6579 cand = iv_cand (data, cand_idx);
6580 cp = get_use_iv_cost (data, use, cand);
6581 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6582 return cp;
6584 return best_cp;
6587 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6588 that are used by more than one iv use. For each such candidate, this
6589 function tries to represent the iv uses under that candidate using other
6590 candidates with lower local cost, then tries to prune the new set. If the
6591 new set has a lower cost, it returns the new cost after recording the
6592 candidate replacement in the list DELTA. */
6594 static comp_cost
6595 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6596 struct iv_ca_delta **delta)
6598 bitmap_iterator bi, bj;
6599 unsigned int i, j, k;
6600 struct iv_use *use;
6601 struct iv_cand *cand;
6602 comp_cost orig_cost, acost;
6603 struct iv_ca_delta *act_delta, *tmp_delta;
6604 struct cost_pair *old_cp, *best_cp = NULL;
6606 *delta = NULL;
6607 orig_cost = iv_ca_cost (ivs);
6609 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6611 if (ivs->n_cand_uses[i] == 1
6612 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6613 continue;
6615 cand = iv_cand (data, i);
6617 act_delta = NULL;
6618 /* Represent uses under current candidate using other ones with
6619 lower local cost. */
6620 for (j = 0; j < ivs->upto; j++)
6622 use = iv_use (data, j);
6623 old_cp = iv_ca_cand_for_use (ivs, use);
6625 if (old_cp->cand != cand)
6626 continue;
6628 best_cp = old_cp;
6629 if (data->consider_all_candidates)
6630 for (k = 0; k < n_iv_cands (data); k++)
6631 best_cp = cheaper_cost_with_cand (data, use, k,
6632 old_cp->cand, best_cp);
6633 else
6634 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6635 best_cp = cheaper_cost_with_cand (data, use, k,
6636 old_cp->cand, best_cp);
6638 if (best_cp == old_cp)
6639 continue;
6641 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6643 /* No need for further pruning. */
6644 if (!act_delta)
6645 continue;
6647 /* Prune the new candidate set. */
6648 iv_ca_delta_commit (data, ivs, act_delta, true);
6649 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6650 iv_ca_delta_commit (data, ivs, act_delta, false);
6651 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6653 if (compare_costs (acost, orig_cost) < 0)
6655 *delta = act_delta;
6656 return acost;
6658 else
6659 iv_ca_delta_free (&act_delta);
6662 return orig_cost;
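/* Editorial note: a hypothetical scenario in which the replacement above
   helps. Suppose uses u1 and u2 both picked candidate c1 because each
   greedy step looked at one use at a time, but u1 is locally cheaper
   under c2 and u2 under c3. iv_ca_replace moves u1 to c2 and u2 to c3,
   then lets iv_ca_prune drop the now-unused c1; if the combined cost
   drops, the whole delta is returned to the caller. */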
6665 /* Tries to extend the set IVS in the best possible way in order
6666 to express USE. If ORIGINALP is true, prefer candidates from
6667 the original set of IVs, otherwise favor important candidates not
6668 based on any memory object. */
6670 static bool
6671 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6672 struct iv_use *use, bool originalp)
6674 comp_cost best_cost, act_cost;
6675 unsigned i;
6676 bitmap_iterator bi;
6677 struct iv_cand *cand;
6678 struct iv_ca_delta *best_delta = NULL, *act_delta;
6679 struct cost_pair *cp;
6681 iv_ca_add_use (data, ivs, use);
6682 best_cost = iv_ca_cost (ivs);
6683 cp = iv_ca_cand_for_use (ivs, use);
6684 if (cp)
6686 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6687 iv_ca_set_no_cp (data, ivs, use);
6690 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6691 first try important candidates not based on any memory object. Only if
6692 this fails, try the specific ones. Rationale -- in loops with many
6693 variables the best choice is often to use just one generic biv. If we
6694 added many ivs specific to the uses here, the optimization algorithm would
6695 later be likely to get stuck in a local minimum, thus causing us to create
6696 too many ivs. Going from few ivs to more seems more likely to be
6697 successful -- starting from few ivs, replacing an expensive use by a
6698 specific iv should always be a win. */
6699 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
6701 cand = iv_cand (data, i);
6703 if (originalp && cand->pos != IP_ORIGINAL)
6704 continue;
6706 if (!originalp && cand->iv->base_object != NULL_TREE)
6707 continue;
6709 if (iv_ca_cand_used_p (ivs, cand))
6710 continue;
6712 cp = get_use_iv_cost (data, use, cand);
6713 if (!cp)
6714 continue;
6716 iv_ca_set_cp (data, ivs, use, cp);
6717 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6718 true);
6719 iv_ca_set_no_cp (data, ivs, use);
6720 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6722 if (compare_costs (act_cost, best_cost) < 0)
6724 best_cost = act_cost;
6726 iv_ca_delta_free (&best_delta);
6727 best_delta = act_delta;
6729 else
6730 iv_ca_delta_free (&act_delta);
6733 if (infinite_cost_p (best_cost))
6735 for (i = 0; i < use->n_map_members; i++)
6737 cp = use->cost_map + i;
6738 cand = cp->cand;
6739 if (!cand)
6740 continue;
6742 /* Already tried this. */
6743 if (cand->important)
6745 if (originalp && cand->pos == IP_ORIGINAL)
6746 continue;
6747 if (!originalp && cand->iv->base_object == NULL_TREE)
6748 continue;
6751 if (iv_ca_cand_used_p (ivs, cand))
6752 continue;
6754 act_delta = NULL;
6755 iv_ca_set_cp (data, ivs, use, cp);
6756 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6757 iv_ca_set_no_cp (data, ivs, use);
6758 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6759 cp, act_delta);
6761 if (compare_costs (act_cost, best_cost) < 0)
6763 best_cost = act_cost;
6765 if (best_delta)
6766 iv_ca_delta_free (&best_delta);
6767 best_delta = act_delta;
6769 else
6770 iv_ca_delta_free (&act_delta);
6774 iv_ca_delta_commit (data, ivs, best_delta, true);
6775 iv_ca_delta_free (&best_delta);
6777 return !infinite_cost_p (best_cost);
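/* Editorial note: the search order in try_add_cand_for is therefore
   1) candidates already in IVS (via iv_ca_add_use), 2) related candidates
   filtered by ORIGINALP, and 3) as a fallback while the cost is still
   infinite, the remaining entries of the use's cost map. The cheapest
   delta found across these stages is committed. */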
6780 /* Finds an initial assignment of candidates to uses. */
6782 static struct iv_ca *
6783 get_initial_solution (struct ivopts_data *data, bool originalp)
6785 struct iv_ca *ivs = iv_ca_new (data);
6786 unsigned i;
6788 for (i = 0; i < n_iv_uses (data); i++)
6789 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6791 iv_ca_free (&ivs);
6792 return NULL;
6795 return ivs;
6798 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6799 points to a bool variable; if it is true, this function tries to break
6800 the local optimal fixed-point by replacing candidates in IVS. */
6802 static bool
6803 try_improve_iv_set (struct ivopts_data *data,
6804 struct iv_ca *ivs, bool *try_replace_p)
6806 unsigned i, n_ivs;
6807 comp_cost acost, best_cost = iv_ca_cost (ivs);
6808 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6809 struct iv_cand *cand;
6811 /* Try extending the set of induction variables by one. */
6812 for (i = 0; i < n_iv_cands (data); i++)
6814 cand = iv_cand (data, i);
6816 if (iv_ca_cand_used_p (ivs, cand))
6817 continue;
6819 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6820 if (!act_delta)
6821 continue;
6823 /* If we successfully added the candidate and the set is small enough,
6824 try optimizing it by removing other candidates. */
6825 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6827 iv_ca_delta_commit (data, ivs, act_delta, true);
6828 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6829 iv_ca_delta_commit (data, ivs, act_delta, false);
6830 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6833 if (compare_costs (acost, best_cost) < 0)
6835 best_cost = acost;
6836 iv_ca_delta_free (&best_delta);
6837 best_delta = act_delta;
6839 else
6840 iv_ca_delta_free (&act_delta);
6843 if (!best_delta)
6845 /* Try removing the candidates from the set instead. */
6846 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6848 if (!best_delta && *try_replace_p)
6850 *try_replace_p = false;
6851 /* So far the candidate-selecting algorithm tends to choose few IVs,
6852 which lets it handle cases in which loops have many variables
6853 but the best choice is often to use only one general biv. One
6854 weakness is that it can't handle the opposite case, in which
6855 different candidates should be chosen for different uses. To solve
6856 this problem, we replace candidates in the manner described in
6857 the comments of iv_ca_replace, thus giving the general algorithm a
6858 chance to break the local optimal fixed-point in these cases. */
6859 best_cost = iv_ca_replace (data, ivs, &best_delta);
6862 if (!best_delta)
6863 return false;
6866 iv_ca_delta_commit (data, ivs, best_delta, true);
6867 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6868 iv_ca_delta_free (&best_delta);
6869 return true;
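/* Editorial note: one iteration of the greedy improvement above tries, in
   order: (1) adding each unused candidate (iv_ca_extend), pruning after a
   successful addition when the set is small enough; (2) if no addition
   helped, pure removal (iv_ca_prune); and (3) once per invocation of the
   solver, candidate replacement (iv_ca_replace). The best delta found is
   committed, and the return value says whether the cost improved. */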
6872 /* Attempts to find the optimal set of induction variables. We use a simple
6873 greedy heuristic: we try to replace at most one candidate in the selected
6874 solution and remove the unused ivs while this improves the cost. */
6876 static struct iv_ca *
6877 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6879 struct iv_ca *set;
6880 bool try_replace_p = true;
6882 /* Get the initial solution. */
6883 set = get_initial_solution (data, originalp);
6884 if (!set)
6886 if (dump_file && (dump_flags & TDF_DETAILS))
6887 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6888 return NULL;
6891 if (dump_file && (dump_flags & TDF_DETAILS))
6893 fprintf (dump_file, "Initial set of candidates:\n");
6894 iv_ca_dump (data, dump_file, set);
6897 while (try_improve_iv_set (data, set, &try_replace_p))
6899 if (dump_file && (dump_flags & TDF_DETAILS))
6901 fprintf (dump_file, "Improved to:\n");
6902 iv_ca_dump (data, dump_file, set);
6906 return set;
6909 static struct iv_ca *
6910 find_optimal_iv_set (struct ivopts_data *data)
6912 unsigned i;
6913 struct iv_ca *set, *origset;
6914 struct iv_use *use;
6915 comp_cost cost, origcost;
6917 /* Determine the cost based on a strategy that starts with the original IVs,
6918 then try again using a strategy that prefers candidates not based
6919 on any IVs. */
6920 origset = find_optimal_iv_set_1 (data, true);
6921 set = find_optimal_iv_set_1 (data, false);
6923 if (!origset && !set)
6924 return NULL;
6926 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6927 cost = set ? iv_ca_cost (set) : infinite_cost;
6929 if (dump_file && (dump_flags & TDF_DETAILS))
6931 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6932 origcost.cost, origcost.complexity);
6933 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6934 cost.cost, cost.complexity);
6937 /* Choose the one with the best cost. */
6938 if (compare_costs (origcost, cost) <= 0)
6940 if (set)
6941 iv_ca_free (&set);
6942 set = origset;
6944 else if (origset)
6945 iv_ca_free (&origset);
6947 for (i = 0; i < n_iv_uses (data); i++)
6949 use = iv_use (data, i);
6950 use->selected = iv_ca_cand_for_use (set, use)->cand;
6953 return set;
6956 /* Creates a new induction variable corresponding to CAND. */
6958 static void
6959 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6961 gimple_stmt_iterator incr_pos;
6962 tree base;
6963 bool after = false;
6965 if (!cand->iv)
6966 return;
6968 switch (cand->pos)
6970 case IP_NORMAL:
6971 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6972 break;
6974 case IP_END:
6975 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6976 after = true;
6977 break;
6979 case IP_AFTER_USE:
6980 after = true;
6981 /* fall through */
6982 case IP_BEFORE_USE:
6983 incr_pos = gsi_for_stmt (cand->incremented_at);
6984 break;
6986 case IP_ORIGINAL:
6987 /* Mark that the iv is preserved. */
6988 name_info (data, cand->var_before)->preserve_biv = true;
6989 name_info (data, cand->var_after)->preserve_biv = true;
6991 /* Rewrite the increment so that it uses var_before directly. */
6992 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6993 return;
6996 gimple_add_tmp_var (cand->var_before);
6998 base = unshare_expr (cand->iv->base);
7000 create_iv (base, unshare_expr (cand->iv->step),
7001 cand->var_before, data->current_loop,
7002 &incr_pos, after, &cand->var_before, &cand->var_after);
7005 /* Creates new induction variables described in SET. */
7007 static void
7008 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7010 unsigned i;
7011 struct iv_cand *cand;
7012 bitmap_iterator bi;
7014 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7016 cand = iv_cand (data, i);
7017 create_new_iv (data, cand);
7020 if (dump_file && (dump_flags & TDF_DETAILS))
7022 fprintf (dump_file, "Selected IV set for loop %d",
7023 data->current_loop->num);
7024 if (data->loop_loc != UNKNOWN_LOCATION)
7025 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7026 LOCATION_LINE (data->loop_loc));
7027 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7028 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7030 cand = iv_cand (data, i);
7031 dump_cand (dump_file, cand);
7033 fprintf (dump_file, "\n");
7037 /* Rewrites USE (definition of iv used in a nonlinear expression)
7038 using candidate CAND. */
7040 static void
7041 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7042 struct iv_use *use, struct iv_cand *cand)
7044 tree comp;
7045 tree op, tgt;
7046 gassign *ass;
7047 gimple_stmt_iterator bsi;
7049 /* An important special case -- if we are asked to express value of
7050 the original iv by itself, just exit; there is no need to
7051 introduce a new computation (that might also need casting the
7052 variable to unsigned and back). */
7053 if (cand->pos == IP_ORIGINAL
7054 && cand->incremented_at == use->stmt)
7056 enum tree_code stmt_code;
7058 gcc_assert (is_gimple_assign (use->stmt));
7059 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7061 /* Check whether we may leave the computation unchanged.
7062 This is the case only if it does not rely on other
7063 computations in the loop -- otherwise, the computation
7064 we rely upon may be removed in remove_unused_ivs,
7065 thus leading to an ICE. */
7066 stmt_code = gimple_assign_rhs_code (use->stmt);
7067 if (stmt_code == PLUS_EXPR
7068 || stmt_code == MINUS_EXPR
7069 || stmt_code == POINTER_PLUS_EXPR)
7071 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7072 op = gimple_assign_rhs2 (use->stmt);
7073 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7074 op = gimple_assign_rhs1 (use->stmt);
7075 else
7076 op = NULL_TREE;
7078 else
7079 op = NULL_TREE;
7081 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7082 return;
7085 comp = get_computation (data->current_loop, use, cand);
7086 gcc_assert (comp != NULL_TREE);
7088 switch (gimple_code (use->stmt))
7090 case GIMPLE_PHI:
7091 tgt = PHI_RESULT (use->stmt);
7093 /* If we should keep the biv, do not replace it. */
7094 if (name_info (data, tgt)->preserve_biv)
7095 return;
7097 bsi = gsi_after_labels (gimple_bb (use->stmt));
7098 break;
7100 case GIMPLE_ASSIGN:
7101 tgt = gimple_assign_lhs (use->stmt);
7102 bsi = gsi_for_stmt (use->stmt);
7103 break;
7105 default:
7106 gcc_unreachable ();
7109 if (!valid_gimple_rhs_p (comp)
7110 || (gimple_code (use->stmt) != GIMPLE_PHI
7111 /* We can't allow re-allocating the stmt as it might be pointed
7112 to still. */
7113 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7114 >= gimple_num_ops (gsi_stmt (bsi)))))
7116 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7117 true, GSI_SAME_STMT);
7118 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7120 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7121 /* As this isn't a plain copy, we have to reset the alignment
7122 information. */
7123 if (SSA_NAME_PTR_INFO (comp))
7124 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7128 if (gimple_code (use->stmt) == GIMPLE_PHI)
7130 ass = gimple_build_assign (tgt, comp);
7131 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7133 bsi = gsi_for_stmt (use->stmt);
7134 remove_phi_node (&bsi, false);
7136 else
7138 gimple_assign_set_rhs_from_tree (&bsi, comp);
7139 use->stmt = gsi_stmt (bsi);
7143 /* Performs a peephole optimization to reorder the iv update statement with
7144 a mem ref to enable instruction combining in later phases. The mem ref uses
7145 the iv value before the update, so the reordering transformation requires
7146 adjustment of the offset. CAND is the selected IV_CAND.
7148 Example:
7150 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7151 iv2 = iv1 + 1;
7153 if (t < val) (1)
7154 goto L;
7155 goto Head;
7158 directly propagating t over to (1) will introduce an overlapping live range,
7159 thus increasing register pressure. This peephole transforms it into:
7162 iv2 = iv1 + 1;
7163 t = MEM_REF (base, iv2, 8, 8);
7164 if (t < val)
7165 goto L;
7166 goto Head;
7169 static void
7170 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7172 tree var_after;
7173 gimple *iv_update, *stmt;
7174 basic_block bb;
7175 gimple_stmt_iterator gsi, gsi_iv;
7177 if (cand->pos != IP_NORMAL)
7178 return;
7180 var_after = cand->var_after;
7181 iv_update = SSA_NAME_DEF_STMT (var_after);
7183 bb = gimple_bb (iv_update);
7184 gsi = gsi_last_nondebug_bb (bb);
7185 stmt = gsi_stmt (gsi);
7187 /* Only handle conditional statement for now. */
7188 if (gimple_code (stmt) != GIMPLE_COND)
7189 return;
7191 gsi_prev_nondebug (&gsi);
7192 stmt = gsi_stmt (gsi);
7193 if (stmt != iv_update)
7194 return;
7196 gsi_prev_nondebug (&gsi);
7197 if (gsi_end_p (gsi))
7198 return;
7200 stmt = gsi_stmt (gsi);
7201 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7202 return;
7204 if (stmt != use->stmt)
7205 return;
7207 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7208 return;
7210 if (dump_file && (dump_flags & TDF_DETAILS))
7212 fprintf (dump_file, "Reordering \n");
7213 print_gimple_stmt (dump_file, iv_update, 0, 0);
7214 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7215 fprintf (dump_file, "\n");
7218 gsi = gsi_for_stmt (use->stmt);
7219 gsi_iv = gsi_for_stmt (iv_update);
7220 gsi_move_before (&gsi_iv, &gsi);
7222 cand->pos = IP_BEFORE_USE;
7223 cand->incremented_at = use->stmt;
7226 /* Rewrites USE (address that is an iv) using candidate CAND. */
7228 static void
7229 rewrite_use_address_1 (struct ivopts_data *data,
7230 struct iv_use *use, struct iv_cand *cand)
7232 aff_tree aff;
7233 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7234 tree base_hint = NULL_TREE;
7235 tree ref, iv;
7236 bool ok;
7238 adjust_iv_update_pos (cand, use);
7239 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7240 gcc_assert (ok);
7241 unshare_aff_combination (&aff);
7243 /* To avoid undefined overflow problems, all IV candidates use unsigned
7244 integer types. The drawback is that this makes it impossible for
7245 create_mem_ref to distinguish an IV that is based on a memory object
7246 from one that represents simply an offset.
7248 To work around this problem, we pass a hint to create_mem_ref that
7249 indicates which variable (if any) in aff is an IV based on a memory
7250 object. Note that we only consider the candidate. If this is not
7251 based on an object, the base of the reference is in some subexpression
7252 of the use -- but these will use pointer types, so they are recognized
7253 by the create_mem_ref heuristics anyway. */
7254 if (cand->iv->base_object)
7255 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7257 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7258 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7259 reference_alias_ptr_type (*use->op_p),
7260 iv, base_hint, data->speed);
7261 copy_ref_info (ref, *use->op_p);
7262 *use->op_p = ref;
7265 /* Rewrites USE (address that is an iv) using candidate CAND. If it is the
7266 first use of a group, rewrites the sub-uses in the group too. */
7268 static void
7269 rewrite_use_address (struct ivopts_data *data,
7270 struct iv_use *use, struct iv_cand *cand)
7272 struct iv_use *next;
7274 gcc_assert (use->sub_id == 0);
7275 rewrite_use_address_1 (data, use, cand);
7276 update_stmt (use->stmt);
7278 for (next = use->next; next != NULL; next = next->next)
7280 rewrite_use_address_1 (data, next, cand);
7281 update_stmt (next->stmt);
7284 return;
7287 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7288 candidate CAND. */
7290 static void
7291 rewrite_use_compare (struct ivopts_data *data,
7292 struct iv_use *use, struct iv_cand *cand)
7294 tree comp, *var_p, op, bound;
7295 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7296 enum tree_code compare;
7297 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7298 bool ok;
7300 bound = cp->value;
7301 if (bound)
7303 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7304 tree var_type = TREE_TYPE (var);
7305 gimple_seq stmts;
7307 if (dump_file && (dump_flags & TDF_DETAILS))
7309 fprintf (dump_file, "Replacing exit test: ");
7310 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7312 compare = cp->comp;
7313 bound = unshare_expr (fold_convert (var_type, bound));
7314 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7315 if (stmts)
7316 gsi_insert_seq_on_edge_immediate (
7317 loop_preheader_edge (data->current_loop),
7318 stmts);
7320 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7321 gimple_cond_set_lhs (cond_stmt, var);
7322 gimple_cond_set_code (cond_stmt, compare);
7323 gimple_cond_set_rhs (cond_stmt, op);
7324 return;
7327 /* The induction variable elimination failed; just express the original
7328 giv. */
7329 comp = get_computation (data->current_loop, use, cand);
7330 gcc_assert (comp != NULL_TREE);
7332 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7333 gcc_assert (ok);
7335 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7336 true, GSI_SAME_STMT);
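/* Editorial note: a hypothetical before/after for the bound case above,
   with the original iv i replaced by the selected candidate ivtmp:

     before:  if (i_12 < n_7) goto <body>;
     after:   if (ivtmp_23 != _24) goto <body>;

   where _24 holds the bound computed in the loop preheader by the
   force_gimple_operand call; the SSA names are illustrative only. */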
7339 /* Rewrites USE using candidate CAND. */
7341 static void
7342 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7344 switch (use->type)
7346 case USE_NONLINEAR_EXPR:
7347 rewrite_use_nonlinear_expr (data, use, cand);
7348 break;
7350 case USE_ADDRESS:
7351 rewrite_use_address (data, use, cand);
7352 break;
7354 case USE_COMPARE:
7355 rewrite_use_compare (data, use, cand);
7356 break;
7358 default:
7359 gcc_unreachable ();
7362 update_stmt (use->stmt);
7365 /* Rewrite the uses using the selected induction variables. */
7367 static void
7368 rewrite_uses (struct ivopts_data *data)
7370 unsigned i;
7371 struct iv_cand *cand;
7372 struct iv_use *use;
7374 for (i = 0; i < n_iv_uses (data); i++)
7376 use = iv_use (data, i);
7377 cand = use->selected;
7378 gcc_assert (cand);
7380 rewrite_use (data, use, cand);
7384 /* Removes the ivs that are not used after rewriting. */
7386 static void
7387 remove_unused_ivs (struct ivopts_data *data)
7389 unsigned j;
7390 bitmap_iterator bi;
7391 bitmap toremove = BITMAP_ALLOC (NULL);
7393 /* Figure out an order in which to release SSA DEFs so that we don't
7394 release something that we'd have to propagate into a debug stmt
7395 afterwards. */
7396 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7398 struct version_info *info;
7400 info = ver_info (data, j);
7401 if (info->iv
7402 && !integer_zerop (info->iv->step)
7403 && !info->inv_id
7404 && !info->iv->have_use_for
7405 && !info->preserve_biv)
7407 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7409 tree def = info->iv->ssa_name;
7411 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7413 imm_use_iterator imm_iter;
7414 use_operand_p use_p;
7415 gimple *stmt;
7416 int count = 0;
7418 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7420 if (!gimple_debug_bind_p (stmt))
7421 continue;
7423 /* We just want to determine whether to do nothing
7424 (count == 0), to substitute the computed
7425 expression into a single use of the SSA DEF by
7426 itself (count == 1), or to use a debug temp
7427 because the SSA DEF is used multiple times or as
7428 part of a larger expression (count > 1). */
7429 count++;
7430 if (gimple_debug_bind_get_value (stmt) != def)
7431 count++;
7433 if (count > 1)
7434 BREAK_FROM_IMM_USE_STMT (imm_iter);
7437 if (!count)
7438 continue;
7440 struct iv_use dummy_use;
7441 struct iv_cand *best_cand = NULL, *cand;
7442 unsigned i, best_pref = 0, cand_pref;
7444 memset (&dummy_use, 0, sizeof (dummy_use));
7445 dummy_use.iv = info->iv;
7446 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7448 cand = iv_use (data, i)->selected;
7449 if (cand == best_cand)
7450 continue;
7451 cand_pref = operand_equal_p (cand->iv->step,
7452 info->iv->step, 0)
7453 ? 4 : 0;
7454 cand_pref
7455 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7456 == TYPE_MODE (TREE_TYPE (info->iv->base))
7457 ? 2 : 0;
7458 cand_pref
7459 += TREE_CODE (cand->iv->base) == INTEGER_CST
7460 ? 1 : 0;
7461 if (best_cand == NULL || best_pref < cand_pref)
7463 best_cand = cand;
7464 best_pref = cand_pref;
7468 if (!best_cand)
7469 continue;
7471 tree comp = get_computation_at (data->current_loop,
7472 &dummy_use, best_cand,
7473 SSA_NAME_DEF_STMT (def));
7474 if (!comp)
7475 continue;
7477 if (count > 1)
7479 tree vexpr = make_node (DEBUG_EXPR_DECL);
7480 DECL_ARTIFICIAL (vexpr) = 1;
7481 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7482 if (SSA_NAME_VAR (def))
7483 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7484 else
7485 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7486 gdebug *def_temp
7487 = gimple_build_debug_bind (vexpr, comp, NULL);
7488 gimple_stmt_iterator gsi;
7490 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7491 gsi = gsi_after_labels (gimple_bb
7492 (SSA_NAME_DEF_STMT (def)));
7493 else
7494 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7496 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7497 comp = vexpr;
7500 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7502 if (!gimple_debug_bind_p (stmt))
7503 continue;
7505 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7506 SET_USE (use_p, comp);
7508 update_stmt (stmt);
7514 release_defs_bitset (toremove);
7516 BITMAP_FREE (toremove);
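/* Editorial note: for the debug-stmt handling above, a made-up example:
   if a dead biv i appears in several "# DEBUG i => i_3" binds, the code
   builds a single DEBUG_EXPR_DECL (printed as D#1) bound to the
   equivalent computation in terms of a selected candidate, then redirects
   all the binds to it, so debug information survives the biv's removal
   without duplicating the expression. */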
7519 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7520 for hash_map::traverse. */
7522 bool
7523 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7525 free (value);
7526 return true;
7529 /* Frees data allocated by the optimization of a single loop. */
7531 static void
7532 free_loop_data (struct ivopts_data *data)
7534 unsigned i, j;
7535 bitmap_iterator bi;
7536 tree obj;
7538 if (data->niters)
7540 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7541 delete data->niters;
7542 data->niters = NULL;
7545 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7547 struct version_info *info;
7549 info = ver_info (data, i);
7550 info->iv = NULL;
7551 info->has_nonlin_use = false;
7552 info->preserve_biv = false;
7553 info->inv_id = 0;
7555 bitmap_clear (data->relevant);
7556 bitmap_clear (data->important_candidates);
7558 for (i = 0; i < n_iv_uses (data); i++)
7560 struct iv_use *use = iv_use (data, i);
7561 struct iv_use *pre = use, *sub = use->next;
7563 while (sub)
7565 gcc_assert (sub->related_cands == NULL);
7566 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7568 pre = sub;
7569 sub = sub->next;
7570 free (pre);
7573 BITMAP_FREE (use->related_cands);
7574 for (j = 0; j < use->n_map_members; j++)
7575 if (use->cost_map[j].depends_on)
7576 BITMAP_FREE (use->cost_map[j].depends_on);
7577 free (use->cost_map);
7578 free (use);
7580 data->iv_uses.truncate (0);
7582 for (i = 0; i < n_iv_cands (data); i++)
7584 struct iv_cand *cand = iv_cand (data, i);
7586 if (cand->depends_on)
7587 BITMAP_FREE (cand->depends_on);
7588 free (cand);
7590 data->iv_candidates.truncate (0);
7592 if (data->version_info_size < num_ssa_names)
7594 data->version_info_size = 2 * num_ssa_names;
7595 free (data->version_info);
7596 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7599 data->max_inv_id = 0;
7601 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7602 SET_DECL_RTL (obj, NULL_RTX);
7604 decl_rtl_to_reset.truncate (0);
7606 data->inv_expr_tab->empty ();
7607 data->inv_expr_id = 0;
7609 data->iv_common_cand_tab->empty ();
7610 data->iv_common_cands.truncate (0);
7613 /* Finalizes the data structures used by the iv optimization pass. */
7616 static void
7617 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7619 free_loop_data (data);
7620 free (data->version_info);
7621 BITMAP_FREE (data->relevant);
7622 BITMAP_FREE (data->important_candidates);
7624 decl_rtl_to_reset.release ();
7625 data->iv_uses.release ();
7626 data->iv_candidates.release ();
7627 delete data->inv_expr_tab;
7628 data->inv_expr_tab = NULL;
7629 free_affine_expand_cache (&data->name_expansion_cache);
7630 delete data->iv_common_cand_tab;
7631 data->iv_common_cand_tab = NULL;
7632 data->iv_common_cands.release ();
7633 obstack_free (&data->iv_obstack, NULL);
7636 /* Returns true if the loop body BODY includes any function calls. */
7638 static bool
7639 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7641 gimple_stmt_iterator gsi;
7642 unsigned i;
7644 for (i = 0; i < num_nodes; i++)
7645 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7647 gimple *stmt = gsi_stmt (gsi);
7648 if (is_gimple_call (stmt)
7649 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7650 return true;
7652 return false;
7655 /* Optimizes the LOOP. Returns true if anything changed. */
7657 static bool
7658 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7660 bool changed = false;
7661 struct iv_ca *iv_ca;
7662 edge exit = single_dom_exit (loop);
7663 basic_block *body;
7665 gcc_assert (!data->niters);
7666 data->current_loop = loop;
7667 data->loop_loc = find_loop_location (loop);
7668 data->speed = optimize_loop_for_speed_p (loop);
7670 if (dump_file && (dump_flags & TDF_DETAILS))
7672 fprintf (dump_file, "Processing loop %d", loop->num);
7673 if (data->loop_loc != UNKNOWN_LOCATION)
7674 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7675 LOCATION_LINE (data->loop_loc));
7676 fprintf (dump_file, "\n");
7678 if (exit)
7680 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7681 exit->src->index, exit->dest->index);
7682 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7683 fprintf (dump_file, "\n");
7686 fprintf (dump_file, "\n");
7689 body = get_loop_body (loop);
7690 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7691 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7692 free (body);
7694 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7696 /* For each ssa name determines whether it behaves as an induction variable
7697 in some loop. */
7698 if (!find_induction_variables (data))
7699 goto finish;
7701 /* Finds interesting uses (item 1). */
7702 find_interesting_uses (data);
7703 group_address_uses (data);
7704 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7705 goto finish;
7707 /* Finds candidates for the induction variables (item 2). */
7708 find_iv_candidates (data);
7710 /* Calculates the costs (item 3, part 1). */
7711 determine_iv_costs (data);
7712 determine_use_iv_costs (data);
7713 determine_set_costs (data);
7715 /* Find the optimal set of induction variables (item 3, part 2). */
7716 iv_ca = find_optimal_iv_set (data);
7717 if (!iv_ca)
7718 goto finish;
7719 changed = true;
7721 /* Create the new induction variables (item 4, part 1). */
7722 create_new_ivs (data, iv_ca);
7723 iv_ca_free (&iv_ca);
7725 /* Rewrite the uses (item 4, part 2). */
7726 rewrite_uses (data);
7728 /* Remove the ivs that are unused after rewriting. */
7729 remove_unused_ivs (data);
7731 /* We have changed the structure of induction variables; it might happen
7732 that definitions in the scev database refer to some of them that were
7733 eliminated. */
7734 scev_reset ();
7736 finish:
7737 free_loop_data (data);
7739 return changed;
7742 /* Main entry point. Optimizes induction variables in loops. */
7744 void
7745 tree_ssa_iv_optimize (void)
7747 struct loop *loop;
7748 struct ivopts_data data;
7750 tree_ssa_iv_optimize_init (&data);
7752 /* Optimize the loops starting with the innermost ones. */
7753 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7755 if (dump_file && (dump_flags & TDF_DETAILS))
7756 flow_loop_dump (loop, dump_file, NULL, 1);
7758 tree_ssa_iv_optimize_loop (&data, loop);
7761 tree_ssa_iv_optimize_finalize (&data);