/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
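/* As an illustration only (this example is not taken from the surrounding
   code): given a source loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   the address a + i * sizeof (*a) is an interesting address use, i is a
   candidate (the original biv), and a pointer iv stepping by sizeof (*a)
   is a derived candidate.  If the cost model selects the pointer iv, the
   loop is rewritten along the lines of

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   which strength-reduces the multiplication away and, if i is otherwise
   unused, eliminates the original counter as well.  */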
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = max_stmt_executions_int (loop);
      if (niter == -1 || niter > AVG_LOOP_NITER (loop))
	return AVG_LOOP_NITER (loop);
    }

  return niter;
}
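/* For instance (illustrative only): with profile data estimating 1000
   executions, avg_loop_niter returns 1000 unchanged; without profile data,
   a computed maximum of 3 iterations is returned as-is, while a maximum of
   100, or no known bound at all, is capped to AVG_LOOP_NITER, i.e. 5.
   Note the cap only applies on the max_stmt_executions_int fallback.  */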
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The id of the use this iv is recorded for, valid
			   when have_use_for is set.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost = {0, 0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use *> iv_uses;

  /* The candidates.  */
  vec<iv_cand *> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");

  if (iv->no_overflow)
    fprintf (file, "  iv doesn't overflow wrto loop niter\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
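/* Illustrative examples (not exhaustive): for EXPR == &a[i] the base
   address of a[i] is a, so (void *) &a is returned; for EXPR == p + 4
   the function recurses on p, and a plain pointer ssa name falls through
   to the default case, where the pointer value itself serves as the base
   object; an integer constant pointer yields NULL_TREE, i.e. no
   particular object.  */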
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we would stop at any ssa var, but expanding is
	 expensive and such cases are unusual, so we only do it for the
	 first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we would stop at any ssa var, but expanding is
     expensive and such cases are unusual, so we only do it for the
     first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple *stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  return use;
}
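/* For instance (illustrative only): address uses a[i], a[i + 3] and
   a[i + 1] share the same stripped base and step, so they end up in one
   group whose sub use list is kept sorted by constant offset -- 0, then
   1 * sizeof (*a), then 3 * sizeof (*a).  The cost model can then consider
   expressing every member of the group from a single pointer iv plus a
   small constant displacement.  */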
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
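/* For example (illustrative only): for an exit test i_1 < n_2 where i_1
   is an iv with nonzero step and n_2 is loop invariant, *CONTROL_VAR is
   set to the location of i_1, *BOUND to that of n_2, and true is
   returned.  For the mirrored test n_2 < i_1 the operands are first
   swapped so the iv still ends up in *CONTROL_VAR.  */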
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_use (data, NULL, var_iv, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;
      /* Fallthru.  */
    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
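/* For example (illustrative only): with a biv i_1 and the statements

     t_2 = (long) i_1;
     t_3 = t_2 * 4;

   asking for the deriving biv of t_3 traces t_3 through the MULT_EXPR to
   t_2, through the conversion to i_1, and returns i_1's iv, since that is
   the first biv reached on the definition chain.  */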
/* Record that BIV is used in an address type use, together with any biv
   that is its predecessor or successor (i.e. differs from it by exactly
   one step).  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
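/* A few illustrative cases (not from the surrounding code): with
   TOP = n * 4 + n and BOT = n, MUL is set to 5 (4 from the MULT_EXPR arm
   plus 1 from the equal-operands base case); with TOP = n * 4 - n, MUL is
   set to 3; with TOP = 12 and BOT = 8 the function returns false, since
   the truncating division leaves a nonzero remainder.  */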
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
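/* Worked example (illustrative only): for a reference of a type with a
   64-bit alignment requirement and a STEP known to be a multiple of 4
   bytes but not of 8 (tree_ctz (step) == 2), the last test computes
   (1U << 2) * BITS_PER_UNIT == 32 < 64, so even a currently aligned
   access could drift to an unaligned address after some number of steps,
   and true is returned.  */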
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has same stripped iv base and step,
   this function records this one as a sub use to that; otherwise records
   it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
2239 /* Finds and records invariants used in STMT. */
2241 static void
2242 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2244 ssa_op_iter iter;
2245 use_operand_p use_p;
2246 tree op;
2248 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2250 op = USE_FROM_PTR (use_p);
2251 record_invariant (data, op, false);
2255 /* Finds interesting uses of induction variables in the statement STMT. */
2257 static void
2258 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2260 struct iv *iv;
2261 tree op, *lhs, *rhs;
2262 ssa_op_iter iter;
2263 use_operand_p use_p;
2264 enum tree_code code;
2266 find_invariants_stmt (data, stmt);
2268 if (gimple_code (stmt) == GIMPLE_COND)
2270 find_interesting_uses_cond (data, stmt);
2271 return;
2274 if (is_gimple_assign (stmt))
2276 lhs = gimple_assign_lhs_ptr (stmt);
2277 rhs = gimple_assign_rhs1_ptr (stmt);
2279 if (TREE_CODE (*lhs) == SSA_NAME)
2281 /* If the statement defines an induction variable, the uses are not
2282 interesting by themselves. */
2284 iv = get_iv (data, *lhs);
2286 if (iv && !integer_zerop (iv->step))
2287 return;
2290 code = gimple_assign_rhs_code (stmt);
2291 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2292 && (REFERENCE_CLASS_P (*rhs)
2293 || is_gimple_val (*rhs)))
2295 if (REFERENCE_CLASS_P (*rhs))
2296 find_interesting_uses_address (data, stmt, rhs);
2297 else
2298 find_interesting_uses_op (data, *rhs);
2300 if (REFERENCE_CLASS_P (*lhs))
2301 find_interesting_uses_address (data, stmt, lhs);
2302 return;
2304 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2306 find_interesting_uses_cond (data, stmt);
2307 return;
2310 /* TODO -- we should also handle address uses of type
2312 memory = call (whatever);
2316 call (memory). */
2319 if (gimple_code (stmt) == GIMPLE_PHI
2320 && gimple_bb (stmt) == data->current_loop->header)
2322 iv = get_iv (data, PHI_RESULT (stmt));
2324 if (iv && !integer_zerop (iv->step))
2325 return;
2328 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2330 op = USE_FROM_PTR (use_p);
2332 if (TREE_CODE (op) != SSA_NAME)
2333 continue;
2335 iv = get_iv (data, op);
2336 if (!iv)
2337 continue;
2339 find_interesting_uses_op (data, op);
2343 /* Finds interesting uses of induction variables outside of loops
2344 on loop exit edge EXIT. */
2346 static void
2347 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2349 gphi *phi;
2350 gphi_iterator psi;
2351 tree def;
2353 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2355 phi = psi.phi ();
2356 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2357 if (!virtual_operand_p (def))
2358 find_interesting_uses_op (data, def);
2362 /* Finds uses of the induction variables that are interesting. */
2364 static void
2365 find_interesting_uses (struct ivopts_data *data)
2367 basic_block bb;
2368 gimple_stmt_iterator bsi;
2369 basic_block *body = get_loop_body (data->current_loop);
2370 unsigned i;
2371 struct version_info *info;
2372 edge e;
2374 if (dump_file && (dump_flags & TDF_DETAILS))
2375 fprintf (dump_file, "Uses:\n\n");
2377 for (i = 0; i < data->current_loop->num_nodes; i++)
2379 edge_iterator ei;
2380 bb = body[i];
2382 FOR_EACH_EDGE (e, ei, bb->succs)
2383 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2384 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2385 find_interesting_uses_outside (data, e);
2387 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2388 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2389 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2390 if (!is_gimple_debug (gsi_stmt (bsi)))
2391 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2394 if (dump_file && (dump_flags & TDF_DETAILS))
2396 bitmap_iterator bi;
2398 fprintf (dump_file, "\n");
2400 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2402 info = ver_info (data, i);
2403 if (info->inv_id)
2405 fprintf (dump_file, " ");
2406 print_generic_expr (dump_file, info->name, TDF_SLIM);
2407 fprintf (dump_file, " is invariant (%d)%s\n",
2408 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2412 fprintf (dump_file, "\n");
2415 free (body);
2418 /* Compute the maximum offset of the [base + offset] addressing mode
2419 for the memory reference represented by USE. */
2421 static HOST_WIDE_INT
2422 compute_max_addr_offset (struct iv_use *use)
2424 int width;
2425 rtx reg, addr;
2426 HOST_WIDE_INT i, off;
2427 unsigned list_index, num;
2428 addr_space_t as;
2429 machine_mode mem_mode, addr_mode;
2430 static vec<HOST_WIDE_INT> max_offset_list;
2432 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2433 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2435 num = max_offset_list.length ();
2436 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2437 if (list_index >= num)
2439 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2440 for (; num < max_offset_list.length (); num++)
2441 max_offset_list[num] = -1;
2444 off = max_offset_list[list_index];
2445 if (off != -1)
2446 return off;
2448 addr_mode = targetm.addr_space.address_mode (as);
2449 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2450 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2452 width = GET_MODE_BITSIZE (addr_mode) - 1;
2453 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2454 width = HOST_BITS_PER_WIDE_INT - 1;
2456 for (i = width; i > 0; i--)
2458 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2459 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2460 if (memory_address_addr_space_p (mem_mode, addr, as))
2461 break;
2463 /* For some strict-alignment targets, the offset must be naturally
2464 aligned. Try an aligned offset if mem_mode is not QImode. */
2465 off = ((unsigned HOST_WIDE_INT) 1 << i);
2466 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2468 off -= GET_MODE_SIZE (mem_mode);
2469 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2470 if (memory_address_addr_space_p (mem_mode, addr, as))
2471 break;
2474 if (i == 0)
2475 off = 0;
2477 max_offset_list[list_index] = off;
2478 return off;
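/* Illustration (added, not part of the original sources): the loop above
   probes offsets of the form (1 << i) - 1 from the widest bit position
   downwards.  On a hypothetical target whose [base + offset] form takes a
   12-bit signed offset, the first probe that validates is base + 2047, so
   2047 is cached in max_offset_list for this mode and address space and
   returned on all later queries.  */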
2481 /* Check if all small groups should be split. Return true if and
2482 only if:
2484 1) At least one group contains two uses with distinct offsets.
2485 2) No group contains more than two distinct offsets.
2487 Return false otherwise. We want to split such groups because:
2489 1) Small groups don't have much benefit and may interfere with
2490 general candidate selection.
2491 2) The problem size with only small groups is usually small and the
2492 general algorithm can handle it well.
2494 TODO -- Above claim may not hold when auto increment is supported. */
2496 static bool
2497 split_all_small_groups (struct ivopts_data *data)
2499 bool split_p = false;
2500 unsigned int i, n, distinct;
2501 struct iv_use *pre, *use;
2503 n = n_iv_uses (data);
2504 for (i = 0; i < n; i++)
2506 use = iv_use (data, i);
2507 if (!use->next)
2508 continue;
2510 distinct = 1;
2511 gcc_assert (use->type == USE_ADDRESS);
2512 for (pre = use, use = use->next; use; pre = use, use = use->next)
2514 if (pre->addr_offset != use->addr_offset)
2515 distinct++;
2517 if (distinct > 2)
2518 return false;
2520 if (distinct == 2)
2521 split_p = true;
2524 return split_p;
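/* Illustration (added, not part of the original sources): with two address
   groups whose sorted offsets are {0, 4} and {0, 8}, each group has exactly
   two distinct offsets, so true is returned and both groups are split.
   A single group with offsets {0, 4, 8} has three distinct offsets, so
   false is returned and the offset-based regrouping below is used.  */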
2527 /* For each group of address type uses, this function further groups
2528 these uses according to the maximum offset supported by the target's
2529 [base + offset] addressing mode. */
2531 static void
2532 group_address_uses (struct ivopts_data *data)
2534 HOST_WIDE_INT max_offset = -1;
2535 unsigned int i, n, sub_id;
2536 struct iv_use *pre, *use;
2537 unsigned HOST_WIDE_INT addr_offset_first;
2539 /* Reset max offset to split all small groups. */
2540 if (split_all_small_groups (data))
2541 max_offset = 0;
2543 n = n_iv_uses (data);
2544 for (i = 0; i < n; i++)
2546 use = iv_use (data, i);
2547 if (!use->next)
2548 continue;
2550 gcc_assert (use->type == USE_ADDRESS);
2551 if (max_offset != 0)
2552 max_offset = compute_max_addr_offset (use);
2554 while (use)
2556 sub_id = 0;
2557 addr_offset_first = use->addr_offset;
2558 /* Only uses whose offset relative to the first use fits in the offset
2559 part of the addressing mode can be grouped together. */
2560 for (pre = use, use = use->next;
2561 use && (use->addr_offset - addr_offset_first
2562 <= (unsigned HOST_WIDE_INT) max_offset);
2563 pre = use, use = use->next)
2565 use->id = pre->id;
2566 use->sub_id = ++sub_id;
2569 /* Break the list and create new group. */
2570 if (use)
2572 pre->next = NULL;
2573 use->id = n_iv_uses (data);
2574 use->related_cands = BITMAP_ALLOC (NULL);
2575 data->iv_uses.safe_push (use);
2580 if (dump_file && (dump_flags & TDF_DETAILS))
2581 dump_uses (dump_file, data);
2584 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2585 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2586 we are at the top-level of the processed address. */
2588 static tree
2589 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2590 HOST_WIDE_INT *offset)
2592 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2593 enum tree_code code;
2594 tree type, orig_type = TREE_TYPE (expr);
2595 HOST_WIDE_INT off0, off1, st;
2596 tree orig_expr = expr;
2598 STRIP_NOPS (expr);
2600 type = TREE_TYPE (expr);
2601 code = TREE_CODE (expr);
2602 *offset = 0;
2604 switch (code)
2606 case INTEGER_CST:
2607 if (!cst_and_fits_in_hwi (expr)
2608 || integer_zerop (expr))
2609 return orig_expr;
2611 *offset = int_cst_value (expr);
2612 return build_int_cst (orig_type, 0);
2614 case POINTER_PLUS_EXPR:
2615 case PLUS_EXPR:
2616 case MINUS_EXPR:
2617 op0 = TREE_OPERAND (expr, 0);
2618 op1 = TREE_OPERAND (expr, 1);
2620 op0 = strip_offset_1 (op0, false, false, &off0);
2621 op1 = strip_offset_1 (op1, false, false, &off1);
2623 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2624 if (op0 == TREE_OPERAND (expr, 0)
2625 && op1 == TREE_OPERAND (expr, 1))
2626 return orig_expr;
2628 if (integer_zerop (op1))
2629 expr = op0;
2630 else if (integer_zerop (op0))
2632 if (code == MINUS_EXPR)
2633 expr = fold_build1 (NEGATE_EXPR, type, op1);
2634 else
2635 expr = op1;
2637 else
2638 expr = fold_build2 (code, type, op0, op1);
2640 return fold_convert (orig_type, expr);
2642 case MULT_EXPR:
2643 op1 = TREE_OPERAND (expr, 1);
2644 if (!cst_and_fits_in_hwi (op1))
2645 return orig_expr;
2647 op0 = TREE_OPERAND (expr, 0);
2648 op0 = strip_offset_1 (op0, false, false, &off0);
2649 if (op0 == TREE_OPERAND (expr, 0))
2650 return orig_expr;
2652 *offset = off0 * int_cst_value (op1);
2653 if (integer_zerop (op0))
2654 expr = op0;
2655 else
2656 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2658 return fold_convert (orig_type, expr);
2660 case ARRAY_REF:
2661 case ARRAY_RANGE_REF:
2662 if (!inside_addr)
2663 return orig_expr;
2665 step = array_ref_element_size (expr);
2666 if (!cst_and_fits_in_hwi (step))
2667 break;
2669 st = int_cst_value (step);
2670 op1 = TREE_OPERAND (expr, 1);
2671 op1 = strip_offset_1 (op1, false, false, &off1);
2672 *offset = off1 * st;
2674 if (top_compref
2675 && integer_zerop (op1))
2677 /* Strip the component reference completely. */
2678 op0 = TREE_OPERAND (expr, 0);
2679 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2680 *offset += off0;
2681 return op0;
2683 break;
2685 case COMPONENT_REF:
2687 tree field;
2689 if (!inside_addr)
2690 return orig_expr;
2692 tmp = component_ref_field_offset (expr);
2693 field = TREE_OPERAND (expr, 1);
2694 if (top_compref
2695 && cst_and_fits_in_hwi (tmp)
2696 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2698 HOST_WIDE_INT boffset, abs_off;
2700 /* Strip the component reference completely. */
2701 op0 = TREE_OPERAND (expr, 0);
2702 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2703 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2704 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2705 if (boffset < 0)
2706 abs_off = -abs_off;
2708 *offset = off0 + int_cst_value (tmp) + abs_off;
2709 return op0;
2712 break;
2714 case ADDR_EXPR:
2715 op0 = TREE_OPERAND (expr, 0);
2716 op0 = strip_offset_1 (op0, true, true, &off0);
2717 *offset += off0;
2719 if (op0 == TREE_OPERAND (expr, 0))
2720 return orig_expr;
2722 expr = build_fold_addr_expr (op0);
2723 return fold_convert (orig_type, expr);
2725 case MEM_REF:
2726 /* ??? Offset operand? */
2727 inside_addr = false;
2728 break;
2730 default:
2731 return orig_expr;
2734 /* Default handling of expressions for which we want to recurse into
2735 the first operand. */
2736 op0 = TREE_OPERAND (expr, 0);
2737 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2738 *offset += off0;
2740 if (op0 == TREE_OPERAND (expr, 0)
2741 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2742 return orig_expr;
2744 expr = copy_node (expr);
2745 TREE_OPERAND (expr, 0) = op0;
2746 if (op1)
2747 TREE_OPERAND (expr, 1) = op1;
2749 /* Inside an address, we might strip the top-level component references,
2750 thus changing the type of the expression. The handling of ADDR_EXPR
2751 will fix that. */
2752 expr = fold_convert (orig_type, expr);
2754 return expr;
2757 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2759 static tree
2760 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2762 HOST_WIDE_INT off;
2763 tree core = strip_offset_1 (expr, false, false, &off);
2764 *offset = off;
2765 return core;
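/* Illustration (added, not part of the original sources): strip_offset
   splits a constant displacement off an expression.  Assuming an array A
   of 4-byte ints and a pointer p:

     strip_offset (&A[i + 3], &off)   returns &A[i],  off == 12
     strip_offset (p + 20, &off)      returns p,      off == 20
     strip_offset (p, &off)           returns p,      off == 0

   The stripped base is what record_group_use compares when deciding
   whether two address uses belong to the same group.  */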
2768 /* Returns a variant of TYPE that can be used as a base for different uses.
2769 We return an unsigned type with the same precision, which avoids problems
2770 with overflows. */
2772 static tree
2773 generic_type_for (tree type)
2775 if (POINTER_TYPE_P (type))
2776 return unsigned_type_for (type);
2778 if (TYPE_UNSIGNED (type))
2779 return type;
2781 return unsigned_type_for (type);
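/* Illustration (added, not part of the original sources): the generic type
   is always unsigned, so candidate arithmetic wraps instead of triggering
   undefined overflow:

     generic_type_for (int)       == unsigned int
     generic_type_for (unsigned)  == unsigned (returned unchanged)
     generic_type_for (char *)    == an unsigned integer type of pointer
                                     precision  */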
2784 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2785 the bitmap in which we should store them. */
2787 static struct ivopts_data *fd_ivopts_data;
2788 static tree
2789 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2791 bitmap *depends_on = (bitmap *) data;
2792 struct version_info *info;
2794 if (TREE_CODE (*expr_p) != SSA_NAME)
2795 return NULL_TREE;
2796 info = name_info (fd_ivopts_data, *expr_p);
2798 if (!info->inv_id || info->has_nonlin_use)
2799 return NULL_TREE;
2801 if (!*depends_on)
2802 *depends_on = BITMAP_ALLOC (NULL);
2803 bitmap_set_bit (*depends_on, info->inv_id);
2805 return NULL_TREE;
2808 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2809 position to POS. If USE is not NULL, the candidate is set as related to
2810 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2811 replacement of the final value of the iv by a direct computation. */
2813 static struct iv_cand *
2814 add_candidate_1 (struct ivopts_data *data,
2815 tree base, tree step, bool important, enum iv_position pos,
2816 struct iv_use *use, gimple *incremented_at,
2817 struct iv *orig_iv = NULL)
2819 unsigned i;
2820 struct iv_cand *cand = NULL;
2821 tree type, orig_type;
2823 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2824 live, but the ivopts code may replace a real pointer with one
2825 pointing before or after the memory block that is then adjusted
2826 into the memory block during the loop. FIXME: It would likely be
2827 better to actually force the pointer live and still use ivopts;
2828 for example, it would be enough to write the pointer into memory
2829 and keep it there until after the loop. */
2830 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2831 return NULL;
2833 /* For non-original variables, make sure their values are computed in a type
2834 that does not invoke undefined behavior on overflows (since in general,
2835 we cannot prove that these induction variables are non-wrapping). */
2836 if (pos != IP_ORIGINAL)
2838 orig_type = TREE_TYPE (base);
2839 type = generic_type_for (orig_type);
2840 if (type != orig_type)
2842 base = fold_convert (type, base);
2843 step = fold_convert (type, step);
2847 for (i = 0; i < n_iv_cands (data); i++)
2849 cand = iv_cand (data, i);
2851 if (cand->pos != pos)
2852 continue;
2854 if (cand->incremented_at != incremented_at
2855 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2856 && cand->ainc_use != use))
2857 continue;
2859 if (!cand->iv)
2861 if (!base && !step)
2862 break;
2864 continue;
2867 if (!base && !step)
2868 continue;
2870 if (operand_equal_p (base, cand->iv->base, 0)
2871 && operand_equal_p (step, cand->iv->step, 0)
2872 && (TYPE_PRECISION (TREE_TYPE (base))
2873 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2874 break;
2877 if (i == n_iv_cands (data))
2879 cand = XCNEW (struct iv_cand);
2880 cand->id = i;
2882 if (!base && !step)
2883 cand->iv = NULL;
2884 else
2885 cand->iv = alloc_iv (data, base, step);
2887 cand->pos = pos;
2888 if (pos != IP_ORIGINAL && cand->iv)
2890 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2891 cand->var_after = cand->var_before;
2893 cand->important = important;
2894 cand->incremented_at = incremented_at;
2895 data->iv_candidates.safe_push (cand);
2897 if (step
2898 && TREE_CODE (step) != INTEGER_CST)
2900 fd_ivopts_data = data;
2901 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2904 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2905 cand->ainc_use = use;
2906 else
2907 cand->ainc_use = NULL;
2909 cand->orig_iv = orig_iv;
2910 if (dump_file && (dump_flags & TDF_DETAILS))
2911 dump_cand (dump_file, cand);
2914 if (important && !cand->important)
2916 cand->important = true;
2917 if (dump_file && (dump_flags & TDF_DETAILS))
2918 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2921 if (use)
2923 bitmap_set_bit (use->related_cands, i);
2924 if (dump_file && (dump_flags & TDF_DETAILS))
2925 fprintf (dump_file, "Candidate %d is related to use %d\n",
2926 cand->id, use->id);
2929 return cand;
2932 /* Returns true if incrementing the induction variable at the end of the LOOP
2933 is allowed.
2935 The purpose is to avoid splitting the latch edge with a biv increment,
2936 thus creating a jump, possibly confusing other optimization passes and
2937 leaving less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2938 is not available (so we do not have a better alternative), or if the latch
2939 edge is already nonempty. */
2941 static bool
2942 allow_ip_end_pos_p (struct loop *loop)
2944 if (!ip_normal_pos (loop))
2945 return true;
2947 if (!empty_block_p (ip_end_pos (loop)))
2948 return true;
2950 return false;
2953 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2954 Important field is set to IMPORTANT. */
2956 static void
2957 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2958 bool important, struct iv_use *use)
2960 basic_block use_bb = gimple_bb (use->stmt);
2961 machine_mode mem_mode;
2962 unsigned HOST_WIDE_INT cstepi;
2964 /* If we insert the increment in any position other than the standard
2965 ones, we must ensure that it is incremented once per iteration.
2966 It must not be in an inner nested loop, or on one side of an if
2967 statement. */
2968 if (use_bb->loop_father != data->current_loop
2969 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2970 || stmt_could_throw_p (use->stmt)
2971 || !cst_and_fits_in_hwi (step))
2972 return;
2974 cstepi = int_cst_value (step);
2976 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2977 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2978 || USE_STORE_PRE_INCREMENT (mem_mode))
2979 && GET_MODE_SIZE (mem_mode) == cstepi)
2980 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2981 || USE_STORE_PRE_DECREMENT (mem_mode))
2982 && GET_MODE_SIZE (mem_mode) == -cstepi))
2984 enum tree_code code = MINUS_EXPR;
2985 tree new_base;
2986 tree new_step = step;
2988 if (POINTER_TYPE_P (TREE_TYPE (base)))
2990 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2991 code = POINTER_PLUS_EXPR;
2993 else
2994 new_step = fold_convert (TREE_TYPE (base), new_step);
2995 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2996 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2997 use->stmt);
2999 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3000 || USE_STORE_POST_INCREMENT (mem_mode))
3001 && GET_MODE_SIZE (mem_mode) == cstepi)
3002 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3003 || USE_STORE_POST_DECREMENT (mem_mode))
3004 && GET_MODE_SIZE (mem_mode) == -cstepi))
3006 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3007 use->stmt);
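/* Illustration (added, not part of the original sources): on a target with
   post-increment addressing, e.g. an ARM-style "ldr r0, [r1], #4", a
   4-byte load whose address iv steps by exactly GET_MODE_SIZE (mem_mode),
   as in

     for (i = 0; i < n; i++)
       sum += *p++;

   receives an IP_AFTER_USE candidate above, allowing the pointer increment
   to be folded into the memory access itself.  */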
3011 /* Adds a candidate BASE + STEP * i. The important field is set to
3012 IMPORTANT. If USE is not NULL, the candidate is set as related to
3013 it. The candidate computation is scheduled before the exit condition
3014 and at the end of the loop. */
3016 static void
3017 add_candidate (struct ivopts_data *data,
3018 tree base, tree step, bool important, struct iv_use *use,
3019 struct iv *orig_iv = NULL)
3021 gcc_assert (use == NULL || use->sub_id == 0);
3023 if (ip_normal_pos (data->current_loop))
3024 add_candidate_1 (data, base, step, important,
3025 IP_NORMAL, use, NULL, orig_iv);
3026 if (ip_end_pos (data->current_loop)
3027 && allow_ip_end_pos_p (data->current_loop))
3028 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3031 /* Adds standard iv candidates. */
3033 static void
3034 add_standard_iv_candidates (struct ivopts_data *data)
3036 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3038 /* The same for a double-integer type if it is still fast enough. */
3039 if (TYPE_PRECISION
3040 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3041 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3042 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3043 build_int_cst (long_integer_type_node, 1), true, NULL);
3045 /* The same for a double-integer type if it is still fast enough. */
3046 if (TYPE_PRECISION
3047 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3048 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3049 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3050 build_int_cst (long_long_integer_type_node, 1), true, NULL);
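/* Illustration (added, not part of the original sources): on a typical
   LP64 target this adds the canonical counters {0, +, 1} in int (32-bit)
   and long (64-bit); the long long variant is skipped there because it
   has the same precision as long, so the second condition above fails.  */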
3054 /* Adds candidates based on the old induction variable IV. */
3056 static void
3057 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3059 gimple *phi;
3060 tree def;
3061 struct iv_cand *cand;
3063 /* Check if this biv is used in address type use. */
3064 if (iv->no_overflow && iv->have_address_use
3065 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3066 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3068 tree base = fold_convert (sizetype, iv->base);
3069 tree step = fold_convert (sizetype, iv->step);
3071 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3072 add_candidate (data, base, step, true, NULL, iv);
3073 /* Add iv cand of the original type only if it has nonlinear use. */
3074 if (iv->have_use_for)
3075 add_candidate (data, iv->base, iv->step, true, NULL);
3077 else
3078 add_candidate (data, iv->base, iv->step, true, NULL);
3080 /* The same, but with initial value zero. */
3081 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3082 add_candidate (data, size_int (0), iv->step, true, NULL);
3083 else
3084 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3085 iv->step, true, NULL);
3087 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3088 if (gimple_code (phi) == GIMPLE_PHI)
3090 /* Additionally record the possibility of leaving the original iv
3091 untouched. */
3092 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3093 /* Don't add candidate if it's from another PHI node because
3094 it's an affine iv appearing in the form of PEELED_CHREC. */
3095 phi = SSA_NAME_DEF_STMT (def);
3096 if (gimple_code (phi) != GIMPLE_PHI)
3098 cand = add_candidate_1 (data,
3099 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3100 SSA_NAME_DEF_STMT (def));
3101 if (cand)
3103 cand->var_before = iv->ssa_name;
3104 cand->var_after = def;
3107 else
3108 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3112 /* Adds candidates based on the old induction variables. */
3114 static void
3115 add_iv_candidate_for_bivs (struct ivopts_data *data)
3117 unsigned i;
3118 struct iv *iv;
3119 bitmap_iterator bi;
3121 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3123 iv = ver_info (data, i)->iv;
3124 if (iv && iv->biv_p && !integer_zerop (iv->step))
3125 add_iv_candidate_for_biv (data, iv);
3129 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3131 static void
3132 record_common_cand (struct ivopts_data *data, tree base,
3133 tree step, struct iv_use *use)
3135 struct iv_common_cand ent;
3136 struct iv_common_cand **slot;
3138 gcc_assert (use != NULL);
3140 ent.base = base;
3141 ent.step = step;
3142 ent.hash = iterative_hash_expr (base, 0);
3143 ent.hash = iterative_hash_expr (step, ent.hash);
3145 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3146 if (*slot == NULL)
3148 *slot = new iv_common_cand ();
3149 (*slot)->base = base;
3150 (*slot)->step = step;
3151 (*slot)->uses.create (8);
3152 (*slot)->hash = ent.hash;
3153 data->iv_common_cands.safe_push ((*slot));
3155 (*slot)->uses.safe_push (use);
3156 return;
3159 /* Comparison function used to sort common candidates. */
3161 static int
3162 common_cand_cmp (const void *p1, const void *p2)
3164 unsigned n1, n2;
3165 const struct iv_common_cand *const *const ccand1
3166 = (const struct iv_common_cand *const *)p1;
3167 const struct iv_common_cand *const *const ccand2
3168 = (const struct iv_common_cand *const *)p2;
3170 n1 = (*ccand1)->uses.length ();
3171 n2 = (*ccand2)->uses.length ();
3172 return n2 - n1;
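/* Illustration (added, not part of the original sources): returning
   n2 - n1 sorts in decreasing order of use count, e.g. use-list lengths
   {1, 3, 2} end up ordered {3, 2, 1}; this lets the consumer below stop
   scanning at the first common candidate with a single use.  */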
3175 /* Adds IV candidates based on the common candidates recorded. */
3177 static void
3178 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3180 unsigned i, j;
3181 struct iv_cand *cand_1, *cand_2;
3183 data->iv_common_cands.qsort (common_cand_cmp);
3184 for (i = 0; i < data->iv_common_cands.length (); i++)
3186 struct iv_common_cand *ptr = data->iv_common_cands[i];
3188 /* Only add IV candidate if it's derived from multiple uses. */
3189 if (ptr->uses.length () <= 1)
3190 break;
3192 cand_1 = NULL;
3193 cand_2 = NULL;
3194 if (ip_normal_pos (data->current_loop))
3195 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3196 false, IP_NORMAL, NULL, NULL);
3198 if (ip_end_pos (data->current_loop)
3199 && allow_ip_end_pos_p (data->current_loop))
3200 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3201 false, IP_END, NULL, NULL);
3203 /* Bind deriving uses and the new candidates. */
3204 for (j = 0; j < ptr->uses.length (); j++)
3206 struct iv_use *use = ptr->uses[j];
3207 if (cand_1)
3208 bitmap_set_bit (use->related_cands, cand_1->id);
3209 if (cand_2)
3210 bitmap_set_bit (use->related_cands, cand_2->id);
3214 /* Release data since it is useless from this point. */
3215 data->iv_common_cand_tab->empty ();
3216 data->iv_common_cands.truncate (0);
3219 /* Adds candidates based on the value of USE's iv. */
3221 static void
3222 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3224 unsigned HOST_WIDE_INT offset;
3225 tree base;
3226 tree basetype;
3227 struct iv *iv = use->iv;
3229 add_candidate (data, iv->base, iv->step, false, use);
3231 /* Record common candidate for use in case it can be shared by others. */
3232 record_common_cand (data, iv->base, iv->step, use);
3234 /* Record common candidate with initial value zero. */
3235 basetype = TREE_TYPE (iv->base);
3236 if (POINTER_TYPE_P (basetype))
3237 basetype = sizetype;
3238 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3240 /* Record a common candidate with the constant offset stripped from the
3241 base. As with the use itself, we also add a candidate directly for it. */
3242 base = strip_offset (iv->base, &offset);
3243 if (offset || base != iv->base)
3245 record_common_cand (data, base, iv->step, use);
3246 add_candidate (data, base, iv->step, false, use);
3249 /* Record common candidate with base_object removed in base. */
3250 if (iv->base_object != NULL)
3252 unsigned i;
3253 aff_tree aff_base;
3254 tree step, base_object = iv->base_object;
3256 base = iv->base;
3257 step = iv->step;
3258 STRIP_NOPS (base);
3259 STRIP_NOPS (step);
3260 STRIP_NOPS (base_object);
3261 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3262 for (i = 0; i < aff_base.n; i++)
3264 if (aff_base.elts[i].coef != 1)
3265 continue;
3267 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3268 break;
3270 if (i < aff_base.n)
3272 aff_combination_remove_elt (&aff_base, i);
3273 base = aff_combination_to_tree (&aff_base);
3274 basetype = TREE_TYPE (base);
3275 if (POINTER_TYPE_P (basetype))
3276 basetype = sizetype;
3278 step = fold_convert (basetype, step);
3279 record_common_cand (data, base, step, use);
3280 /* Also record common candidate with offset stripped. */
3281 base = strip_offset (base, &offset);
3282 if (offset)
3283 record_common_cand (data, base, step, use);
3287 /* Finally, add auto-increment candidates. Make such variables
3288 important, since other iv uses with the same base object may be based
3289 on them. */
3290 if (use != NULL && use->type == USE_ADDRESS)
3291 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3294 /* Adds candidates based on the uses. */
3296 static void
3297 add_iv_candidate_for_uses (struct ivopts_data *data)
3299 unsigned i;
3301 for (i = 0; i < n_iv_uses (data); i++)
3303 struct iv_use *use = iv_use (data, i);
3305 if (!use)
3306 continue;
3308 switch (use->type)
3310 case USE_NONLINEAR_EXPR:
3311 case USE_COMPARE:
3312 case USE_ADDRESS:
3313 /* Just add the ivs based on the value of the iv used here. */
3314 add_iv_candidate_for_use (data, use);
3315 break;
3317 default:
3318 gcc_unreachable ();
3321 add_iv_candidate_derived_from_uses (data);
3324 /* Record important candidates and add them to related_cands bitmaps. */
3326 static void
3327 record_important_candidates (struct ivopts_data *data)
3329 unsigned i;
3330 struct iv_use *use;
3332 for (i = 0; i < n_iv_cands (data); i++)
3334 struct iv_cand *cand = iv_cand (data, i);
3336 if (cand->important)
3337 bitmap_set_bit (data->important_candidates, i);
3340 data->consider_all_candidates = (n_iv_cands (data)
3341 <= CONSIDER_ALL_CANDIDATES_BOUND);
3343 /* Add important candidates to uses' related_cands bitmaps. */
3344 for (i = 0; i < n_iv_uses (data); i++)
3346 use = iv_use (data, i);
3347 bitmap_ior_into (use->related_cands, data->important_candidates);
3351 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3352 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3353 we allocate a simple list for every use. */
3355 static void
3356 alloc_use_cost_map (struct ivopts_data *data)
3358 unsigned i, size, s;
3360 for (i = 0; i < n_iv_uses (data); i++)
3362 struct iv_use *use = iv_use (data, i);
3364 if (data->consider_all_candidates)
3365 size = n_iv_cands (data);
3366 else
3368 s = bitmap_count_bits (use->related_cands);
3370 /* Round up to a power of two, so that taking the modulus by it is fast. */
3371 size = s ? (1 << ceil_log2 (s)) : 1;
3374 use->n_map_members = size;
3375 use->cost_map = XCNEWVEC (struct cost_pair, size);
3379 /* Returns a description of the computation cost of an expression whose
3380 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3382 static comp_cost
3383 new_cost (unsigned runtime, unsigned complexity)
3385 comp_cost cost;
3387 cost.cost = runtime;
3388 cost.complexity = complexity;
3390 return cost;
3393 /* Returns true if COST is infinite. */
3395 static bool
3396 infinite_cost_p (comp_cost cost)
3398 return cost.cost == INFTY;
3401 /* Adds costs COST1 and COST2. */
3403 static comp_cost
3404 add_costs (comp_cost cost1, comp_cost cost2)
3406 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3407 return infinite_cost;
3409 cost1.cost += cost2.cost;
3410 cost1.complexity += cost2.complexity;
3412 return cost1;
3414 /* Subtracts costs COST1 and COST2. */
3416 static comp_cost
3417 sub_costs (comp_cost cost1, comp_cost cost2)
3419 cost1.cost -= cost2.cost;
3420 cost1.complexity -= cost2.complexity;
3422 return cost1;
3425 /* Returns a negative number if COST1 < COST2, a positive number if
3426 COST1 > COST2, and 0 if COST1 = COST2. */
3428 static int
3429 compare_costs (comp_cost cost1, comp_cost cost2)
3431 if (cost1.cost == cost2.cost)
3432 return cost1.complexity - cost2.complexity;
3434 return cost1.cost - cost2.cost;
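/* Illustration (added, not part of the original sources): costs compare
   first by runtime cost and only then by complexity:

     compare_costs (new_cost (4, 2), new_cost (5, 0)) < 0   (4 < 5)
     compare_costs (new_cost (4, 2), new_cost (4, 1)) > 0   (tie on cost,
                                                             2 > 1)

   add_costs saturates at infinite_cost, so a pair involving an impossible
   (USE, CAND) combination stays impossible.  */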
3437 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3438 depends on invariants DEPENDS_ON, that the value used in expressing it
3439 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
3441 static void
3442 set_use_iv_cost (struct ivopts_data *data,
3443 struct iv_use *use, struct iv_cand *cand,
3444 comp_cost cost, bitmap depends_on, tree value,
3445 enum tree_code comp, int inv_expr_id)
3447 unsigned i, s;
3449 if (infinite_cost_p (cost))
3451 BITMAP_FREE (depends_on);
3452 return;
3455 if (data->consider_all_candidates)
3457 use->cost_map[cand->id].cand = cand;
3458 use->cost_map[cand->id].cost = cost;
3459 use->cost_map[cand->id].depends_on = depends_on;
3460 use->cost_map[cand->id].value = value;
3461 use->cost_map[cand->id].comp = comp;
3462 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3463 return;
3466 /* n_map_members is a power of two, so this computes modulo. */
3467 s = cand->id & (use->n_map_members - 1);
3468 for (i = s; i < use->n_map_members; i++)
3469 if (!use->cost_map[i].cand)
3470 goto found;
3471 for (i = 0; i < s; i++)
3472 if (!use->cost_map[i].cand)
3473 goto found;
3475 gcc_unreachable ();
3477 found:
3478 use->cost_map[i].cand = cand;
3479 use->cost_map[i].cost = cost;
3480 use->cost_map[i].depends_on = depends_on;
3481 use->cost_map[i].value = value;
3482 use->cost_map[i].comp = comp;
3483 use->cost_map[i].inv_expr_id = inv_expr_id;
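/* Illustration (added, not part of the original sources): the cost map is
   a small open-addressing hash table keyed by candidate id.  With
   n_map_members == 8 and cand->id == 13:

     s = 13 & (8 - 1) = 5

   so the probe sequence is slots 5, 6, 7, then 0 .. 4; the first empty
   slot receives the pair.  get_use_iv_cost below walks the same sequence,
   stopping at the candidate or at the first empty slot.  */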
3486 /* Gets cost of (USE, CANDIDATE) pair. */
3488 static struct cost_pair *
3489 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3490 struct iv_cand *cand)
3492 unsigned i, s;
3493 struct cost_pair *ret;
3495 if (!cand)
3496 return NULL;
3498 if (data->consider_all_candidates)
3500 ret = use->cost_map + cand->id;
3501 if (!ret->cand)
3502 return NULL;
3504 return ret;
3507 /* n_map_members is a power of two, so this computes modulo. */
3508 s = cand->id & (use->n_map_members - 1);
3509 for (i = s; i < use->n_map_members; i++)
3510 if (use->cost_map[i].cand == cand)
3511 return use->cost_map + i;
3512 else if (use->cost_map[i].cand == NULL)
3513 return NULL;
3514 for (i = 0; i < s; i++)
3515 if (use->cost_map[i].cand == cand)
3516 return use->cost_map + i;
3517 else if (use->cost_map[i].cand == NULL)
3518 return NULL;
3520 return NULL;
3523 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3524 static rtx
3525 produce_memory_decl_rtl (tree obj, int *regno)
3527 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3528 machine_mode address_mode = targetm.addr_space.address_mode (as);
3529 rtx x;
3531 gcc_assert (obj);
3532 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3534 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3535 x = gen_rtx_SYMBOL_REF (address_mode, name);
3536 SET_SYMBOL_REF_DECL (x, obj);
3537 x = gen_rtx_MEM (DECL_MODE (obj), x);
3538 set_mem_addr_space (x, as);
3539 targetm.encode_section_info (obj, x, true);
3541 else
3543 x = gen_raw_REG (address_mode, (*regno)++);
3544 x = gen_rtx_MEM (DECL_MODE (obj), x);
3545 set_mem_addr_space (x, as);
3548 return x;
3551 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3552 walk_tree. DATA contains the actual fake register number. */
3554 static tree
3555 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3557 tree obj = NULL_TREE;
3558 rtx x = NULL_RTX;
3559 int *regno = (int *) data;
3561 switch (TREE_CODE (*expr_p))
3563 case ADDR_EXPR:
3564 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3565 handled_component_p (*expr_p);
3566 expr_p = &TREE_OPERAND (*expr_p, 0))
3567 continue;
3568 obj = *expr_p;
3569 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3570 x = produce_memory_decl_rtl (obj, regno);
3571 break;
3573 case SSA_NAME:
3574 *ws = 0;
3575 obj = SSA_NAME_VAR (*expr_p);
3576 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3577 if (!obj)
3578 return NULL_TREE;
3579 if (!DECL_RTL_SET_P (obj))
3580 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3581 break;
3583 case VAR_DECL:
3584 case PARM_DECL:
3585 case RESULT_DECL:
3586 *ws = 0;
3587 obj = *expr_p;
3589 if (DECL_RTL_SET_P (obj))
3590 break;
3592 if (DECL_MODE (obj) == BLKmode)
3593 x = produce_memory_decl_rtl (obj, regno);
3594 else
3595 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3597 break;
3599 default:
3600 break;
3603 if (x)
3605 decl_rtl_to_reset.safe_push (obj);
3606 SET_DECL_RTL (obj, x);
3609 return NULL_TREE;
3612 /* Determines cost of the computation of EXPR. */
3614 static unsigned
3615 computation_cost (tree expr, bool speed)
3617 rtx_insn *seq;
3618 rtx rslt;
3619 tree type = TREE_TYPE (expr);
3620 unsigned cost;
3621 /* Avoid using hard regs in ways which may be unsupported. */
3622 int regno = LAST_VIRTUAL_REGISTER + 1;
3623 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3624 enum node_frequency real_frequency = node->frequency;
3626 node->frequency = NODE_FREQUENCY_NORMAL;
3627 crtl->maybe_hot_insn_p = speed;
3628 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3629 start_sequence ();
3630 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3631 seq = get_insns ();
3632 end_sequence ();
3633 default_rtl_profile ();
3634 node->frequency = real_frequency;
3636 cost = seq_cost (seq, speed);
3637 if (MEM_P (rslt))
3638 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3639 TYPE_ADDR_SPACE (type), speed);
3640 else if (!REG_P (rslt))
3641 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3643 return cost;
3646 /* Returns variable containing the value of candidate CAND at statement AT. */
3648 static tree
3649 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3651 if (stmt_after_increment (loop, cand, stmt))
3652 return cand->var_after;
3653 else
3654 return cand->var_before;
3657 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3658 same precision that is at least as wide as the precision of TYPE, stores
3659 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3660 type of A and B. */
3662 static tree
3663 determine_common_wider_type (tree *a, tree *b)
3665 tree wider_type = NULL;
3666 tree suba, subb;
3667 tree atype = TREE_TYPE (*a);
3669 if (CONVERT_EXPR_P (*a))
3671 suba = TREE_OPERAND (*a, 0);
3672 wider_type = TREE_TYPE (suba);
3673 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3674 return atype;
3676 else
3677 return atype;
3679 if (CONVERT_EXPR_P (*b))
3681 subb = TREE_OPERAND (*b, 0);
3682 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3683 return atype;
3685 else
3686 return atype;
3688 *a = suba;
3689 *b = subb;
3690 return wider_type;
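/* Illustration (added, not part of the original sources): if *A is
   (unsigned int) al and *B is (unsigned int) bl, with al and bl of type
   long, both operands are replaced by their inner long expressions and
   long is returned, so the difference below is folded in the wider type
   and only the final result is truncated.  */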
3693 /* Determines the expression by which USE is expressed from induction variable
3694 CAND at statement AT in LOOP. The expression is stored in a decomposed
3695 form into AFF. Returns false if USE cannot be expressed using CAND. */
3697 static bool
3698 get_computation_aff (struct loop *loop,
3699 struct iv_use *use, struct iv_cand *cand, gimple *at,
3700 struct aff_tree *aff)
3702 tree ubase = use->iv->base;
3703 tree ustep = use->iv->step;
3704 tree cbase = cand->iv->base;
3705 tree cstep = cand->iv->step, cstep_common;
3706 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3707 tree common_type, var;
3708 tree uutype;
3709 aff_tree cbase_aff, var_aff;
3710 widest_int rat;
3712 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3714 /* We do not have enough precision to express the values of the use. */
3715 return false;
3718 var = var_at_stmt (loop, cand, at);
3719 uutype = unsigned_type_for (utype);
3721 /* If the conversion is not a no-op, perform it. */
3722 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3724 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3725 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3727 tree inner_base, inner_step, inner_type;
3728 inner_base = TREE_OPERAND (cbase, 0);
3729 if (CONVERT_EXPR_P (cstep))
3730 inner_step = TREE_OPERAND (cstep, 0);
3731 else
3732 inner_step = cstep;
3734 inner_type = TREE_TYPE (inner_base);
3735 /* If the candidate is added from a biv whose type is smaller than
3736 ctype, we know both the candidate and the biv won't overflow.
3737 In this case, it's safe to skip the conversion in the candidate.
3738 As an example, (unsigned short)((unsigned long)A) equals
3739 (unsigned short)A, if A has a type no larger than short. */
3740 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3742 cbase = inner_base;
3743 cstep = inner_step;
3746 cstep = fold_convert (uutype, cstep);
3747 cbase = fold_convert (uutype, cbase);
3748 var = fold_convert (uutype, var);
3751 /* Ratio is 1 when computing the value of biv cand by itself.
3752 We can't rely on constant_multiple_of in this case because the
3753 use is created after the original biv is selected. The call
3754 could fail because of inconsistent fold behavior. See PR68021
3755 for more information. */
3756 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3758 gcc_assert (is_gimple_assign (use->stmt));
3759 gcc_assert (use->iv->ssa_name == cand->var_after);
3760 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3761 rat = 1;
3763 else if (!constant_multiple_of (ustep, cstep, &rat))
3764 return false;
3766 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3767 type, we achieve better folding by computing their difference in this
3768 wider type, and casting the result to UUTYPE. We do not need to worry about
3769 overflows, as all the arithmetic will in the end be performed in UUTYPE
3770 anyway. */
3771 common_type = determine_common_wider_type (&ubase, &cbase);
3773 /* use = ubase - ratio * cbase + ratio * var. */
3774 tree_to_aff_combination (ubase, common_type, aff);
3775 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3776 tree_to_aff_combination (var, uutype, &var_aff);
3778 /* We need to shift the value if we are after the increment. */
3779 if (stmt_after_increment (loop, cand, at))
3781 aff_tree cstep_aff;
3783 if (common_type != uutype)
3784 cstep_common = fold_convert (common_type, cstep);
3785 else
3786 cstep_common = cstep;
3788 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3789 aff_combination_add (&cbase_aff, &cstep_aff);
3792 aff_combination_scale (&cbase_aff, -rat);
3793 aff_combination_add (aff, &cbase_aff);
3794 if (common_type != uutype)
3795 aff_combination_convert (aff, uutype);
3797 aff_combination_scale (&var_aff, rat);
3798 aff_combination_add (aff, &var_aff);
3800 return true;
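/* Illustration (added, not part of the original sources): for a use with
   {ubase, +, ustep} = {&A[0] + 40, +, 4} expressed from a candidate with
   {cbase, +, cstep} = {0, +, 1}, constant_multiple_of yields rat == 4, and
   the combination computed above is

     use = ubase - rat * cbase + rat * var = &A[0] + 40 + 4 * var

   i.e. the use's value is recovered from the candidate's variable VAR by
   a single affine expression.  */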
3803 /* Return the type of USE. */
3805 static tree
3806 get_use_type (struct iv_use *use)
3808 tree base_type = TREE_TYPE (use->iv->base);
3809 tree type;
3811 if (use->type == USE_ADDRESS)
3813 /* The base_type may be a void pointer. Create a pointer type based on
3814 the mem_ref instead. */
3815 type = build_pointer_type (TREE_TYPE (*use->op_p));
3816 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3817 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3819 else
3820 type = base_type;
3822 return type;
3825 /* Determines the expression by which USE is expressed from induction variable
3826 CAND at statement AT in LOOP. The computation is unshared. */
3828 static tree
3829 get_computation_at (struct loop *loop,
3830 struct iv_use *use, struct iv_cand *cand, gimple *at)
3832 aff_tree aff;
3833 tree type = get_use_type (use);
3835 if (!get_computation_aff (loop, use, cand, at, &aff))
3836 return NULL_TREE;
3837 unshare_aff_combination (&aff);
3838 return fold_convert (type, aff_combination_to_tree (&aff));
3841 /* Determines the expression by which USE is expressed from induction variable
3842 CAND in LOOP. The computation is unshared. */
3844 static tree
3845 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3847 return get_computation_at (loop, use, cand, use->stmt);
3850 /* Adjust the cost COST for being in loop setup rather than loop body.
3851 If we're optimizing for space, the loop setup overhead is constant;
3852 if we're optimizing for speed, amortize it over the average number of iterations. */
3853 static unsigned
3854 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3856 if (cost == INFTY)
3857 return cost;
3858 else if (optimize_loop_for_speed_p (data->current_loop))
3859 return cost / avg_loop_niter (data->current_loop);
3860 else
3861 return cost;
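/* Illustration (added, not part of the original sources): with a setup
   cost of 10 and avg_loop_niter == 5, optimizing for speed charges only
   10 / 5 == 2, since the one-off setup is amortized over the iterations;
   optimizing for size keeps the full constant 10.  */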
3864 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3865 validity for a memory reference accessing memory of mode MODE in
3866 address space AS. */
3869 bool
3870 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3871 addr_space_t as)
3873 #define MAX_RATIO 128
3874 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3875 static vec<sbitmap> valid_mult_list;
3876 sbitmap valid_mult;
3878 if (data_index >= valid_mult_list.length ())
3879 valid_mult_list.safe_grow_cleared (data_index + 1);
3881 valid_mult = valid_mult_list[data_index];
3882 if (!valid_mult)
3884 machine_mode address_mode = targetm.addr_space.address_mode (as);
3885 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3886 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3887 rtx addr, scaled;
3888 HOST_WIDE_INT i;
3890 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3891 bitmap_clear (valid_mult);
3892 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3893 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3894 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3896 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3897 if (memory_address_addr_space_p (mode, addr, as)
3898 || memory_address_addr_space_p (mode, scaled, as))
3899 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3902 if (dump_file && (dump_flags & TDF_DETAILS))
3904 fprintf (dump_file, " allowed multipliers:");
3905 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3906 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3907 fprintf (dump_file, " %d", (int) i);
3908 fprintf (dump_file, "\n");
3909 fprintf (dump_file, "\n");
3912 valid_mult_list[data_index] = valid_mult;
3915 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3916 return false;
3918 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
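/* Illustration (added, not part of the original sources): on x86-64 the
   probing above typically accepts ratios 1, 2, 4 and 8 (the scales a SIB
   byte can encode), so an access like A[4 * i] keeps the multiplication
   inside the address, while A[12 * i] needs a separate multiply whose
   cost is accounted for in get_address_cost below.  */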
3921 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3922 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3923 variable is omitted. Compute the cost for a memory reference that accesses
3924 a memory location of mode MEM_MODE in address space AS.
3926 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3927 size of MEM_MODE / RATIO) is available. To make this determination, we
3928 look at the size of the increment to be made, which is given in CSTEP.
3929 CSTEP may be zero if the step is unknown.
3930 STMT_AFTER_INC is true iff the statement we're looking at is after the
3931 increment of the original biv.
3933 TODO -- there must be some better way. This is all quite crude. */
3935 enum ainc_type
3937 AINC_PRE_INC, /* Pre increment. */
3938 AINC_PRE_DEC, /* Pre decrement. */
3939 AINC_POST_INC, /* Post increment. */
3940 AINC_POST_DEC, /* Post decrement. */
3941 AINC_NONE /* Also the number of auto increment types. */
3944 struct address_cost_data
3946 HOST_WIDE_INT min_offset, max_offset;
3947 unsigned costs[2][2][2][2];
3948 unsigned ainc_costs[AINC_NONE];
3952 static comp_cost
3953 get_address_cost (bool symbol_present, bool var_present,
3954 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3955 HOST_WIDE_INT cstep, machine_mode mem_mode,
3956 addr_space_t as, bool speed,
3957 bool stmt_after_inc, bool *may_autoinc)
3959 machine_mode address_mode = targetm.addr_space.address_mode (as);
3960 static vec<address_cost_data *> address_cost_data_list;
3961 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3962 address_cost_data *data;
3963 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3964 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3965 unsigned cost, acost, complexity;
3966 enum ainc_type autoinc_type;
3967 bool offset_p, ratio_p, autoinc;
3968 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3969 unsigned HOST_WIDE_INT mask;
3970 unsigned bits;
3972 if (data_index >= address_cost_data_list.length ())
3973 address_cost_data_list.safe_grow_cleared (data_index + 1);
3975 data = address_cost_data_list[data_index];
3976 if (!data)
3978 HOST_WIDE_INT i;
3979 HOST_WIDE_INT rat, off = 0;
3980 int old_cse_not_expected, width;
3981 unsigned sym_p, var_p, off_p, rat_p, add_c;
3982 rtx_insn *seq;
3983 rtx addr, base;
3984 rtx reg0, reg1;
3986 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3988 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3990 width = GET_MODE_BITSIZE (address_mode) - 1;
3991 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3992 width = HOST_BITS_PER_WIDE_INT - 1;
3993 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3995 for (i = width; i >= 0; i--)
3997 off = -((unsigned HOST_WIDE_INT) 1 << i);
3998 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3999 if (memory_address_addr_space_p (mem_mode, addr, as))
4000 break;
4002 data->min_offset = (i == -1? 0 : off);
4004 for (i = width; i >= 0; i--)
4006 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4007 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4008 if (memory_address_addr_space_p (mem_mode, addr, as))
4009 break;
4010 /* For some strict-alignment targets, the offset must be naturally
4011 aligned. Try an aligned offset if mem_mode is not QImode. */
4012 off = mem_mode != QImode
4013 ? ((unsigned HOST_WIDE_INT) 1 << i)
4014 - GET_MODE_SIZE (mem_mode)
4015 : 0;
4016 if (off > 0)
4018 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4019 if (memory_address_addr_space_p (mem_mode, addr, as))
4020 break;
4023 if (i == -1)
4024 off = 0;
4025 data->max_offset = off;
4027 if (dump_file && (dump_flags & TDF_DETAILS))
4029 fprintf (dump_file, "get_address_cost:\n");
4030 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4031 GET_MODE_NAME (mem_mode),
4032 data->min_offset);
4033 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4034 GET_MODE_NAME (mem_mode),
4035 data->max_offset);
4038 rat = 1;
4039 for (i = 2; i <= MAX_RATIO; i++)
4040 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4042 rat = i;
4043 break;
4046 /* Compute the cost of various addressing modes. */
4047 acost = 0;
4048 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4049 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4051 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4052 || USE_STORE_PRE_DECREMENT (mem_mode))
4054 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4055 has_predec[mem_mode]
4056 = memory_address_addr_space_p (mem_mode, addr, as);
4058 if (has_predec[mem_mode])
4059 data->ainc_costs[AINC_PRE_DEC]
4060 = address_cost (addr, mem_mode, as, speed);
4062 if (USE_LOAD_POST_DECREMENT (mem_mode)
4063 || USE_STORE_POST_DECREMENT (mem_mode))
4065 addr = gen_rtx_POST_DEC (address_mode, reg0);
4066 has_postdec[mem_mode]
4067 = memory_address_addr_space_p (mem_mode, addr, as);
4069 if (has_postdec[mem_mode])
4070 data->ainc_costs[AINC_POST_DEC]
4071 = address_cost (addr, mem_mode, as, speed);
4073 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4074 || USE_STORE_PRE_DECREMENT (mem_mode))
4076 addr = gen_rtx_PRE_INC (address_mode, reg0);
4077 has_preinc[mem_mode]
4078 = memory_address_addr_space_p (mem_mode, addr, as);
4080 if (has_preinc[mem_mode])
4081 data->ainc_costs[AINC_PRE_INC]
4082 = address_cost (addr, mem_mode, as, speed);
4084 if (USE_LOAD_POST_INCREMENT (mem_mode)
4085 || USE_STORE_POST_INCREMENT (mem_mode))
4087 addr = gen_rtx_POST_INC (address_mode, reg0);
4088 has_postinc[mem_mode]
4089 = memory_address_addr_space_p (mem_mode, addr, as);
4091 if (has_postinc[mem_mode])
4092 data->ainc_costs[AINC_POST_INC]
4093 = address_cost (addr, mem_mode, as, speed);
4095 for (i = 0; i < 16; i++)
4097 sym_p = i & 1;
4098 var_p = (i >> 1) & 1;
4099 off_p = (i >> 2) & 1;
4100 rat_p = (i >> 3) & 1;
4102 addr = reg0;
4103 if (rat_p)
4104 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4105 gen_int_mode (rat, address_mode));
4107 if (var_p)
4108 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4110 if (sym_p)
4112 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4113 /* ??? We can run into trouble with some backends by presenting
4114 it with symbols which haven't been properly passed through
4115 targetm.encode_section_info. By setting the local bit, we
4116 enhance the probability of things working. */
4117 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4119 if (off_p)
4120 base = gen_rtx_fmt_e (CONST, address_mode,
4121 gen_rtx_fmt_ee
4122 (PLUS, address_mode, base,
4123 gen_int_mode (off, address_mode)));
4125 else if (off_p)
4126 base = gen_int_mode (off, address_mode);
4127 else
4128 base = NULL_RTX;
4130 if (base)
4131 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4133 start_sequence ();
4134 /* To avoid splitting addressing modes, pretend that no cse will
4135 follow. */
4136 old_cse_not_expected = cse_not_expected;
4137 cse_not_expected = true;
4138 addr = memory_address_addr_space (mem_mode, addr, as);
4139 cse_not_expected = old_cse_not_expected;
4140 seq = get_insns ();
4141 end_sequence ();
4143 acost = seq_cost (seq, speed);
4144 acost += address_cost (addr, mem_mode, as, speed);
4146 if (!acost)
4147 acost = 1;
4148 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4151 /* On some targets, it is quite expensive to load a symbol into a register,
4152 which makes addresses that contain symbols look much more expensive.
4153 However, the symbol will have to be loaded in any case before the
4154 loop (and quite likely we have it in a register already), so it does not
4155 make much sense to penalize them too heavily. Hence make some final
4156 tweaks for the SYMBOL_PRESENT modes:
4158 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4159 var is cheaper, use this mode with small penalty.
4160 If VAR_PRESENT is true, try whether the mode with
4161 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4162 if this is the case, use it. */
4163 add_c = add_cost (speed, address_mode);
4164 for (i = 0; i < 8; i++)
4166 var_p = i & 1;
4167 off_p = (i >> 1) & 1;
4168 rat_p = (i >> 2) & 1;
4170 acost = data->costs[0][1][off_p][rat_p] + 1;
4171 if (var_p)
4172 acost += add_c;
4174 if (acost < data->costs[1][var_p][off_p][rat_p])
4175 data->costs[1][var_p][off_p][rat_p] = acost;
4178 if (dump_file && (dump_flags & TDF_DETAILS))
4180 fprintf (dump_file, "Address costs:\n");
4182 for (i = 0; i < 16; i++)
4184 sym_p = i & 1;
4185 var_p = (i >> 1) & 1;
4186 off_p = (i >> 2) & 1;
4187 rat_p = (i >> 3) & 1;
4189 fprintf (dump_file, " ");
4190 if (sym_p)
4191 fprintf (dump_file, "sym + ");
4192 if (var_p)
4193 fprintf (dump_file, "var + ");
4194 if (off_p)
4195 fprintf (dump_file, "cst + ");
4196 if (rat_p)
4197 fprintf (dump_file, "rat * ");
4199 acost = data->costs[sym_p][var_p][off_p][rat_p];
4200 fprintf (dump_file, "index costs %d\n", acost);
4202 if (has_predec[mem_mode] || has_postdec[mem_mode]
4203 || has_preinc[mem_mode] || has_postinc[mem_mode])
4204 fprintf (dump_file, " May include autoinc/dec\n");
4205 fprintf (dump_file, "\n");
4208 address_cost_data_list[data_index] = data;
4211 bits = GET_MODE_BITSIZE (address_mode);
4212 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4213 offset &= mask;
4214 if ((offset >> (bits - 1) & 1))
4215 offset |= ~mask;
4216 s_offset = offset;
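/* Worked example (illustrative): with a 32-bit address_mode, bits = 32 and
   mask = 0xffffffff; an OFFSET of 0xfffffffc has bit 31 set and is therefore
   sign-extended, so s_offset = -4 -- the signed view used for the range
   check against min_offset/max_offset below.  */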
4218 autoinc = false;
4219 autoinc_type = AINC_NONE;
4220 msize = GET_MODE_SIZE (mem_mode);
4221 autoinc_offset = offset;
4222 if (stmt_after_inc)
4223 autoinc_offset += ratio * cstep;
4224 if (symbol_present || var_present || ratio != 1)
4225 autoinc = false;
4226 else
4228 if (has_postinc[mem_mode] && autoinc_offset == 0
4229 && msize == cstep)
4230 autoinc_type = AINC_POST_INC;
4231 else if (has_postdec[mem_mode] && autoinc_offset == 0
4232 && msize == -cstep)
4233 autoinc_type = AINC_POST_DEC;
4234 else if (has_preinc[mem_mode] && autoinc_offset == msize
4235 && msize == cstep)
4236 autoinc_type = AINC_PRE_INC;
4237 else if (has_predec[mem_mode] && autoinc_offset == -msize
4238 && msize == -cstep)
4239 autoinc_type = AINC_PRE_DEC;
4241 if (autoinc_type != AINC_NONE)
4242 autoinc = true;
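/* Example of the classification above (illustrative): a 4-byte access at
   autoinc_offset 0 from a candidate stepping by +4 matches AINC_POST_INC --
   exactly the access pattern of *p++ on targets with post-increment
   addressing.  */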
4245 cost = 0;
4246 offset_p = (s_offset != 0
4247 && data->min_offset <= s_offset
4248 && s_offset <= data->max_offset);
4249 ratio_p = (ratio != 1
4250 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4252 if (ratio != 1 && !ratio_p)
4253 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4255 if (s_offset && !offset_p && !symbol_present)
4256 cost += add_cost (speed, address_mode);
4258 if (may_autoinc)
4259 *may_autoinc = autoinc;
4260 if (autoinc)
4261 acost = data->ainc_costs[autoinc_type];
4262 else
4263 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4264 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
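/* Illustration (invented shapes): an address with all four parts present,
   e.g. sym + r1 + 4 + 2 * r2, has complexity 4, while one with only the
   variable and ratio parts scores 2. Complexity only breaks ties between
   computations of equal cost.  */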
4265 return new_cost (cost + acost, complexity);
4268 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4269 EXPR operand holding the shift. COST0 and COST1 are the costs for
4270 calculating the operands of EXPR. Returns true if successful, and returns
4271 the cost in COST. */
4273 static bool
4274 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4275 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4277 comp_cost res;
4278 tree op1 = TREE_OPERAND (expr, 1);
4279 tree cst = TREE_OPERAND (mult, 1);
4280 tree multop = TREE_OPERAND (mult, 0);
4281 int m = exact_log2 (int_cst_value (cst));
4282 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4283 int as_cost, sa_cost;
4284 bool mult_in_op1;
4286 if (!(m >= 0 && m < maxm))
4287 return false;
4289 STRIP_NOPS (op1);
4290 mult_in_op1 = operand_equal_p (op1, mult, 0);
4292 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4294 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4295 use that in preference to a shift insn followed by an add insn. */
4296 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4297 ? shiftadd_cost (speed, mode, m)
4298 : (mult_in_op1
4299 ? shiftsub1_cost (speed, mode, m)
4300 : shiftsub0_cost (speed, mode, m)));
4302 res = new_cost (MIN (as_cost, sa_cost), 0);
4303 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4305 STRIP_NOPS (multop);
4306 if (!is_gimple_val (multop))
4307 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4309 *cost = res;
4310 return true;
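/* A hedged sketch of the transformation whose cost is modelled above
   (identifiers invented): for  res = a + b * 8  we have m = 3, i.e.

     tmp = b << 3;   -- shift_cost (speed, mode, 3)
     res = a + tmp;  -- add_cost (speed, mode)

   and on targets with a cheap shift-and-add instruction the pair collapses
   into one insn, which is what sa_cost above captures.  */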
4313 /* Estimates cost of forcing expression EXPR into a variable. */
4315 static comp_cost
4316 force_expr_to_var_cost (tree expr, bool speed)
4318 static bool costs_initialized = false;
4319 static unsigned integer_cost [2];
4320 static unsigned symbol_cost [2];
4321 static unsigned address_cost [2];
4322 tree op0, op1;
4323 comp_cost cost0, cost1, cost;
4324 machine_mode mode;
4326 if (!costs_initialized)
4328 tree type = build_pointer_type (integer_type_node);
4329 tree var, addr;
4330 rtx x;
4331 int i;
4333 var = create_tmp_var_raw (integer_type_node, "test_var");
4334 TREE_STATIC (var) = 1;
4335 x = produce_memory_decl_rtl (var, NULL);
4336 SET_DECL_RTL (var, x);
4338 addr = build1 (ADDR_EXPR, type, var);
4341 for (i = 0; i < 2; i++)
4343 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4344 2000), i);
4346 symbol_cost[i] = computation_cost (addr, i) + 1;
4348 address_cost[i]
4349 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4352 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4353 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4354 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4355 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4356 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4357 fprintf (dump_file, "\n");
4361 costs_initialized = true;
4364 STRIP_NOPS (expr);
4366 if (SSA_VAR_P (expr))
4367 return no_cost;
4369 if (is_gimple_min_invariant (expr))
4371 if (TREE_CODE (expr) == INTEGER_CST)
4372 return new_cost (integer_cost [speed], 0);
4374 if (TREE_CODE (expr) == ADDR_EXPR)
4376 tree obj = TREE_OPERAND (expr, 0);
4378 if (TREE_CODE (obj) == VAR_DECL
4379 || TREE_CODE (obj) == PARM_DECL
4380 || TREE_CODE (obj) == RESULT_DECL)
4381 return new_cost (symbol_cost [speed], 0);
4384 return new_cost (address_cost [speed], 0);
4387 switch (TREE_CODE (expr))
4389 case POINTER_PLUS_EXPR:
4390 case PLUS_EXPR:
4391 case MINUS_EXPR:
4392 case MULT_EXPR:
4393 op0 = TREE_OPERAND (expr, 0);
4394 op1 = TREE_OPERAND (expr, 1);
4395 STRIP_NOPS (op0);
4396 STRIP_NOPS (op1);
4397 break;
4399 CASE_CONVERT:
4400 case NEGATE_EXPR:
4401 op0 = TREE_OPERAND (expr, 0);
4402 STRIP_NOPS (op0);
4403 op1 = NULL_TREE;
4404 break;
4406 default:
4407 /* Just an arbitrary value, FIXME. */
4408 return new_cost (target_spill_cost[speed], 0);
4411 if (op0 == NULL_TREE
4412 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4413 cost0 = no_cost;
4414 else
4415 cost0 = force_expr_to_var_cost (op0, speed);
4417 if (op1 == NULL_TREE
4418 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4419 cost1 = no_cost;
4420 else
4421 cost1 = force_expr_to_var_cost (op1, speed);
4423 mode = TYPE_MODE (TREE_TYPE (expr));
4424 switch (TREE_CODE (expr))
4426 case POINTER_PLUS_EXPR:
4427 case PLUS_EXPR:
4428 case MINUS_EXPR:
4429 case NEGATE_EXPR:
4430 cost = new_cost (add_cost (speed, mode), 0);
4431 if (TREE_CODE (expr) != NEGATE_EXPR)
4433 tree mult = NULL_TREE;
4434 comp_cost sa_cost;
4435 if (TREE_CODE (op1) == MULT_EXPR)
4436 mult = op1;
4437 else if (TREE_CODE (op0) == MULT_EXPR)
4438 mult = op0;
4440 if (mult != NULL_TREE
4441 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4442 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4443 speed, &sa_cost))
4444 return sa_cost;
4446 break;
4448 CASE_CONVERT:
4450 tree inner_mode, outer_mode;
4451 outer_mode = TREE_TYPE (expr);
4452 inner_mode = TREE_TYPE (op0);
4453 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4454 TYPE_MODE (inner_mode), speed), 0);
4456 break;
4458 case MULT_EXPR:
4459 if (cst_and_fits_in_hwi (op0))
4460 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4461 mode, speed), 0);
4462 else if (cst_and_fits_in_hwi (op1))
4463 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4464 mode, speed), 0);
4465 else
4466 return new_cost (target_spill_cost [speed], 0);
4467 break;
4469 default:
4470 gcc_unreachable ();
4473 cost = add_costs (cost, cost0);
4474 cost = add_costs (cost, cost1);
4476 /* Bound the cost by target_spill_cost. The parts of complicated
4477 computations often are either loop invariant or at least can
4478 be shared between several iv uses, so letting this grow without
4479 limits would not give reasonable results. */
4480 if (cost.cost > (int) target_spill_cost [speed])
4481 cost.cost = target_spill_cost [speed];
4483 return cost;
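/* Usage illustration (made-up expression): forcing  (a + 4) * 8  charges
   mult_by_coeff_cost for the multiplication plus add_cost for the inner
   PLUS_EXPR, but never more than target_spill_cost, since such a loop
   invariant can always be computed once before the loop and spilled.  */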
4486 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4487 invariants the computation depends on. */
4489 static comp_cost
4490 force_var_cost (struct ivopts_data *data,
4491 tree expr, bitmap *depends_on)
4493 if (depends_on)
4495 fd_ivopts_data = data;
4496 walk_tree (&expr, find_depends, depends_on, NULL);
4499 return force_expr_to_var_cost (expr, data->speed);
4502 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4503 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4504 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4505 invariants the computation depends on. */
4507 static comp_cost
4508 split_address_cost (struct ivopts_data *data,
4509 tree addr, bool *symbol_present, bool *var_present,
4510 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4512 tree core;
4513 HOST_WIDE_INT bitsize;
4514 HOST_WIDE_INT bitpos;
4515 tree toffset;
4516 machine_mode mode;
4517 int unsignedp, reversep, volatilep;
4519 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4520 &unsignedp, &reversep, &volatilep, false);
4522 if (toffset != 0
4523 || bitpos % BITS_PER_UNIT != 0
4524 || reversep
4525 || TREE_CODE (core) != VAR_DECL)
4527 *symbol_present = false;
4528 *var_present = true;
4529 fd_ivopts_data = data;
4530 if (depends_on)
4531 walk_tree (&addr, find_depends, depends_on, NULL);
4533 return new_cost (target_spill_cost[data->speed], 0);
4536 *offset += bitpos / BITS_PER_UNIT;
4537 if (TREE_STATIC (core)
4538 || DECL_EXTERNAL (core))
4540 *symbol_present = true;
4541 *var_present = false;
4542 return no_cost;
4545 *symbol_present = false;
4546 *var_present = true;
4547 return no_cost;
4550 /* Estimates cost of expressing difference of addresses E1 - E2 as
4551 var + symbol + offset. The value of offset is added to OFFSET,
4552 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4553 part is missing. DEPENDS_ON is a set of the invariants the computation
4554 depends on. */
4556 static comp_cost
4557 ptr_difference_cost (struct ivopts_data *data,
4558 tree e1, tree e2, bool *symbol_present, bool *var_present,
4559 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4561 HOST_WIDE_INT diff = 0;
4562 aff_tree aff_e1, aff_e2;
4563 tree type;
4565 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4567 if (ptr_difference_const (e1, e2, &diff))
4569 *offset += diff;
4570 *symbol_present = false;
4571 *var_present = false;
4572 return no_cost;
4575 if (integer_zerop (e2))
4576 return split_address_cost (data, TREE_OPERAND (e1, 0),
4577 symbol_present, var_present, offset, depends_on);
4579 *symbol_present = false;
4580 *var_present = true;
4582 type = signed_type_for (TREE_TYPE (e1));
4583 tree_to_aff_combination (e1, type, &aff_e1);
4584 tree_to_aff_combination (e2, type, &aff_e2);
4585 aff_combination_scale (&aff_e2, -1);
4586 aff_combination_add (&aff_e1, &aff_e2);
4588 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
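/* E.g. (illustrative) for e1 = &a[4] and e2 = &a[0] with 4-byte elements,
   ptr_difference_const succeeds with diff = 16, so the whole difference is
   folded into *offset at no runtime cost.  */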
4591 /* Estimates cost of expressing difference E1 - E2 as
4592 var + symbol + offset. The value of offset is added to OFFSET,
4593 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4594 part is missing. DEPENDS_ON is a set of the invariants the computation
4595 depends on. */
4597 static comp_cost
4598 difference_cost (struct ivopts_data *data,
4599 tree e1, tree e2, bool *symbol_present, bool *var_present,
4600 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4602 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4603 unsigned HOST_WIDE_INT off1, off2;
4604 aff_tree aff_e1, aff_e2;
4605 tree type;
4607 e1 = strip_offset (e1, &off1);
4608 e2 = strip_offset (e2, &off2);
4609 *offset += off1 - off2;
4611 STRIP_NOPS (e1);
4612 STRIP_NOPS (e2);
4614 if (TREE_CODE (e1) == ADDR_EXPR)
4615 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4616 offset, depends_on);
4617 *symbol_present = false;
4619 if (operand_equal_p (e1, e2, 0))
4621 *var_present = false;
4622 return no_cost;
4625 *var_present = true;
4627 if (integer_zerop (e2))
4628 return force_var_cost (data, e1, depends_on);
4630 if (integer_zerop (e1))
4632 comp_cost cost = force_var_cost (data, e2, depends_on);
4633 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4634 return cost;
4637 type = signed_type_for (TREE_TYPE (e1));
4638 tree_to_aff_combination (e1, type, &aff_e1);
4639 tree_to_aff_combination (e2, type, &aff_e2);
4640 aff_combination_scale (&aff_e2, -1);
4641 aff_combination_add (&aff_e1, &aff_e2);
4643 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4646 /* Returns true if AFF1 and AFF2 are identical. */
4648 static bool
4649 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4651 unsigned i;
4653 if (aff1->n != aff2->n)
4654 return false;
4656 for (i = 0; i < aff1->n; i++)
4658 if (aff1->elts[i].coef != aff2->elts[i].coef)
4659 return false;
4661 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4662 return false;
4664 return true;
4667 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4669 static int
4670 get_expr_id (struct ivopts_data *data, tree expr)
4672 struct iv_inv_expr_ent ent;
4673 struct iv_inv_expr_ent **slot;
4675 ent.expr = expr;
4676 ent.hash = iterative_hash_expr (expr, 0);
4677 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4678 if (*slot)
4679 return (*slot)->id;
4681 *slot = XNEW (struct iv_inv_expr_ent);
4682 (*slot)->expr = expr;
4683 (*slot)->hash = ent.hash;
4684 (*slot)->id = data->inv_expr_id++;
4685 return (*slot)->id;
4688 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4689 requires a new compiler-generated temporary. Returns -1 otherwise.
4690 ADDRESS_P is a flag indicating if the expression is for address
4691 computation. */
4693 static int
4694 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4695 tree cbase, HOST_WIDE_INT ratio,
4696 bool address_p)
4698 aff_tree ubase_aff, cbase_aff;
4699 tree expr, ub, cb;
4701 STRIP_NOPS (ubase);
4702 STRIP_NOPS (cbase);
4703 ub = ubase;
4704 cb = cbase;
4706 if ((TREE_CODE (ubase) == INTEGER_CST)
4707 && (TREE_CODE (cbase) == INTEGER_CST))
4708 return -1;
4710 /* Strips the constant part. */
4711 if (TREE_CODE (ubase) == PLUS_EXPR
4712 || TREE_CODE (ubase) == MINUS_EXPR
4713 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4715 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4716 ubase = TREE_OPERAND (ubase, 0);
4719 /* Strips the constant part. */
4720 if (TREE_CODE (cbase) == PLUS_EXPR
4721 || TREE_CODE (cbase) == MINUS_EXPR
4722 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4724 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4725 cbase = TREE_OPERAND (cbase, 0);
4728 if (address_p)
4730 if (((TREE_CODE (ubase) == SSA_NAME)
4731 || (TREE_CODE (ubase) == ADDR_EXPR
4732 && is_gimple_min_invariant (ubase)))
4733 && (TREE_CODE (cbase) == INTEGER_CST))
4734 return -1;
4736 if (((TREE_CODE (cbase) == SSA_NAME)
4737 || (TREE_CODE (cbase) == ADDR_EXPR
4738 && is_gimple_min_invariant (cbase)))
4739 && (TREE_CODE (ubase) == INTEGER_CST))
4740 return -1;
4743 if (ratio == 1)
4745 if (operand_equal_p (ubase, cbase, 0))
4746 return -1;
4748 if (TREE_CODE (ubase) == ADDR_EXPR
4749 && TREE_CODE (cbase) == ADDR_EXPR)
4751 tree usym, csym;
4753 usym = TREE_OPERAND (ubase, 0);
4754 csym = TREE_OPERAND (cbase, 0);
4755 if (TREE_CODE (usym) == ARRAY_REF)
4757 tree ind = TREE_OPERAND (usym, 1);
4758 if (TREE_CODE (ind) == INTEGER_CST
4759 && tree_fits_shwi_p (ind)
4760 && tree_to_shwi (ind) == 0)
4761 usym = TREE_OPERAND (usym, 0);
4763 if (TREE_CODE (csym) == ARRAY_REF)
4765 tree ind = TREE_OPERAND (csym, 1);
4766 if (TREE_CODE (ind) == INTEGER_CST
4767 && tree_fits_shwi_p (ind)
4768 && tree_to_shwi (ind) == 0)
4769 csym = TREE_OPERAND (csym, 0);
4771 if (operand_equal_p (usym, csym, 0))
4772 return -1;
4774 /* Now do a more complex comparison. */
4775 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4776 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4777 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4778 return -1;
4781 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4782 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4784 aff_combination_scale (&cbase_aff, -1 * ratio);
4785 aff_combination_add (&ubase_aff, &cbase_aff);
4786 expr = aff_combination_to_tree (&ubase_aff);
4787 return get_expr_id (data, expr);
4792 /* Determines the cost of the computation by which USE is expressed
4793 from induction variable CAND. If ADDRESS_P is true, we just need
4794 to create an address from it, otherwise we want to get it into a
4795 register. A set of invariants we depend on is stored in
4796 DEPENDS_ON. AT is the statement at which the value is computed.
4797 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4798 addressing is likely. */
4800 static comp_cost
4801 get_computation_cost_at (struct ivopts_data *data,
4802 struct iv_use *use, struct iv_cand *cand,
4803 bool address_p, bitmap *depends_on, gimple *at,
4804 bool *can_autoinc,
4805 int *inv_expr_id)
4807 tree ubase = use->iv->base, ustep = use->iv->step;
4808 tree cbase, cstep;
4809 tree utype = TREE_TYPE (ubase), ctype;
4810 unsigned HOST_WIDE_INT cstepi, offset = 0;
4811 HOST_WIDE_INT ratio, aratio;
4812 bool var_present, symbol_present, stmt_is_after_inc;
4813 comp_cost cost;
4814 widest_int rat;
4815 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4816 machine_mode mem_mode = (address_p
4817 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4818 : VOIDmode);
4820 if (depends_on)
4821 *depends_on = NULL;
4823 /* Only consider real candidates. */
4824 if (!cand->iv)
4825 return infinite_cost;
4827 cbase = cand->iv->base;
4828 cstep = cand->iv->step;
4829 ctype = TREE_TYPE (cbase);
4831 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4833 /* We do not have enough precision to express the values of the use. */
4834 return infinite_cost;
4837 if (address_p
4838 || (use->iv->base_object
4839 && cand->iv->base_object
4840 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4841 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4843 /* Do not try to express the address of an object with a computation
4844 based on the address of a different object. This may cause problems
4845 in RTL-level alias analysis (which does not expect this to happen,
4846 as it is illegal in C), and would be unlikely to be useful
4847 anyway. */
4848 if (use->iv->base_object
4849 && cand->iv->base_object
4850 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4851 return infinite_cost;
4854 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4856 /* TODO -- add direct handling of this case. */
4857 goto fallback;
4860 /* CSTEPI is removed from the offset in case the statement is after the
4861 increment. If the step is not constant, we use zero instead.
4862 This is a bit imprecise (there is the extra addition), but
4863 redundancy elimination is likely to transform the code so that
4864 it uses the value of the variable before the increment anyway,
4865 so it is not that unrealistic. */
4866 if (cst_and_fits_in_hwi (cstep))
4867 cstepi = int_cst_value (cstep);
4868 else
4869 cstepi = 0;
4871 if (!constant_multiple_of (ustep, cstep, &rat))
4872 return infinite_cost;
4874 if (wi::fits_shwi_p (rat))
4875 ratio = rat.to_shwi ();
4876 else
4877 return infinite_cost;
4879 STRIP_NOPS (cbase);
4880 ctype = TREE_TYPE (cbase);
4882 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4884 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4885 or ratio == 1, it is better to handle this like
4887 ubase - ratio * cbase + ratio * var
4889 (this also holds in the case ratio == -1, TODO). */
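/* Small illustration (values invented): with ustep = 4 and cstep = 2,
   constant_multiple_of yields ratio = 2, so a use with ubase = 16 over a
   candidate with cbase = 0 is expressed as  16 + 2 * var.  */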
4891 if (cst_and_fits_in_hwi (cbase))
4893 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4894 cost = difference_cost (data,
4895 ubase, build_int_cst (utype, 0),
4896 &symbol_present, &var_present, &offset,
4897 depends_on);
4898 cost.cost /= avg_loop_niter (data->current_loop);
4900 else if (ratio == 1)
4902 tree real_cbase = cbase;
4904 /* Check to see if any adjustment is needed. */
4905 if (cstepi == 0 && stmt_is_after_inc)
4907 aff_tree real_cbase_aff;
4908 aff_tree cstep_aff;
4910 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4911 &real_cbase_aff);
4912 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4914 aff_combination_add (&real_cbase_aff, &cstep_aff);
4915 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4918 cost = difference_cost (data,
4919 ubase, real_cbase,
4920 &symbol_present, &var_present, &offset,
4921 depends_on);
4922 cost.cost /= avg_loop_niter (data->current_loop);
4924 else if (address_p
4925 && !POINTER_TYPE_P (ctype)
4926 && multiplier_allowed_in_address_p
4927 (ratio, mem_mode,
4928 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4930 if (cstepi == 0 && stmt_is_after_inc)
4932 if (POINTER_TYPE_P (ctype))
4933 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4934 else
4935 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4937 cbase
4938 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4939 cost = difference_cost (data,
4940 ubase, cbase,
4941 &symbol_present, &var_present, &offset,
4942 depends_on);
4943 cost.cost /= avg_loop_niter (data->current_loop);
4945 else
4947 cost = force_var_cost (data, cbase, depends_on);
4948 cost = add_costs (cost,
4949 difference_cost (data,
4950 ubase, build_int_cst (utype, 0),
4951 &symbol_present, &var_present,
4952 &offset, depends_on));
4953 cost.cost /= avg_loop_niter (data->current_loop);
4954 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4957 /* Record the setup cost in the scratch field. */
4958 cost.scratch = cost.cost;
4959 /* Set of invariants depended on by sub use has already been computed
4960 for the first use in the group. */
4961 if (use->sub_id)
4963 cost.cost = 0;
4964 if (depends_on && *depends_on)
4965 bitmap_clear (*depends_on);
4967 else if (inv_expr_id)
4969 *inv_expr_id =
4970 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4971 /* Clear depends on. */
4972 if (*inv_expr_id != -1 && depends_on && *depends_on)
4973 bitmap_clear (*depends_on);
4976 /* If we are after the increment, the value of the candidate is higher by
4977 one iteration. */
4978 if (stmt_is_after_inc)
4979 offset -= ratio * cstepi;
4981 /* Now the computation is of the shape symbol + var1 + const + ratio * var2.
4982 (The symbol/var1/const parts may be omitted.) If we are looking for an
4983 address, find the cost of addressing this. */
4984 if (address_p)
4985 return add_costs (cost,
4986 get_address_cost (symbol_present, var_present,
4987 offset, ratio, cstepi,
4988 mem_mode,
4989 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4990 speed, stmt_is_after_inc,
4991 can_autoinc));
4993 /* Otherwise estimate the costs for computing the expression. */
4994 if (!symbol_present && !var_present && !offset)
4996 if (ratio != 1)
4997 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4998 return cost;
5001 /* Symbol + offset should be compile-time computable so consider that they
5002 are added once to the variable, if present. */
5003 if (var_present && (symbol_present || offset))
5004 cost.cost += adjust_setup_cost (data,
5005 add_cost (speed, TYPE_MODE (ctype)));
5007 /* Having an offset does not affect the runtime cost if it is added to the
5008 symbol, but it increases complexity. */
5009 if (offset)
5010 cost.complexity++;
5012 cost.cost += add_cost (speed, TYPE_MODE (ctype));
5014 aratio = ratio > 0 ? ratio : -ratio;
5015 if (aratio != 1)
5016 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5017 return cost;
5019 fallback:
5020 if (can_autoinc)
5021 *can_autoinc = false;
5024 /* Just get the expression, expand it and measure the cost. */
5025 tree comp = get_computation_at (data->current_loop, use, cand, at);
5027 if (!comp)
5028 return infinite_cost;
5030 if (address_p)
5031 comp = build_simple_mem_ref (comp);
5033 cost = new_cost (computation_cost (comp, speed), 0);
5034 cost.scratch = 0;
5035 return cost;
5039 /* Determines the cost of the computation by which USE is expressed
5040 from induction variable CAND. If ADDRESS_P is true, we just need
5041 to create an address from it, otherwise we want to get it into a
5042 register. A set of invariants we depend on is stored in
5043 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5044 autoinc addressing is likely. */
5046 static comp_cost
5047 get_computation_cost (struct ivopts_data *data,
5048 struct iv_use *use, struct iv_cand *cand,
5049 bool address_p, bitmap *depends_on,
5050 bool *can_autoinc, int *inv_expr_id)
5052 return get_computation_cost_at (data,
5053 use, cand, address_p, depends_on, use->stmt,
5054 can_autoinc, inv_expr_id);
5057 /* Determines cost of basing replacement of USE on CAND in a generic
5058 expression. */
5060 static bool
5061 determine_use_iv_cost_generic (struct ivopts_data *data,
5062 struct iv_use *use, struct iv_cand *cand)
5064 bitmap depends_on;
5065 comp_cost cost;
5066 int inv_expr_id = -1;
5068 /* The simple case first -- if we need to express value of the preserved
5069 original biv, the cost is 0. This also prevents us from counting the
5070 cost of increment twice -- once at this use and once in the cost of
5071 the candidate. */
5072 if (cand->pos == IP_ORIGINAL
5073 && cand->incremented_at == use->stmt)
5075 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
5076 ERROR_MARK, -1);
5077 return true;
5080 cost = get_computation_cost (data, use, cand, false, &depends_on,
5081 NULL, &inv_expr_id);
5083 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5084 inv_expr_id);
5086 return !infinite_cost_p (cost);
5089 /* Determines cost of basing replacement of USE on CAND in an address. */
5091 static bool
5092 determine_use_iv_cost_address (struct ivopts_data *data,
5093 struct iv_use *use, struct iv_cand *cand)
5095 bitmap depends_on;
5096 bool can_autoinc, first;
5097 int inv_expr_id = -1;
5098 struct iv_use *sub_use;
5099 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
5100 &can_autoinc, &inv_expr_id);
5101 comp_cost sub_cost = cost;
5103 if (cand->ainc_use == use)
5105 if (can_autoinc)
5106 cost.cost -= cand->cost_step;
5107 /* If we generated the candidate solely for exploiting autoincrement
5108 opportunities, and it turns out it can't be used, set the cost to
5109 infinity to make sure we ignore it. */
5110 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5111 cost = infinite_cost;
5114 if (!infinite_cost_p (cost) && use->next)
5116 first = true;
5117 sub_use = use->next;
5118 /* We don't want to add setup cost for sub-uses. */
5119 sub_cost.cost -= sub_cost.scratch;
5120 /* Add cost for sub uses in group. */
5123 /* Compute cost for the first sub use with different offset to
5124 the main use and add it afterwards. Costs for these uses
5125 could be quite different. Given below uses in a group:
5126 use 0 : {base + A + offset_0, step}
5127 use 0.1: {base + A + offset_0, step}
5128 use 0.2: {base + A + offset_1, step}
5129 use 0.3: {base + A + offset_2, step}
5130 when we need to compute costs with candidate:
5131 cand 1 : {base + B + offset_0, step}
5133 The first sub use with different offset is use 0.2, its cost
5134 is larger than cost of use 0/0.1 because we need to compute:
5135 A - B + offset_1 - offset_0
5136 rather than:
5137 A - B. */
5138 if (first && use->addr_offset != sub_use->addr_offset)
5140 first = false;
5141 sub_cost = get_computation_cost (data, sub_use, cand, true,
5142 NULL, &can_autoinc, NULL);
5143 if (infinite_cost_p (sub_cost))
5145 cost = infinite_cost;
5146 break;
5150 cost = add_costs (cost, sub_cost);
5151 sub_use = sub_use->next;
5153 while (sub_use);
5156 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
5157 inv_expr_id);
5159 return !infinite_cost_p (cost);
5162 /* Computes value of candidate CAND at position AT in iteration NITER, and
5163 stores it to VAL. */
5165 static void
5166 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5167 aff_tree *val)
5169 aff_tree step, delta, nit;
5170 struct iv *iv = cand->iv;
5171 tree type = TREE_TYPE (iv->base);
5172 tree steptype = type;
5173 if (POINTER_TYPE_P (type))
5174 steptype = sizetype;
5175 steptype = unsigned_type_for (type);
5177 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5178 aff_combination_convert (&step, steptype);
5179 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5180 aff_combination_convert (&nit, steptype);
5181 aff_combination_mult (&nit, &step, &delta);
5182 if (stmt_after_increment (loop, cand, at))
5183 aff_combination_add (&delta, &step);
5185 tree_to_aff_combination (iv->base, type, val);
5186 if (!POINTER_TYPE_P (type))
5187 aff_combination_convert (val, steptype);
5188 aff_combination_add (val, &delta);
5191 /* Returns period of induction variable iv. */
5193 static tree
5194 iv_period (struct iv *iv)
5196 tree step = iv->step, period, type;
5197 tree pow2div;
5199 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5201 type = unsigned_type_for (TREE_TYPE (step));
5202 /* The period of the iv is lcm (step, type_range)/step - 1,
5203 i.e., N*type_range/step - 1. Since the type range is a power
5204 of two, N == step >> num_of_ending_zeros_binary (step),
5205 so the final result is
5207 (type_range >> num_of_ending_zeros_binary (step)) - 1
5210 pow2div = num_ending_zeros (step);
5212 period = build_low_bits_mask (type,
5213 (TYPE_PRECISION (type)
5214 - tree_to_uhwi (pow2div)));
5216 return period;
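/* Sanity-check example (not from the sources): for a 32-bit unsigned iv
   with step 4, num_ending_zeros is 2 and the period is (1 << 30) - 1, i.e.
   the iv steps through 2^30 distinct values before wrapping to its start.  */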
5219 /* Returns the comparison operator used when eliminating the iv USE. */
5221 static enum tree_code
5222 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5224 struct loop *loop = data->current_loop;
5225 basic_block ex_bb;
5226 edge exit;
5228 ex_bb = gimple_bb (use->stmt);
5229 exit = EDGE_SUCC (ex_bb, 0);
5230 if (flow_bb_inside_loop_p (loop, exit->dest))
5231 exit = EDGE_SUCC (ex_bb, 1);
5233 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5236 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5237 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5238 calculation is performed in non-wrapping type.
5240 TODO: More generally, we could test for the situation that
5241 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5242 This would require knowing the sign of OFFSET. */
5244 static bool
5245 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5247 enum tree_code code;
5248 tree e1, e2;
5249 aff_tree aff_e1, aff_e2, aff_offset;
5251 if (!nowrap_type_p (TREE_TYPE (base)))
5252 return false;
5254 base = expand_simple_operations (base);
5256 if (TREE_CODE (base) == SSA_NAME)
5258 gimple *stmt = SSA_NAME_DEF_STMT (base);
5260 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5261 return false;
5263 code = gimple_assign_rhs_code (stmt);
5264 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5265 return false;
5267 e1 = gimple_assign_rhs1 (stmt);
5268 e2 = gimple_assign_rhs2 (stmt);
5270 else
5272 code = TREE_CODE (base);
5273 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5274 return false;
5275 e1 = TREE_OPERAND (base, 0);
5276 e2 = TREE_OPERAND (base, 1);
5279 /* Use affine expansion as deeper inspection to prove the equality. */
5280 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5281 &aff_e2, &data->name_expansion_cache);
5282 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5283 &aff_offset, &data->name_expansion_cache);
5284 aff_combination_scale (&aff_offset, -1);
5285 switch (code)
5287 case PLUS_EXPR:
5288 aff_combination_add (&aff_e2, &aff_offset);
5289 if (aff_combination_zero_p (&aff_e2))
5290 return true;
5292 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5293 &aff_e1, &data->name_expansion_cache);
5294 aff_combination_add (&aff_e1, &aff_offset);
5295 return aff_combination_zero_p (&aff_e1);
5297 case POINTER_PLUS_EXPR:
5298 aff_combination_add (&aff_e2, &aff_offset);
5299 return aff_combination_zero_p (&aff_e2);
5301 default:
5302 return false;
5306 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5307 comparison with CAND. NITER describes the number of iterations of
5308 the loops. If successful, the comparison in COMP_P is altered accordingly.
5310 We aim to handle the following situation:
5312 sometype *base, *p;
5313 int a, b, i;
5315 i = a;
5316 p = p_0 = base + a;
5320 bla (*p);
5321 p++;
5322 i++;
5324 while (i < b);
5326 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5327 We aim to optimize this to
5329 p = p_0 = base + a;
5332 bla (*p);
5333 p++;
5335 while (p < p_0 - a + b);
5337 This preserves the correctness, since the pointer arithmetics does not
5338 overflow. More precisely:
5340 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5341 overflow in computing it or the values of p.
5342 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5343 overflow. To prove this, we use the fact that p_0 = base + a. */
5345 static bool
5346 iv_elimination_compare_lt (struct ivopts_data *data,
5347 struct iv_cand *cand, enum tree_code *comp_p,
5348 struct tree_niter_desc *niter)
5350 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5351 struct aff_tree nit, tmpa, tmpb;
5352 enum tree_code comp;
5353 HOST_WIDE_INT step;
5355 /* We need to know that the candidate induction variable does not overflow.
5356 While more complex analysis may be used to prove this, for now just
5357 check that the variable appears in the original program and that it
5358 is computed in a type that guarantees no overflows. */
5359 cand_type = TREE_TYPE (cand->iv->base);
5360 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5361 return false;
5363 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5364 the calculation of the BOUND could overflow, making the comparison
5365 invalid. */
5366 if (!data->loop_single_exit_p)
5367 return false;
5369 /* We need to be able to decide whether candidate is increasing or decreasing
5370 in order to choose the right comparison operator. */
5371 if (!cst_and_fits_in_hwi (cand->iv->step))
5372 return false;
5373 step = int_cst_value (cand->iv->step);
5375 /* Check that the number of iterations matches the expected pattern:
5376 a + 1 > b ? 0 : b - a - 1. */
5377 mbz = niter->may_be_zero;
5378 if (TREE_CODE (mbz) == GT_EXPR)
5380 /* Handle a + 1 > b. */
5381 tree op0 = TREE_OPERAND (mbz, 0);
5382 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5384 a = TREE_OPERAND (op0, 0);
5385 b = TREE_OPERAND (mbz, 1);
5387 else
5388 return false;
5390 else if (TREE_CODE (mbz) == LT_EXPR)
5392 tree op1 = TREE_OPERAND (mbz, 1);
5394 /* Handle b < a + 1. */
5395 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5397 a = TREE_OPERAND (op1, 0);
5398 b = TREE_OPERAND (mbz, 0);
5400 else
5401 return false;
5403 else
5404 return false;
5406 /* Expected number of iterations is B - A - 1. Check that it matches
5407 the actual number, i.e., that B - A - NITER = 1. */
5408 tree_to_aff_combination (niter->niter, nit_type, &nit);
5409 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5410 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5411 aff_combination_scale (&nit, -1);
5412 aff_combination_scale (&tmpa, -1);
5413 aff_combination_add (&tmpb, &tmpa);
5414 aff_combination_add (&tmpb, &nit);
5415 if (tmpb.n != 0 || tmpb.offset != 1)
5416 return false;
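/* E.g. (illustrative) with a = 2 and b = 7 the loop runs b - a - 1 = 4
   times; substituting niter = 4 gives B - A - NITER = 1, so the affine
   check above accepts the pattern.  */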
5418 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5419 overflow. */
5420 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5421 cand->iv->step,
5422 fold_convert (TREE_TYPE (cand->iv->step), a));
5423 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5424 return false;
5426 /* Determine the new comparison operator. */
5427 comp = step < 0 ? GT_EXPR : LT_EXPR;
5428 if (*comp_p == NE_EXPR)
5429 *comp_p = comp;
5430 else if (*comp_p == EQ_EXPR)
5431 *comp_p = invert_tree_comparison (comp, false);
5432 else
5433 gcc_unreachable ();
5435 return true;
5438 /* Check whether it is possible to express the condition in USE by comparison
5439 of candidate CAND. If so, store the value compared with to BOUND, and the
5440 comparison operator to COMP. */
5442 static bool
5443 may_eliminate_iv (struct ivopts_data *data,
5444 struct iv_use *use, struct iv_cand *cand, tree *bound,
5445 enum tree_code *comp)
5447 basic_block ex_bb;
5448 edge exit;
5449 tree period;
5450 struct loop *loop = data->current_loop;
5451 aff_tree bnd;
5452 struct tree_niter_desc *desc = NULL;
5454 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5455 return false;
5457 /* For now this works only for exits that dominate the loop latch.
5458 TODO: extend to other conditions inside the loop body. */
5459 ex_bb = gimple_bb (use->stmt);
5460 if (use->stmt != last_stmt (ex_bb)
5461 || gimple_code (use->stmt) != GIMPLE_COND
5462 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5463 return false;
5465 exit = EDGE_SUCC (ex_bb, 0);
5466 if (flow_bb_inside_loop_p (loop, exit->dest))
5467 exit = EDGE_SUCC (ex_bb, 1);
5468 if (flow_bb_inside_loop_p (loop, exit->dest))
5469 return false;
5471 desc = niter_for_exit (data, exit);
5472 if (!desc)
5473 return false;
5475 /* Determine whether we can use the variable to test the exit condition.
5476 This is the case iff the period of the induction variable is greater
5477 than the number of iterations for which the exit condition is true. */
5478 period = iv_period (cand->iv);
5480 /* If the number of iterations is constant, compare against it directly. */
5481 if (TREE_CODE (desc->niter) == INTEGER_CST)
5483 /* See cand_value_at. */
5484 if (stmt_after_increment (loop, cand, use->stmt))
5486 if (!tree_int_cst_lt (desc->niter, period))
5487 return false;
5489 else
5491 if (tree_int_cst_lt (period, desc->niter))
5492 return false;
5496 /* If not, and if this is the only possible exit of the loop, see whether
5497 we can get a conservative estimate on the number of iterations of the
5498 entire loop and compare against that instead. */
5499 else
5501 widest_int period_value, max_niter;
5503 max_niter = desc->max;
5504 if (stmt_after_increment (loop, cand, use->stmt))
5505 max_niter += 1;
5506 period_value = wi::to_widest (period);
5507 if (wi::gtu_p (max_niter, period_value))
5509 /* See if we can take advantage of inferred loop bound information. */
5510 if (data->loop_single_exit_p)
5512 if (!max_loop_iterations (loop, &max_niter))
5513 return false;
5514 /* The loop bound is already adjusted by adding 1. */
5515 if (wi::gtu_p (max_niter, period_value))
5516 return false;
5518 else
5519 return false;
5523 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5525 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5526 aff_combination_to_tree (&bnd));
5527 *comp = iv_elimination_compare (data, use);
5529 /* It is unlikely that computing the number of iterations using division
5530 would be more profitable than keeping the original induction variable. */
5531 if (expression_expensive_p (*bound))
5532 return false;
5534 /* Sometimes, it is possible to handle the situation that the number of
5535 iterations may be zero unless additional assumptions hold, by using <
5536 instead of != in the exit condition.
5538 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5539 base the exit condition on it. However, that is often too
5540 expensive. */
5541 if (!integer_zerop (desc->may_be_zero))
5542 return iv_elimination_compare_lt (data, cand, comp, desc);
5544 return true;
5547 /* Calculates the cost of BOUND if it is a PARM_DECL. A PARM_DECL must
5548 be copied if it is used in the loop body and DATA->body_includes_call. */
5550 static int
5551 parm_decl_cost (struct ivopts_data *data, tree bound)
5553 tree sbound = bound;
5554 STRIP_NOPS (sbound);
5556 if (TREE_CODE (sbound) == SSA_NAME
5557 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5558 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5559 && data->body_includes_call)
5560 return COSTS_N_INSNS (1);
5562 return 0;
5565 /* Determines cost of basing replacement of USE on CAND in a condition. */
5567 static bool
5568 determine_use_iv_cost_condition (struct ivopts_data *data,
5569 struct iv_use *use, struct iv_cand *cand)
5571 tree bound = NULL_TREE;
5572 struct iv *cmp_iv;
5573 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5574 comp_cost elim_cost, express_cost, cost, bound_cost;
5575 bool ok;
5576 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5577 tree *control_var, *bound_cst;
5578 enum tree_code comp = ERROR_MARK;
5580 /* Only consider real candidates. */
5581 if (!cand->iv)
5583 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5584 ERROR_MARK, -1);
5585 return false;
5588 /* Try iv elimination. */
5589 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5591 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5592 if (elim_cost.cost == 0)
5593 elim_cost.cost = parm_decl_cost (data, bound);
5594 else if (TREE_CODE (bound) == INTEGER_CST)
5595 elim_cost.cost = 0;
5596 /* If we replace a loop condition 'i < n' with 'p < base + n',
5597 depends_on_elim will have 'base' and 'n' set, which implies
5598 that both 'base' and 'n' will be live during the loop. More likely,
5599 'base + n' will be loop invariant, resulting in only one live value
5600 during the loop. So in that case we clear depends_on_elim and set
5601 elim_inv_expr_id instead. */
5602 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5604 elim_inv_expr_id = get_expr_id (data, bound);
5605 bitmap_clear (depends_on_elim);
5607 /* The bound is a loop invariant, so it will be only computed
5608 once. */
5609 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5611 else
5612 elim_cost = infinite_cost;
5614 /* Try expressing the original giv. If it is compared with an invariant,
5615 note that we cannot get rid of it. */
5616 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5617 NULL, &cmp_iv);
5618 gcc_assert (ok);
5620 /* When the condition is a comparison of the candidate IV against
5621 zero, prefer this IV.
5623 TODO: The constant that we're subtracting from the cost should
5624 be target-dependent. This information should be added to the
5625 target costs for each backend. */
5626 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5627 && integer_zerop (*bound_cst)
5628 && (operand_equal_p (*control_var, cand->var_after, 0)
5629 || operand_equal_p (*control_var, cand->var_before, 0)))
5630 elim_cost.cost -= 1;
5632 express_cost = get_computation_cost (data, use, cand, false,
5633 &depends_on_express, NULL,
5634 &express_inv_expr_id);
5635 fd_ivopts_data = data;
5636 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5638 /* Count the cost of the original bound as well. */
5639 bound_cost = force_var_cost (data, *bound_cst, NULL);
5640 if (bound_cost.cost == 0)
5641 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5642 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5643 bound_cost.cost = 0;
5644 express_cost.cost += bound_cost.cost;
5646 /* Choose the better approach, preferring the eliminated IV. */
5647 if (compare_costs (elim_cost, express_cost) <= 0)
5649 cost = elim_cost;
5650 depends_on = depends_on_elim;
5651 depends_on_elim = NULL;
5652 inv_expr_id = elim_inv_expr_id;
5654 else
5656 cost = express_cost;
5657 depends_on = depends_on_express;
5658 depends_on_express = NULL;
5659 bound = NULL_TREE;
5660 comp = ERROR_MARK;
5661 inv_expr_id = express_inv_expr_id;
5664 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5666 if (depends_on_elim)
5667 BITMAP_FREE (depends_on_elim);
5668 if (depends_on_express)
5669 BITMAP_FREE (depends_on_express);
5671 return !infinite_cost_p (cost);
5674 /* Determines cost of basing replacement of USE on CAND. Returns false
5675 if USE cannot be based on CAND. */
5677 static bool
5678 determine_use_iv_cost (struct ivopts_data *data,
5679 struct iv_use *use, struct iv_cand *cand)
5681 switch (use->type)
5683 case USE_NONLINEAR_EXPR:
5684 return determine_use_iv_cost_generic (data, use, cand);
5686 case USE_ADDRESS:
5687 return determine_use_iv_cost_address (data, use, cand);
5689 case USE_COMPARE:
5690 return determine_use_iv_cost_condition (data, use, cand);
5692 default:
5693 gcc_unreachable ();
5697 /* Return true if get_computation_cost indicates that autoincrement is
5698 a possibility for the pair of USE and CAND, false otherwise. */
5700 static bool
5701 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5702 struct iv_cand *cand)
5704 bitmap depends_on;
5705 bool can_autoinc;
5706 comp_cost cost;
5708 if (use->type != USE_ADDRESS)
5709 return false;
5711 cost = get_computation_cost (data, use, cand, true, &depends_on,
5712 &can_autoinc, NULL);
5714 BITMAP_FREE (depends_on);
5716 return !infinite_cost_p (cost) && can_autoinc;
5719 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5720 use that allows autoincrement, and set their AINC_USE if possible. */
5722 static void
5723 set_autoinc_for_original_candidates (struct ivopts_data *data)
5725 unsigned i, j;
5727 for (i = 0; i < n_iv_cands (data); i++)
5729 struct iv_cand *cand = iv_cand (data, i);
5730 struct iv_use *closest_before = NULL;
5731 struct iv_use *closest_after = NULL;
5732 if (cand->pos != IP_ORIGINAL)
5733 continue;
5735 for (j = 0; j < n_iv_uses (data); j++)
5737 struct iv_use *use = iv_use (data, j);
5738 unsigned uid = gimple_uid (use->stmt);
5740 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5741 continue;
5743 if (uid < gimple_uid (cand->incremented_at)
5744 && (closest_before == NULL
5745 || uid > gimple_uid (closest_before->stmt)))
5746 closest_before = use;
5748 if (uid > gimple_uid (cand->incremented_at)
5749 && (closest_after == NULL
5750 || uid < gimple_uid (closest_after->stmt)))
5751 closest_after = use;
5754 if (closest_before != NULL
5755 && autoinc_possible_for_pair (data, closest_before, cand))
5756 cand->ainc_use = closest_before;
5757 else if (closest_after != NULL
5758 && autoinc_possible_for_pair (data, closest_after, cand))
5759 cand->ainc_use = closest_after;
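/* E.g. (illustrative) for  ... = *p;  p = p + 4;  the load is the use
   closest before the increment, so it becomes the candidate's AINC_USE
   and may later be rewritten as a post-increment access.  */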
5763 /* Finds the candidates for the induction variables. */
5765 static void
5766 find_iv_candidates (struct ivopts_data *data)
5768 /* Add commonly used ivs. */
5769 add_standard_iv_candidates (data);
5771 /* Add old induction variables. */
5772 add_iv_candidate_for_bivs (data);
5774 /* Add induction variables derived from uses. */
5775 add_iv_candidate_for_uses (data);
5777 set_autoinc_for_original_candidates (data);
5779 /* Record the important candidates. */
5780 record_important_candidates (data);
5783 /* Determines costs of basing the use of the iv on an iv candidate. */
5785 static void
5786 determine_use_iv_costs (struct ivopts_data *data)
5788 unsigned i, j;
5789 struct iv_use *use;
5790 struct iv_cand *cand;
5791 bitmap to_clear = BITMAP_ALLOC (NULL);
5793 alloc_use_cost_map (data);
5795 for (i = 0; i < n_iv_uses (data); i++)
5797 use = iv_use (data, i);
5799 if (data->consider_all_candidates)
5801 for (j = 0; j < n_iv_cands (data); j++)
5803 cand = iv_cand (data, j);
5804 determine_use_iv_cost (data, use, cand);
5807 else
5809 bitmap_iterator bi;
5811 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5813 cand = iv_cand (data, j);
5814 if (!determine_use_iv_cost (data, use, cand))
5815 bitmap_set_bit (to_clear, j);
5818 /* Remove the candidates for which the cost is infinite from
5819 the list of related candidates. */
5820 bitmap_and_compl_into (use->related_cands, to_clear);
5821 bitmap_clear (to_clear);
5825 BITMAP_FREE (to_clear);
5827 if (dump_file && (dump_flags & TDF_DETAILS))
5829 fprintf (dump_file, "Use-candidate costs:\n");
5831 for (i = 0; i < n_iv_uses (data); i++)
5833 use = iv_use (data, i);
5835 fprintf (dump_file, "Use %d:\n", i);
5836 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5837 for (j = 0; j < use->n_map_members; j++)
5839 if (!use->cost_map[j].cand
5840 || infinite_cost_p (use->cost_map[j].cost))
5841 continue;
5843 fprintf (dump_file, " %d\t%d\t%d\t",
5844 use->cost_map[j].cand->id,
5845 use->cost_map[j].cost.cost,
5846 use->cost_map[j].cost.complexity);
5847 if (use->cost_map[j].depends_on)
5848 bitmap_print (dump_file,
5849 use->cost_map[j].depends_on, "","");
5850 if (use->cost_map[j].inv_expr_id != -1)
5851 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5852 fprintf (dump_file, "\n");
5855 fprintf (dump_file, "\n");
5857 fprintf (dump_file, "\n");
5861 /* Determines cost of the candidate CAND. */
5863 static void
5864 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5866 comp_cost cost_base;
5867 unsigned cost, cost_step;
5868 tree base;
5870 if (!cand->iv)
5872 cand->cost = 0;
5873 return;
5876 /* There are two costs associated with the candidate -- its increment
5877 and its initialization. The second is almost negligible for any loop
5878 that rolls enough, so we take it into account just a little. */
5880 base = cand->iv->base;
5881 cost_base = force_var_cost (data, base, NULL);
5882 /* It will be exceptional that the iv register happens to be initialized with
5883 the proper value at no cost. In general, there will at least be a regcopy
5884 or a const set. */
5885 if (cost_base.cost == 0)
5886 cost_base.cost = COSTS_N_INSNS (1);
5887 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5889 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5891 /* Prefer the original ivs unless we may gain something by replacing them.
5892 The reason is to make debugging simpler, so this is not relevant for
5893 artificial ivs created by other optimization passes. */
5894 if (cand->pos != IP_ORIGINAL
5895 || !SSA_NAME_VAR (cand->var_before)
5896 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5897 cost++;
5899 /* Prefer not to insert statements into latch unless there are some
5900 already (so that we do not create unnecessary jumps). */
5901 if (cand->pos == IP_END
5902 && empty_block_p (ip_end_pos (data->current_loop)))
5903 cost++;
5905 cand->cost = cost;
5906 cand->cost_step = cost_step;
5909 /* Determines costs of computation of the candidates. */
5911 static void
5912 determine_iv_costs (struct ivopts_data *data)
5914 unsigned i;
5916 if (dump_file && (dump_flags & TDF_DETAILS))
5918 fprintf (dump_file, "Candidate costs:\n");
5919 fprintf (dump_file, " cand\tcost\n");
5922 for (i = 0; i < n_iv_cands (data); i++)
5924 struct iv_cand *cand = iv_cand (data, i);
5926 determine_iv_cost (data, cand);
5928 if (dump_file && (dump_flags & TDF_DETAILS))
5929 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5932 if (dump_file && (dump_flags & TDF_DETAILS))
5933 fprintf (dump_file, "\n");
5936 /* Calculates cost for having SIZE induction variables. */
5938 static unsigned
5939 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5941 /* We add size to the cost, so that we prefer eliminating ivs
5942 if possible. */
5943 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5944 data->body_includes_call);
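/* E.g. (made-up numbers): with size = 3 and estimate_reg_pressure_cost
   returning 2, the global cost is 3 + 2 = 5; adding SIZE itself means an
   extra iv always costs at least one unit, so smaller sets are preferred.  */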
5947 /* For each size of the induction variable set determine the penalty. */
5949 static void
5950 determine_set_costs (struct ivopts_data *data)
5952 unsigned j, n;
5953 gphi *phi;
5954 gphi_iterator psi;
5955 tree op;
5956 struct loop *loop = data->current_loop;
5957 bitmap_iterator bi;
5959 if (dump_file && (dump_flags & TDF_DETAILS))
5961 fprintf (dump_file, "Global costs:\n");
5962 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5963 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5964 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5965 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5968 n = 0;
5969 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5971 phi = psi.phi ();
5972 op = PHI_RESULT (phi);
5974 if (virtual_operand_p (op))
5975 continue;
5977 if (get_iv (data, op))
5978 continue;
5980 n++;
5983 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5985 struct version_info *info = ver_info (data, j);
5987 if (info->inv_id && info->has_nonlin_use)
5988 n++;
5991 data->regs_used = n;
5992 if (dump_file && (dump_flags & TDF_DETAILS))
5993 fprintf (dump_file, " regs_used %d\n", n);
5995 if (dump_file && (dump_flags & TDF_DETAILS))
5997 fprintf (dump_file, " cost for size:\n");
5998 fprintf (dump_file, " ivs\tcost\n");
5999 for (j = 0; j <= 2 * target_avail_regs; j++)
6000 fprintf (dump_file, " %d\t%d\n", j,
6001 ivopts_global_cost_for_size (data, j));
6002 fprintf (dump_file, "\n");
6006 /* Returns true if A is a cheaper cost pair than B. */
6008 static bool
6009 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6011 int cmp;
6013 if (!a)
6014 return false;
6016 if (!b)
6017 return true;
6019 cmp = compare_costs (a->cost, b->cost);
6020 if (cmp < 0)
6021 return true;
6023 if (cmp > 0)
6024 return false;
6026 /* In case the costs are the same, prefer the cheaper candidate. */
6027 if (a->cand->cost < b->cand->cost)
6028 return true;
6030 return false;
6034 /* Returns the candidate by which USE is expressed in IVS. */
6036 static struct cost_pair *
6037 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
6039 return ivs->cand_for_use[use->id];
6042 /* Computes the cost field of IVS structure. */
6044 static void
6045 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6047 comp_cost cost = ivs->cand_use_cost;
6049 cost.cost += ivs->cand_cost;
6051 cost.cost += ivopts_global_cost_for_size (data,
6052 ivs->n_regs + ivs->num_used_inv_expr);
6054 ivs->cost = cost;
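/* Worked example (illustrative figures): with cand_use_cost = 12,
   cand_cost = 6, n_regs = 3 and one used invariant expression, the set
   costs 12 + 6 + ivopts_global_cost_for_size (data, 4).  */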
6057 /* Remove the invariants in set INVS from set IVS. */
6059 static void
6060 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6062 bitmap_iterator bi;
6063 unsigned iid;
6065 if (!invs)
6066 return;
6068 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6070 ivs->n_invariant_uses[iid]--;
6071 if (ivs->n_invariant_uses[iid] == 0)
6072 ivs->n_regs--;
6076 /* Set USE not to be expressed by any candidate in IVS. */
6078 static void
6079 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6080 struct iv_use *use)
6082 unsigned uid = use->id, cid;
6083 struct cost_pair *cp;
6085 cp = ivs->cand_for_use[uid];
6086 if (!cp)
6087 return;
6088 cid = cp->cand->id;
6090 ivs->bad_uses++;
6091 ivs->cand_for_use[uid] = NULL;
6092 ivs->n_cand_uses[cid]--;
6094 if (ivs->n_cand_uses[cid] == 0)
6096 bitmap_clear_bit (ivs->cands, cid);
6097 /* Do not count the pseudocandidates. */
6098 if (cp->cand->iv)
6099 ivs->n_regs--;
6100 ivs->n_cands--;
6101 ivs->cand_cost -= cp->cand->cost;
6103 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6106 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6108 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6110 if (cp->inv_expr_id != -1)
6112 ivs->used_inv_expr[cp->inv_expr_id]--;
6113 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
6114 ivs->num_used_inv_expr--;
6116 iv_ca_recount_cost (data, ivs);
6119 /* Add invariants in set INVS to set IVS. */
6121 static void
6122 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6124 bitmap_iterator bi;
6125 unsigned iid;
6127 if (!invs)
6128 return;
6130 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6132 ivs->n_invariant_uses[iid]++;
6133 if (ivs->n_invariant_uses[iid] == 1)
6134 ivs->n_regs++;
6138 /* Set cost pair for USE in set IVS to CP. */
6140 static void
6141 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6142 struct iv_use *use, struct cost_pair *cp)
6144 unsigned uid = use->id, cid;
6146 if (ivs->cand_for_use[uid] == cp)
6147 return;
6149 if (ivs->cand_for_use[uid])
6150 iv_ca_set_no_cp (data, ivs, use);
6152 if (cp)
6154 cid = cp->cand->id;
6156 ivs->bad_uses--;
6157 ivs->cand_for_use[uid] = cp;
6158 ivs->n_cand_uses[cid]++;
6159 if (ivs->n_cand_uses[cid] == 1)
6161 bitmap_set_bit (ivs->cands, cid);
6162 /* Do not count the pseudocandidates. */
6163 if (cp->cand->iv)
6164 ivs->n_regs++;
6165 ivs->n_cands++;
6166 ivs->cand_cost += cp->cand->cost;
6168 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6171 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6172 iv_ca_set_add_invariants (ivs, cp->depends_on);
6174 if (cp->inv_expr_id != -1)
6176 ivs->used_inv_expr[cp->inv_expr_id]++;
6177 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
6178 ivs->num_used_inv_expr++;
6180 iv_ca_recount_cost (data, ivs);
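
/* A bookkeeping sketch for iv_ca_set_cp/iv_ca_set_no_cp, with
   hypothetical numbers: expressing use 3 by a previously unused
   candidate 7 sets bit 7 in ivs->cands, increments n_cands and (for a
   real, non-pseudo candidate) n_regs, adds cand->cost to cand_cost and
   folds cp->cost into cand_use_cost; iv_ca_set_no_cp undoes each of
   those steps, so the pair can be used to try an assignment and then
   roll it back.  */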
6184 /* Extend set IVS by expressing USE by some of the candidates in it
6185 if possible. Consider all important candidates if candidates in
6186 set IVS don't give any result. */
6188 static void
6189 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
6190 struct iv_use *use)
6192 struct cost_pair *best_cp = NULL, *cp;
6193 bitmap_iterator bi;
6194 unsigned i;
6195 struct iv_cand *cand;
6197 gcc_assert (ivs->upto >= use->id);
6198 ivs->upto++;
6199 ivs->bad_uses++;
6201 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6203 cand = iv_cand (data, i);
6204 cp = get_use_iv_cost (data, use, cand);
6205 if (cheaper_cost_pair (cp, best_cp))
6206 best_cp = cp;
6209 if (best_cp == NULL)
6211 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6213 cand = iv_cand (data, i);
6214 cp = get_use_iv_cost (data, use, cand);
6215 if (cheaper_cost_pair (cp, best_cp))
6216 best_cp = cp;
6220 iv_ca_set_cp (data, ivs, use, best_cp);
6223 /* Get cost for assignment IVS. */
6225 static comp_cost
6226 iv_ca_cost (struct iv_ca *ivs)
6228 /* This was a conditional expression but it triggered a bug in
6229 Sun C 5.5. */
6230 if (ivs->bad_uses)
6231 return infinite_cost;
6232 else
6233 return ivs->cost;
6236 /* Returns true if all dependences of CP are among invariants in IVS. */
6238 static bool
6239 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6241 unsigned i;
6242 bitmap_iterator bi;
6244 if (!cp->depends_on)
6245 return true;
6247 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6249 if (ivs->n_invariant_uses[i] == 0)
6250 return false;
6253 return true;
6256 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
6257 chains it before NEXT_CHANGE. */
6259 static struct iv_ca_delta *
6260 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6261 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6263 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6265 change->use = use;
6266 change->old_cp = old_cp;
6267 change->new_cp = new_cp;
6268 change->next_change = next_change;
6270 return change;
6273 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6274 are rewritten. */
6276 static struct iv_ca_delta *
6277 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6279 struct iv_ca_delta *last;
6281 if (!l2)
6282 return l1;
6284 if (!l1)
6285 return l2;
6287 for (last = l1; last->next_change; last = last->next_change)
6288 continue;
6289 last->next_change = l2;
6291 return l1;
6294 /* Reverses the list of changes DELTA, forming its inverse. */
6296 static struct iv_ca_delta *
6297 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6299 struct iv_ca_delta *act, *next, *prev = NULL;
6301 for (act = delta; act; act = next)
6303 next = act->next_change;
6304 act->next_change = prev;
6305 prev = act;
6307 std::swap (act->old_cp, act->new_cp);
6310 return prev;
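
/* A minimal standalone sketch of the reversal above, with a hypothetical
   stand-in for iv_ca_delta: reversing [A -> B -> C] yields [C -> B -> A]
   with each node's old/new payload swapped, so replaying the result
   undoes the original list of changes.  */
#if 0
struct delta { int old_cp, new_cp; struct delta *next; };

static struct delta *
reverse (struct delta *d)
{
  struct delta *next, *prev = 0;
  for (; d; d = next)
    {
      next = d->next;
      d->next = prev;
      prev = d;
      int tmp = d->old_cp;	/* invert the change itself */
      d->old_cp = d->new_cp;
      d->new_cp = tmp;
    }
  return prev;
}
#endif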
6313 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6314 reverted instead. */
6316 static void
6317 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6318 struct iv_ca_delta *delta, bool forward)
6320 struct cost_pair *from, *to;
6321 struct iv_ca_delta *act;
6323 if (!forward)
6324 delta = iv_ca_delta_reverse (delta);
6326 for (act = delta; act; act = act->next_change)
6328 from = act->old_cp;
6329 to = act->new_cp;
6330 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6331 iv_ca_set_cp (data, ivs, act->use, to);
6334 if (!forward)
6335 iv_ca_delta_reverse (delta);
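
/* Usage note: the callers below follow a common trial pattern -- commit
   a delta, sample the cost of the modified assignment, then commit the
   same delta with FORWARD == false to restore the previous state:

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   so candidate changes can be evaluated without copying the set.  */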
6338 /* Returns true if CAND is used in IVS. */
6340 static bool
6341 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6343 return ivs->n_cand_uses[cand->id] > 0;
6346 /* Returns number of induction variable candidates in the set IVS. */
6348 static unsigned
6349 iv_ca_n_cands (struct iv_ca *ivs)
6351 return ivs->n_cands;
6354 /* Free the list of changes DELTA. */
6356 static void
6357 iv_ca_delta_free (struct iv_ca_delta **delta)
6359 struct iv_ca_delta *act, *next;
6361 for (act = *delta; act; act = next)
6363 next = act->next_change;
6364 free (act);
6367 *delta = NULL;
6370 /* Allocates a new assignment of iv candidates to uses. */
6372 static struct iv_ca *
6373 iv_ca_new (struct ivopts_data *data)
6375 struct iv_ca *nw = XNEW (struct iv_ca);
6377 nw->upto = 0;
6378 nw->bad_uses = 0;
6379 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6380 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6381 nw->cands = BITMAP_ALLOC (NULL);
6382 nw->n_cands = 0;
6383 nw->n_regs = 0;
6384 nw->cand_use_cost = no_cost;
6385 nw->cand_cost = 0;
6386 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6387 nw->cost = no_cost;
6388 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6389 nw->num_used_inv_expr = 0;
6391 return nw;
6394 /* Free memory occupied by the set IVS. */
6396 static void
6397 iv_ca_free (struct iv_ca **ivs)
6399 free ((*ivs)->cand_for_use);
6400 free ((*ivs)->n_cand_uses);
6401 BITMAP_FREE ((*ivs)->cands);
6402 free ((*ivs)->n_invariant_uses);
6403 free ((*ivs)->used_inv_expr);
6404 free (*ivs);
6405 *ivs = NULL;
6408 /* Dumps IVS to FILE. */
6410 static void
6411 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6413 const char *pref = " invariants ";
6414 unsigned i;
6415 comp_cost cost = iv_ca_cost (ivs);
6417 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6418 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6419 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6420 bitmap_print (file, ivs->cands, " candidates: ","\n");
6422 for (i = 0; i < ivs->upto; i++)
6424 struct iv_use *use = iv_use (data, i);
6425 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6426 if (cp)
6427 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6428 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6429 else
6430 fprintf (file, " use:%d --> ??\n", use->id);
6433 for (i = 1; i <= data->max_inv_id; i++)
6434 if (ivs->n_invariant_uses[i])
6436 fprintf (file, "%s%d", pref, i);
6437 pref = ", ";
6439 fprintf (file, "\n\n");
6442 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6443 new set, and store differences in DELTA. Number of induction variables
6444 in the new set is stored to N_IVS. When the flag MIN_NCAND is true,
6445 the function tries to find a solution with minimal iv candidates. */
6447 static comp_cost
6448 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6449 struct iv_cand *cand, struct iv_ca_delta **delta,
6450 unsigned *n_ivs, bool min_ncand)
6452 unsigned i;
6453 comp_cost cost;
6454 struct iv_use *use;
6455 struct cost_pair *old_cp, *new_cp;
6457 *delta = NULL;
6458 for (i = 0; i < ivs->upto; i++)
6460 use = iv_use (data, i);
6461 old_cp = iv_ca_cand_for_use (ivs, use);
6463 if (old_cp
6464 && old_cp->cand == cand)
6465 continue;
6467 new_cp = get_use_iv_cost (data, use, cand);
6468 if (!new_cp)
6469 continue;
6471 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6472 continue;
6474 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6475 continue;
6477 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6480 iv_ca_delta_commit (data, ivs, *delta, true);
6481 cost = iv_ca_cost (ivs);
6482 if (n_ivs)
6483 *n_ivs = iv_ca_n_cands (ivs);
6484 iv_ca_delta_commit (data, ivs, *delta, false);
6486 return cost;
6489 /* Try narrowing set IVS by removing CAND. Return the cost of
6490 the new set and store the differences in DELTA. START is
6491 the candidate with which we start narrowing. */
6493 static comp_cost
6494 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6495 struct iv_cand *cand, struct iv_cand *start,
6496 struct iv_ca_delta **delta)
6498 unsigned i, ci;
6499 struct iv_use *use;
6500 struct cost_pair *old_cp, *new_cp, *cp;
6501 bitmap_iterator bi;
6502 struct iv_cand *cnd;
6503 comp_cost cost, best_cost, acost;
6505 *delta = NULL;
6506 for (i = 0; i < n_iv_uses (data); i++)
6508 use = iv_use (data, i);
6510 old_cp = iv_ca_cand_for_use (ivs, use);
6511 if (old_cp->cand != cand)
6512 continue;
6514 best_cost = iv_ca_cost (ivs);
6515 /* Start narrowing with START. */
6516 new_cp = get_use_iv_cost (data, use, start);
6518 if (data->consider_all_candidates)
6520 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6522 if (ci == cand->id || (start && ci == start->id))
6523 continue;
6525 cnd = iv_cand (data, ci);
6527 cp = get_use_iv_cost (data, use, cnd);
6528 if (!cp)
6529 continue;
6531 iv_ca_set_cp (data, ivs, use, cp);
6532 acost = iv_ca_cost (ivs);
6534 if (compare_costs (acost, best_cost) < 0)
6536 best_cost = acost;
6537 new_cp = cp;
6541 else
6543 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6545 if (ci == cand->id || (start && ci == start->id))
6546 continue;
6548 cnd = iv_cand (data, ci);
6550 cp = get_use_iv_cost (data, use, cnd);
6551 if (!cp)
6552 continue;
6554 iv_ca_set_cp (data, ivs, use, cp);
6555 acost = iv_ca_cost (ivs);
6557 if (compare_costs (acost, best_cost) < 0)
6559 best_cost = acost;
6560 new_cp = cp;
6564 /* Restore the old cp for this use. */
6565 iv_ca_set_cp (data, ivs, use, old_cp);
6567 if (!new_cp)
6569 iv_ca_delta_free (delta);
6570 return infinite_cost;
6573 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6576 iv_ca_delta_commit (data, ivs, *delta, true);
6577 cost = iv_ca_cost (ivs);
6578 iv_ca_delta_commit (data, ivs, *delta, false);
6580 return cost;
6583 /* Try optimizing the set of candidates IVS by removing candidates other
6584 than EXCEPT_CAND from it. Return cost of the new set, and store
6585 differences in DELTA. */
6587 static comp_cost
6588 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6589 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6591 bitmap_iterator bi;
6592 struct iv_ca_delta *act_delta, *best_delta;
6593 unsigned i;
6594 comp_cost best_cost, acost;
6595 struct iv_cand *cand;
6597 best_delta = NULL;
6598 best_cost = iv_ca_cost (ivs);
6600 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6602 cand = iv_cand (data, i);
6604 if (cand == except_cand)
6605 continue;
6607 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6609 if (compare_costs (acost, best_cost) < 0)
6611 best_cost = acost;
6612 iv_ca_delta_free (&best_delta);
6613 best_delta = act_delta;
6615 else
6616 iv_ca_delta_free (&act_delta);
6619 if (!best_delta)
6621 *delta = NULL;
6622 return best_cost;
6625 /* Recurse to possibly remove other unnecessary ivs. */
6626 iv_ca_delta_commit (data, ivs, best_delta, true);
6627 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6628 iv_ca_delta_commit (data, ivs, best_delta, false);
6629 *delta = iv_ca_delta_join (best_delta, *delta);
6630 return best_cost;
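
/* A hypothetical trace of the pruning above: from a set {c0, c1, c2}
   with cost 20, narrowing away c1 might give cost 18; that delta is
   committed and the function recurses on {c0, c2}.  If no further
   removal helps, the joined delta describes exactly "drop c1", 18 is
   returned, and IVS itself is left unchanged.  */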
6633 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6634 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6635 its cost_pair; otherwise just return BEST_CP. */
6637 static struct cost_pair*
6638 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6639 unsigned int cand_idx, struct iv_cand *old_cand,
6640 struct cost_pair *best_cp)
6642 struct iv_cand *cand;
6643 struct cost_pair *cp;
6645 gcc_assert (old_cand != NULL && best_cp != NULL);
6646 if (cand_idx == old_cand->id)
6647 return best_cp;
6649 cand = iv_cand (data, cand_idx);
6650 cp = get_use_iv_cost (data, use, cand);
6651 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6652 return cp;
6654 return best_cp;
6657 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6658 that are used by more than one iv use. For each of those candidates,
6659 this function tries to represent the iv uses under that candidate using
6660 other candidates with lower local cost, then tries to prune the new set.
6661 If the new set has lower cost, it returns the new cost after recording
6662 the candidate replacements in list DELTA. */
6664 static comp_cost
6665 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6666 struct iv_ca_delta **delta)
6668 bitmap_iterator bi, bj;
6669 unsigned int i, j, k;
6670 struct iv_use *use;
6671 struct iv_cand *cand;
6672 comp_cost orig_cost, acost;
6673 struct iv_ca_delta *act_delta, *tmp_delta;
6674 struct cost_pair *old_cp, *best_cp = NULL;
6676 *delta = NULL;
6677 orig_cost = iv_ca_cost (ivs);
6679 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6681 if (ivs->n_cand_uses[i] == 1
6682 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6683 continue;
6685 cand = iv_cand (data, i);
6687 act_delta = NULL;
6688 /* Represent uses under current candidate using other ones with
6689 lower local cost. */
6690 for (j = 0; j < ivs->upto; j++)
6692 use = iv_use (data, j);
6693 old_cp = iv_ca_cand_for_use (ivs, use);
6695 if (old_cp->cand != cand)
6696 continue;
6698 best_cp = old_cp;
6699 if (data->consider_all_candidates)
6700 for (k = 0; k < n_iv_cands (data); k++)
6701 best_cp = cheaper_cost_with_cand (data, use, k,
6702 old_cp->cand, best_cp);
6703 else
6704 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6705 best_cp = cheaper_cost_with_cand (data, use, k,
6706 old_cp->cand, best_cp);
6708 if (best_cp == old_cp)
6709 continue;
6711 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6713 /* No replacement was found; no need for further pruning. */
6714 if (!act_delta)
6715 continue;
6717 /* Prune the new candidate set. */
6718 iv_ca_delta_commit (data, ivs, act_delta, true);
6719 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6720 iv_ca_delta_commit (data, ivs, act_delta, false);
6721 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6723 if (compare_costs (acost, orig_cost) < 0)
6725 *delta = act_delta;
6726 return acost;
6728 else
6729 iv_ca_delta_free (&act_delta);
6732 return orig_cost;
6735 /* Tries to extend the set IVS in the best possible way in order
6736 to express USE. If ORIGINALP is true, prefer candidates from
6737 the original set of IVs, otherwise favor important candidates not
6738 based on any memory object. */
6740 static bool
6741 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6742 struct iv_use *use, bool originalp)
6744 comp_cost best_cost, act_cost;
6745 unsigned i;
6746 bitmap_iterator bi;
6747 struct iv_cand *cand;
6748 struct iv_ca_delta *best_delta = NULL, *act_delta;
6749 struct cost_pair *cp;
6751 iv_ca_add_use (data, ivs, use);
6752 best_cost = iv_ca_cost (ivs);
6753 cp = iv_ca_cand_for_use (ivs, use);
6754 if (cp)
6756 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6757 iv_ca_set_no_cp (data, ivs, use);
6760 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6761 first try important candidates not based on any memory object. Only if
6762 this fails, try the specific ones. Rationale -- in loops with many
6763 variables the best choice often is to use just one generic biv. If we
6764 added here many ivs specific to the uses, the optimization algorithm later
6765 would be likely to get stuck in a local minimum, thus causing us to create
6766 too many ivs. Going from few ivs to more seems more likely to be
6767 successful -- starting from few ivs, replacing an expensive use by a
6768 specific iv should always be a win. */
6769 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, i, bi)
6771 cand = iv_cand (data, i);
6773 if (originalp && cand->pos != IP_ORIGINAL)
6774 continue;
6776 if (!originalp && cand->iv->base_object != NULL_TREE)
6777 continue;
6779 if (iv_ca_cand_used_p (ivs, cand))
6780 continue;
6782 cp = get_use_iv_cost (data, use, cand);
6783 if (!cp)
6784 continue;
6786 iv_ca_set_cp (data, ivs, use, cp);
6787 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6788 true);
6789 iv_ca_set_no_cp (data, ivs, use);
6790 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6792 if (compare_costs (act_cost, best_cost) < 0)
6794 best_cost = act_cost;
6796 iv_ca_delta_free (&best_delta);
6797 best_delta = act_delta;
6799 else
6800 iv_ca_delta_free (&act_delta);
6803 if (infinite_cost_p (best_cost))
6805 for (i = 0; i < use->n_map_members; i++)
6807 cp = use->cost_map + i;
6808 cand = cp->cand;
6809 if (!cand)
6810 continue;
6812 /* Already tried this. */
6813 if (cand->important)
6815 if (originalp && cand->pos == IP_ORIGINAL)
6816 continue;
6817 if (!originalp && cand->iv->base_object == NULL_TREE)
6818 continue;
6821 if (iv_ca_cand_used_p (ivs, cand))
6822 continue;
6824 act_delta = NULL;
6825 iv_ca_set_cp (data, ivs, use, cp);
6826 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6827 iv_ca_set_no_cp (data, ivs, use);
6828 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6829 cp, act_delta);
6831 if (compare_costs (act_cost, best_cost) < 0)
6833 best_cost = act_cost;
6835 if (best_delta)
6836 iv_ca_delta_free (&best_delta);
6837 best_delta = act_delta;
6839 else
6840 iv_ca_delta_free (&act_delta);
6844 iv_ca_delta_commit (data, ivs, best_delta, true);
6845 iv_ca_delta_free (&best_delta);
6847 return !infinite_cost_p (best_cost);
6850 /* Finds an initial assignment of candidates to uses. */
6852 static struct iv_ca *
6853 get_initial_solution (struct ivopts_data *data, bool originalp)
6855 struct iv_ca *ivs = iv_ca_new (data);
6856 unsigned i;
6858 for (i = 0; i < n_iv_uses (data); i++)
6859 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6861 iv_ca_free (&ivs);
6862 return NULL;
6865 return ivs;
6868 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6869 points to a bool; if it is true, this function also tries to break the
6870 local optimal fixed-point by replacing candidates in IVS. */
6872 static bool
6873 try_improve_iv_set (struct ivopts_data *data,
6874 struct iv_ca *ivs, bool *try_replace_p)
6876 unsigned i, n_ivs;
6877 comp_cost acost, best_cost = iv_ca_cost (ivs);
6878 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6879 struct iv_cand *cand;
6881 /* Try extending the set of induction variables by one. */
6882 for (i = 0; i < n_iv_cands (data); i++)
6884 cand = iv_cand (data, i);
6886 if (iv_ca_cand_used_p (ivs, cand))
6887 continue;
6889 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6890 if (!act_delta)
6891 continue;
6893 /* If we successfully added the candidate and the set is small enough,
6894 try optimizing it by removing other candidates. */
6895 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6897 iv_ca_delta_commit (data, ivs, act_delta, true);
6898 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6899 iv_ca_delta_commit (data, ivs, act_delta, false);
6900 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6903 if (compare_costs (acost, best_cost) < 0)
6905 best_cost = acost;
6906 iv_ca_delta_free (&best_delta);
6907 best_delta = act_delta;
6909 else
6910 iv_ca_delta_free (&act_delta);
6913 if (!best_delta)
6915 /* Try removing the candidates from the set instead. */
6916 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6918 if (!best_delta && *try_replace_p)
6920 *try_replace_p = false;
6921 /* So far the candidate-selection algorithm tends to choose fewer IVs
6922 so that it can handle cases in which loops have many variables
6923 but the best choice is often to use only one general biv. One
6924 weakness is that it can't handle the opposite case, in which different
6925 candidates should be chosen with respect to each use. To solve
6926 the problem, we replace candidates in the manner described in the
6927 comments of iv_ca_replace, thus giving the general algorithm a chance
6928 to break the local optimal fixed-point in these cases. */
6929 best_cost = iv_ca_replace (data, ivs, &best_delta);
6932 if (!best_delta)
6933 return false;
6936 iv_ca_delta_commit (data, ivs, best_delta, true);
6937 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6938 iv_ca_delta_free (&best_delta);
6939 return true;
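
/* In summary, each successful call above performs one greedy step --
   first extending the set by a single candidate (pruning afterwards if
   the set is small enough), then pure pruning, and as a last resort one
   round of candidate replacement -- and commits it only if it strictly
   lowers iv_ca_cost.  Since every accepted step lowers the integer
   cost, the driver loop below terminates.  */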
6942 /* Attempts to find the optimal set of induction variables. We use a simple
6943 greedy heuristic -- we try to replace at most one candidate in the selected
6944 solution and remove the unused ivs while this improves the cost. */
6946 static struct iv_ca *
6947 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6949 struct iv_ca *set;
6950 bool try_replace_p = true;
6952 /* Get the initial solution. */
6953 set = get_initial_solution (data, originalp);
6954 if (!set)
6956 if (dump_file && (dump_flags & TDF_DETAILS))
6957 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6958 return NULL;
6961 if (dump_file && (dump_flags & TDF_DETAILS))
6963 fprintf (dump_file, "Initial set of candidates:\n");
6964 iv_ca_dump (data, dump_file, set);
6967 while (try_improve_iv_set (data, set, &try_replace_p))
6969 if (dump_file && (dump_flags & TDF_DETAILS))
6971 fprintf (dump_file, "Improved to:\n");
6972 iv_ca_dump (data, dump_file, set);
6976 return set;
6979 static struct iv_ca *
6980 find_optimal_iv_set (struct ivopts_data *data)
6982 unsigned i;
6983 struct iv_ca *set, *origset;
6984 struct iv_use *use;
6985 comp_cost cost, origcost;
6987 /* Determine the cost based on a strategy that starts with original IVs,
6988 and try again using a strategy that prefers candidates not based
6989 on any IVs. */
6990 origset = find_optimal_iv_set_1 (data, true);
6991 set = find_optimal_iv_set_1 (data, false);
6993 if (!origset && !set)
6994 return NULL;
6996 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6997 cost = set ? iv_ca_cost (set) : infinite_cost;
6999 if (dump_file && (dump_flags & TDF_DETAILS))
7001 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7002 origcost.cost, origcost.complexity);
7003 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7004 cost.cost, cost.complexity);
7007 /* Choose the one with the best cost. */
7008 if (compare_costs (origcost, cost) <= 0)
7010 if (set)
7011 iv_ca_free (&set);
7012 set = origset;
7014 else if (origset)
7015 iv_ca_free (&origset);
7017 for (i = 0; i < n_iv_uses (data); i++)
7019 use = iv_use (data, i);
7020 use->selected = iv_ca_cand_for_use (set, use)->cand;
7023 return set;
7026 /* Creates a new induction variable corresponding to CAND. */
7028 static void
7029 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7031 gimple_stmt_iterator incr_pos;
7032 tree base;
7033 bool after = false;
7035 if (!cand->iv)
7036 return;
7038 switch (cand->pos)
7040 case IP_NORMAL:
7041 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7042 break;
7044 case IP_END:
7045 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7046 after = true;
7047 break;
7049 case IP_AFTER_USE:
7050 after = true;
7051 /* fall through */
7052 case IP_BEFORE_USE:
7053 incr_pos = gsi_for_stmt (cand->incremented_at);
7054 break;
7056 case IP_ORIGINAL:
7057 /* Mark that the iv is preserved. */
7058 name_info (data, cand->var_before)->preserve_biv = true;
7059 name_info (data, cand->var_after)->preserve_biv = true;
7061 /* Rewrite the increment so that it uses var_before directly. */
7062 find_interesting_uses_op (data, cand->var_after)->selected = cand;
7063 return;
7066 gimple_add_tmp_var (cand->var_before);
7068 base = unshare_expr (cand->iv->base);
7070 create_iv (base, unshare_expr (cand->iv->step),
7071 cand->var_before, data->current_loop,
7072 &incr_pos, after, &cand->var_before, &cand->var_after);
7075 /* Creates new induction variables described in SET. */
7077 static void
7078 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7080 unsigned i;
7081 struct iv_cand *cand;
7082 bitmap_iterator bi;
7084 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7086 cand = iv_cand (data, i);
7087 create_new_iv (data, cand);
7090 if (dump_file && (dump_flags & TDF_DETAILS))
7092 fprintf (dump_file, "Selected IV set for loop %d",
7093 data->current_loop->num);
7094 if (data->loop_loc != UNKNOWN_LOCATION)
7095 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7096 LOCATION_LINE (data->loop_loc));
7097 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7098 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7100 cand = iv_cand (data, i);
7101 dump_cand (dump_file, cand);
7103 fprintf (dump_file, "\n");
7107 /* Rewrites USE (definition of iv used in a nonlinear expression)
7108 using candidate CAND. */
7110 static void
7111 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7112 struct iv_use *use, struct iv_cand *cand)
7114 tree comp;
7115 tree op, tgt;
7116 gassign *ass;
7117 gimple_stmt_iterator bsi;
7119 /* An important special case -- if we are asked to express value of
7120 the original iv by itself, just exit; there is no need to
7121 introduce a new computation (that might also need casting the
7122 variable to unsigned and back). */
7123 if (cand->pos == IP_ORIGINAL
7124 && cand->incremented_at == use->stmt)
7126 enum tree_code stmt_code;
7128 gcc_assert (is_gimple_assign (use->stmt));
7129 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7131 /* Check whether we may leave the computation unchanged.
7132 This is the case only if it does not rely on other
7133 computations in the loop -- otherwise, the computation
7134 we rely upon may be removed in remove_unused_ivs,
7135 thus leading to an ICE. */
7136 stmt_code = gimple_assign_rhs_code (use->stmt);
7137 if (stmt_code == PLUS_EXPR
7138 || stmt_code == MINUS_EXPR
7139 || stmt_code == POINTER_PLUS_EXPR)
7141 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7142 op = gimple_assign_rhs2 (use->stmt);
7143 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7144 op = gimple_assign_rhs1 (use->stmt);
7145 else
7146 op = NULL_TREE;
7148 else
7149 op = NULL_TREE;
7151 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7152 return;
7155 comp = get_computation (data->current_loop, use, cand);
7156 gcc_assert (comp != NULL_TREE);
7158 switch (gimple_code (use->stmt))
7160 case GIMPLE_PHI:
7161 tgt = PHI_RESULT (use->stmt);
7163 /* If we should keep the biv, do not replace it. */
7164 if (name_info (data, tgt)->preserve_biv)
7165 return;
7167 bsi = gsi_after_labels (gimple_bb (use->stmt));
7168 break;
7170 case GIMPLE_ASSIGN:
7171 tgt = gimple_assign_lhs (use->stmt);
7172 bsi = gsi_for_stmt (use->stmt);
7173 break;
7175 default:
7176 gcc_unreachable ();
7179 if (!valid_gimple_rhs_p (comp)
7180 || (gimple_code (use->stmt) != GIMPLE_PHI
7181 /* We can't allow re-allocating the stmt, as it might still be
7182 pointed to. */
7183 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7184 >= gimple_num_ops (gsi_stmt (bsi)))))
7186 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7187 true, GSI_SAME_STMT);
7188 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7190 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7191 /* As this isn't a plain copy we have to reset alignment
7192 information. */
7193 if (SSA_NAME_PTR_INFO (comp))
7194 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7198 if (gimple_code (use->stmt) == GIMPLE_PHI)
7200 ass = gimple_build_assign (tgt, comp);
7201 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7203 bsi = gsi_for_stmt (use->stmt);
7204 remove_phi_node (&bsi, false);
7206 else
7208 gimple_assign_set_rhs_from_tree (&bsi, comp);
7209 use->stmt = gsi_stmt (bsi);
7213 /* Performs a peephole optimization to reorder the iv update statement with
7214 a mem ref to enable instruction combining in later phases. The mem ref uses
7215 the iv value before the update, so the reordering transformation requires
7216 adjustment of the offset. CAND is the selected IV_CAND.
7218 Example:
7220 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7221 iv2 = iv1 + 1;
7223 if (t < val) (1)
7224 goto L;
7225 goto Head;
7228 Directly propagating t over to (1) would introduce an overlapping live
7229 range and thus increase register pressure. This peephole transforms it into:
7232 iv2 = iv1 + 1;
7233 t = MEM_REF (base, iv2, 8, 8);
7234 if (t < val)
7235 goto L;
7236 goto Head;
7239 static void
7240 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7242 tree var_after;
7243 gimple *iv_update, *stmt;
7244 basic_block bb;
7245 gimple_stmt_iterator gsi, gsi_iv;
7247 if (cand->pos != IP_NORMAL)
7248 return;
7250 var_after = cand->var_after;
7251 iv_update = SSA_NAME_DEF_STMT (var_after);
7253 bb = gimple_bb (iv_update);
7254 gsi = gsi_last_nondebug_bb (bb);
7255 stmt = gsi_stmt (gsi);
7257 /* Only handle conditional statements for now. */
7258 if (gimple_code (stmt) != GIMPLE_COND)
7259 return;
7261 gsi_prev_nondebug (&gsi);
7262 stmt = gsi_stmt (gsi);
7263 if (stmt != iv_update)
7264 return;
7266 gsi_prev_nondebug (&gsi);
7267 if (gsi_end_p (gsi))
7268 return;
7270 stmt = gsi_stmt (gsi);
7271 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7272 return;
7274 if (stmt != use->stmt)
7275 return;
7277 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7278 return;
7280 if (dump_file && (dump_flags & TDF_DETAILS))
7282 fprintf (dump_file, "Reordering \n");
7283 print_gimple_stmt (dump_file, iv_update, 0, 0);
7284 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7285 fprintf (dump_file, "\n");
7288 gsi = gsi_for_stmt (use->stmt);
7289 gsi_iv = gsi_for_stmt (iv_update);
7290 gsi_move_before (&gsi_iv, &gsi);
7292 cand->pos = IP_BEFORE_USE;
7293 cand->incremented_at = use->stmt;
7296 /* Rewrites USE (address that is an iv) using candidate CAND. */
7298 static void
7299 rewrite_use_address_1 (struct ivopts_data *data,
7300 struct iv_use *use, struct iv_cand *cand)
7302 aff_tree aff;
7303 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7304 tree base_hint = NULL_TREE;
7305 tree ref, iv;
7306 bool ok;
7308 adjust_iv_update_pos (cand, use);
7309 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7310 gcc_assert (ok);
7311 unshare_aff_combination (&aff);
7313 /* To avoid undefined overflow problems, all IV candidates use unsigned
7314 integer types. The drawback is that this makes it impossible for
7315 create_mem_ref to distinguish an IV that is based on a memory object
7316 from one that represents simply an offset.
7318 To work around this problem, we pass a hint to create_mem_ref that
7319 indicates which variable (if any) in aff is an IV based on a memory
7320 object. Note that we only consider the candidate. If this is not
7321 based on an object, the base of the reference is in some subexpression
7322 of the use -- but these will use pointer types, so they are recognized
7323 by the create_mem_ref heuristics anyway. */
7324 if (cand->iv->base_object)
7325 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7327 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7328 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7329 reference_alias_ptr_type (*use->op_p),
7330 iv, base_hint, data->speed);
7331 copy_ref_info (ref, *use->op_p);
7332 *use->op_p = ref;
7335 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7336 first use of a group, rewrites sub uses in the group too. */
7338 static void
7339 rewrite_use_address (struct ivopts_data *data,
7340 struct iv_use *use, struct iv_cand *cand)
7342 struct iv_use *next;
7344 gcc_assert (use->sub_id == 0);
7345 rewrite_use_address_1 (data, use, cand);
7346 update_stmt (use->stmt);
7348 for (next = use->next; next != NULL; next = next->next)
7350 rewrite_use_address_1 (data, next, cand);
7351 update_stmt (next->stmt);
7354 return;
7357 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7358 candidate CAND. */
7360 static void
7361 rewrite_use_compare (struct ivopts_data *data,
7362 struct iv_use *use, struct iv_cand *cand)
7364 tree comp, *var_p, op, bound;
7365 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7366 enum tree_code compare;
7367 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7368 bool ok;
7370 bound = cp->value;
7371 if (bound)
7373 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7374 tree var_type = TREE_TYPE (var);
7375 gimple_seq stmts;
7377 if (dump_file && (dump_flags & TDF_DETAILS))
7379 fprintf (dump_file, "Replacing exit test: ");
7380 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7382 compare = cp->comp;
7383 bound = unshare_expr (fold_convert (var_type, bound));
7384 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7385 if (stmts)
7386 gsi_insert_seq_on_edge_immediate (
7387 loop_preheader_edge (data->current_loop),
7388 stmts);
7390 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7391 gimple_cond_set_lhs (cond_stmt, var);
7392 gimple_cond_set_code (cond_stmt, compare);
7393 gimple_cond_set_rhs (cond_stmt, op);
7394 return;
7397 /* The induction variable elimination failed; just express the original
7398 giv. */
7399 comp = get_computation (data->current_loop, use, cand);
7400 gcc_assert (comp != NULL_TREE);
7402 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7403 gcc_assert (ok);
7405 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7406 true, GSI_SAME_STMT);
7409 /* Rewrites USE using candidate CAND. */
7411 static void
7412 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7414 switch (use->type)
7416 case USE_NONLINEAR_EXPR:
7417 rewrite_use_nonlinear_expr (data, use, cand);
7418 break;
7420 case USE_ADDRESS:
7421 rewrite_use_address (data, use, cand);
7422 break;
7424 case USE_COMPARE:
7425 rewrite_use_compare (data, use, cand);
7426 break;
7428 default:
7429 gcc_unreachable ();
7432 update_stmt (use->stmt);
7435 /* Rewrite the uses using the selected induction variables. */
7437 static void
7438 rewrite_uses (struct ivopts_data *data)
7440 unsigned i;
7441 struct iv_cand *cand;
7442 struct iv_use *use;
7444 for (i = 0; i < n_iv_uses (data); i++)
7446 use = iv_use (data, i);
7447 cand = use->selected;
7448 gcc_assert (cand);
7450 rewrite_use (data, use, cand);
7454 /* Removes the ivs that are not used after rewriting. */
7456 static void
7457 remove_unused_ivs (struct ivopts_data *data)
7459 unsigned j;
7460 bitmap_iterator bi;
7461 bitmap toremove = BITMAP_ALLOC (NULL);
7463 /* Figure out an order in which to release SSA DEFs so that we don't
7464 release something that we'd have to propagate into a debug stmt
7465 afterwards. */
7466 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7468 struct version_info *info;
7470 info = ver_info (data, j);
7471 if (info->iv
7472 && !integer_zerop (info->iv->step)
7473 && !info->inv_id
7474 && !info->iv->have_use_for
7475 && !info->preserve_biv)
7477 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7479 tree def = info->iv->ssa_name;
7481 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7483 imm_use_iterator imm_iter;
7484 use_operand_p use_p;
7485 gimple *stmt;
7486 int count = 0;
7488 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7490 if (!gimple_debug_bind_p (stmt))
7491 continue;
7493 /* We just want to determine whether to do nothing
7494 (count == 0), to substitute the computed
7495 expression into a single use of the SSA DEF by
7496 itself (count == 1), or to use a debug temp
7497 because the SSA DEF is used multiple times or as
7498 part of a larger expression (count > 1). */
7499 count++;
7500 if (gimple_debug_bind_get_value (stmt) != def)
7501 count++;
7503 if (count > 1)
7504 BREAK_FROM_IMM_USE_STMT (imm_iter);
7507 if (!count)
7508 continue;
7510 struct iv_use dummy_use;
7511 struct iv_cand *best_cand = NULL, *cand;
7512 unsigned i, best_pref = 0, cand_pref;
7514 memset (&dummy_use, 0, sizeof (dummy_use));
7515 dummy_use.iv = info->iv;
7516 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7518 cand = iv_use (data, i)->selected;
7519 if (cand == best_cand)
7520 continue;
7521 cand_pref = operand_equal_p (cand->iv->step,
7522 info->iv->step, 0)
7523 ? 4 : 0;
7524 cand_pref
7525 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7526 == TYPE_MODE (TREE_TYPE (info->iv->base))
7527 ? 2 : 0;
7528 cand_pref
7529 += TREE_CODE (cand->iv->base) == INTEGER_CST
7530 ? 1 : 0;
7531 if (best_cand == NULL || best_pref < cand_pref)
7533 best_cand = cand;
7534 best_pref = cand_pref;
7538 if (!best_cand)
7539 continue;
7541 tree comp = get_computation_at (data->current_loop,
7542 &dummy_use, best_cand,
7543 SSA_NAME_DEF_STMT (def));
7544 if (!comp)
7545 continue;
7547 if (count > 1)
7549 tree vexpr = make_node (DEBUG_EXPR_DECL);
7550 DECL_ARTIFICIAL (vexpr) = 1;
7551 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7552 if (SSA_NAME_VAR (def))
7553 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7554 else
7555 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7556 gdebug *def_temp
7557 = gimple_build_debug_bind (vexpr, comp, NULL);
7558 gimple_stmt_iterator gsi;
7560 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7561 gsi = gsi_after_labels (gimple_bb
7562 (SSA_NAME_DEF_STMT (def)));
7563 else
7564 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7566 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7567 comp = vexpr;
7570 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7572 if (!gimple_debug_bind_p (stmt))
7573 continue;
7575 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7576 SET_USE (use_p, comp);
7578 update_stmt (stmt);
7584 release_defs_bitset (toremove);
7586 BITMAP_FREE (toremove);
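
/* A hypothetical scoring example for the debug-candidate selection
   above: a surviving candidate whose step matches the dead iv's step
   (+4) and whose base has the same mode (+2) scores 6 and is preferred
   over one whose base is merely an INTEGER_CST (+1), so debug binds are
   rewritten in terms of the closest available iv.  */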
7589 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7590 for hash_map::traverse. */
7592 bool
7593 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7595 free (value);
7596 return true;
7599 /* Frees data allocated by the optimization of a single loop. */
7601 static void
7602 free_loop_data (struct ivopts_data *data)
7604 unsigned i, j;
7605 bitmap_iterator bi;
7606 tree obj;
7608 if (data->niters)
7610 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7611 delete data->niters;
7612 data->niters = NULL;
7615 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7617 struct version_info *info;
7619 info = ver_info (data, i);
7620 info->iv = NULL;
7621 info->has_nonlin_use = false;
7622 info->preserve_biv = false;
7623 info->inv_id = 0;
7625 bitmap_clear (data->relevant);
7626 bitmap_clear (data->important_candidates);
7628 for (i = 0; i < n_iv_uses (data); i++)
7630 struct iv_use *use = iv_use (data, i);
7631 struct iv_use *pre = use, *sub = use->next;
7633 while (sub)
7635 gcc_assert (sub->related_cands == NULL);
7636 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7638 pre = sub;
7639 sub = sub->next;
7640 free (pre);
7643 BITMAP_FREE (use->related_cands);
7644 for (j = 0; j < use->n_map_members; j++)
7645 if (use->cost_map[j].depends_on)
7646 BITMAP_FREE (use->cost_map[j].depends_on);
7647 free (use->cost_map);
7648 free (use);
7650 data->iv_uses.truncate (0);
7652 for (i = 0; i < n_iv_cands (data); i++)
7654 struct iv_cand *cand = iv_cand (data, i);
7656 if (cand->depends_on)
7657 BITMAP_FREE (cand->depends_on);
7658 free (cand);
7660 data->iv_candidates.truncate (0);
7662 if (data->version_info_size < num_ssa_names)
7664 data->version_info_size = 2 * num_ssa_names;
7665 free (data->version_info);
7666 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7669 data->max_inv_id = 0;
7671 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7672 SET_DECL_RTL (obj, NULL_RTX);
7674 decl_rtl_to_reset.truncate (0);
7676 data->inv_expr_tab->empty ();
7677 data->inv_expr_id = 0;
7679 data->iv_common_cand_tab->empty ();
7680 data->iv_common_cands.truncate (0);
7683 /* Finalizes the data structures used by the iv optimization
7684 pass. */
7686 static void
7687 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7689 free_loop_data (data);
7690 free (data->version_info);
7691 BITMAP_FREE (data->relevant);
7692 BITMAP_FREE (data->important_candidates);
7694 decl_rtl_to_reset.release ();
7695 data->iv_uses.release ();
7696 data->iv_candidates.release ();
7697 delete data->inv_expr_tab;
7698 data->inv_expr_tab = NULL;
7699 free_affine_expand_cache (&data->name_expansion_cache);
7700 delete data->iv_common_cand_tab;
7701 data->iv_common_cand_tab = NULL;
7702 data->iv_common_cands.release ();
7703 obstack_free (&data->iv_obstack, NULL);
7706 /* Returns true if the loop body BODY includes any function calls. */
7708 static bool
7709 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7711 gimple_stmt_iterator gsi;
7712 unsigned i;
7714 for (i = 0; i < num_nodes; i++)
7715 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7717 gimple *stmt = gsi_stmt (gsi);
7718 if (is_gimple_call (stmt)
7719 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7720 return true;
7722 return false;
7725 /* Optimizes the LOOP. Returns true if anything changed. */
7727 static bool
7728 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7730 bool changed = false;
7731 struct iv_ca *iv_ca;
7732 edge exit = single_dom_exit (loop);
7733 basic_block *body;
7735 gcc_assert (!data->niters);
7736 data->current_loop = loop;
7737 data->loop_loc = find_loop_location (loop);
7738 data->speed = optimize_loop_for_speed_p (loop);
7740 if (dump_file && (dump_flags & TDF_DETAILS))
7742 fprintf (dump_file, "Processing loop %d", loop->num);
7743 if (data->loop_loc != UNKNOWN_LOCATION)
7744 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7745 LOCATION_LINE (data->loop_loc));
7746 fprintf (dump_file, "\n");
7748 if (exit)
7750 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7751 exit->src->index, exit->dest->index);
7752 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7753 fprintf (dump_file, "\n");
7756 fprintf (dump_file, "\n");
7759 body = get_loop_body (loop);
7760 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7761 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7762 free (body);
7764 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7766 /* For each ssa name, determine whether it behaves as an induction variable
7767 in some loop. */
7768 if (!find_induction_variables (data))
7769 goto finish;
7771 /* Finds interesting uses (item 1). */
7772 find_interesting_uses (data);
7773 group_address_uses (data);
7774 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7775 goto finish;
7777 /* Finds candidates for the induction variables (item 2). */
7778 find_iv_candidates (data);
7780 /* Calculates the costs (item 3, part 1). */
7781 determine_iv_costs (data);
7782 determine_use_iv_costs (data);
7783 determine_set_costs (data);
7785 /* Find the optimal set of induction variables (item 3, part 2). */
7786 iv_ca = find_optimal_iv_set (data);
7787 if (!iv_ca)
7788 goto finish;
7789 changed = true;
7791 /* Create the new induction variables (item 4, part 1). */
7792 create_new_ivs (data, iv_ca);
7793 iv_ca_free (&iv_ca);
7795 /* Rewrite the uses (item 4, part 2). */
7796 rewrite_uses (data);
7798 /* Remove the ivs that are unused after rewriting. */
7799 remove_unused_ivs (data);
7801 /* We have changed the structure of induction variables; it might happen
7802 that definitions in the scev database refer to some of them that were
7803 eliminated. */
7804 scev_reset ();
7806 finish:
7807 free_loop_data (data);
7809 return changed;
7812 /* Main entry point. Optimizes induction variables in loops. */
7814 void
7815 tree_ssa_iv_optimize (void)
7817 struct loop *loop;
7818 struct ivopts_data data;
7820 tree_ssa_iv_optimize_init (&data);
7822 /* Optimize the loops starting with the innermost ones. */
7823 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7825 if (dump_file && (dump_flags & TDF_DETAILS))
7826 flow_loop_dump (loop, dump_file, NULL, 1);
7828 tree_ssa_iv_optimize_loop (&data, loop);
7831 tree_ssa_iv_optimize_finalize (&data);