gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   3    Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "pointer-set.h"
  87 #include "hashtab.h"
  88 #include "tree-chrec.h"
  89 #include "tree-scalar-evolution.h"
  90 #include "cfgloop.h"
  91 #include "params.h"
  92 #include "langhooks.h"
  93 #include "tree-affine.h"
  94 #include "target.h"
  95
  96 /* The infinite cost.  */
  97 #define INFTY 10000000
  98
  99 /* The expected number of loop iterations.  TODO -- use profiling instead of
 100    this.  */
 101 #define AVG_LOOP_NITER(LOOP) 5
 102
 103
 104 /* Representation of the induction variable.  */
 105 struct iv
 106 {
 107   tree base;            /* Initial value of the iv.  */
 108   tree base_object;     /* A memory object to that the induction variable points.  */
 109   tree step;            /* Step of the iv (constant only).  */
 110   tree ssa_name;        /* The ssa name with the value.  */
 111   bool biv_p;           /* Is it a biv?  */
 112   bool have_use_for;    /* Do we already have a use for it?  */
 113   unsigned use_id;      /* The identifier in the use if it is the case.  */
 114 };
 115
 116 /* Per-ssa version information (induction variable descriptions, etc.).  */
 117 struct version_info
 118 {
 119   tree name;            /* The ssa name.  */
 120   struct iv *iv;        /* Induction variable description.  */
 121   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 122                            an expression that is not an induction variable.  */
 123   unsigned inv_id;      /* Id of an invariant.  */
 124   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 125 };
 126
 127 /* Types of uses.  */
 128 enum use_type
 129 {
 130   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 131   USE_ADDRESS,          /* Use in an address.  */
 132   USE_COMPARE           /* Use is a compare.  */
 133 };
 134
 135 /* Cost of a computation.  */
 136 typedef struct
 137 {
 138   unsigned cost;        /* The runtime cost.  */
 139   unsigned complexity;  /* The estimate of the complexity of the code for
 140                            the computation (in no concrete units --
 141                            complexity field should be larger for more
 142                            complex expressions and addressing modes).  */
 143 } comp_cost;
 144
 145 static const comp_cost zero_cost = {0, 0};
 146 static const comp_cost infinite_cost = {INFTY, INFTY};
 147
 148 /* The candidate - cost pair.  */
 149 struct cost_pair
 150 {
 151   struct iv_cand *cand; /* The candidate.  */
 152   comp_cost cost;       /* The cost.  */
 153   bitmap depends_on;    /* The list of invariants that have to be
 154                            preserved.  */
 155   tree value;           /* For final value elimination, the expression for
 156                            the final value of the iv.  For iv elimination,
 157                            the new bound to compare with.  */
 158 };
 159
 160 /* Use.  */
 161 struct iv_use
 162 {
 163   unsigned id;          /* The id of the use.  */
 164   enum use_type type;   /* Type of the use.  */
 165   struct iv *iv;        /* The induction variable it is based on.  */
 166   tree stmt;            /* Statement in that it occurs.  */
 167   tree *op_p;           /* The place where it occurs.  */
 168   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 169                            important ones.  */
 170
 171   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 172   struct cost_pair *cost_map;
 173                         /* The costs wrto the iv candidates.  */
 174
 175   struct iv_cand *selected;
 176                         /* The selected candidate.  */
 177 };
 178
 179 /* The position where the iv is computed.  */
 180 enum iv_position
 181 {
 182   IP_NORMAL,            /* At the end, just before the exit condition.  */
 183   IP_END,               /* At the end of the latch block.  */
 184   IP_ORIGINAL           /* The original biv.  */
 185 };
 186
 187 /* The induction variable candidate.  */
 188 struct iv_cand
 189 {
 190   unsigned id;          /* The number of the candidate.  */
 191   bool important;       /* Whether this is an "important" candidate, i.e. such
 192                            that it should be considered by all uses.  */
 193   enum iv_position pos; /* Where it is computed.  */
 194   tree incremented_at;  /* For original biv, the statement where it is
 195                            incremented.  */
 196   tree var_before;      /* The variable used for it before increment.  */
 197   tree var_after;       /* The variable used for it after increment.  */
 198   struct iv *iv;        /* The value of the candidate.  NULL for
 199                            "pseudocandidate" used to indicate the possibility
 200                            to replace the final value of an iv by direct
 201                            computation of the value.  */
 202   unsigned cost;        /* Cost of the candidate.  */
 203   bitmap depends_on;    /* The list of invariants that are used in step of the
 204                            biv.  */
 205 };
 206
 207 /* The data used by the induction variable optimizations.  */
 208
 209 typedef struct iv_use *iv_use_p;
 210 DEF_VEC_P(iv_use_p);
 211 DEF_VEC_ALLOC_P(iv_use_p,heap);
 212
 213 typedef struct iv_cand *iv_cand_p;
 214 DEF_VEC_P(iv_cand_p);
 215 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 216
 217 struct ivopts_data
 218 {
 219   /* The currently optimized loop.  */
 220   struct loop *current_loop;
 221
 222   /* Number of registers used in it.  */
 223   unsigned regs_used;
 224
 225   /* Numbers of iterations for all exits of the current loop.  */
 226   struct pointer_map_t *niters;
 227
 228   /* The size of version_info array allocated.  */
 229   unsigned version_info_size;
 230
 231   /* The array of information for the ssa names.  */
 232   struct version_info *version_info;
 233
 234   /* The bitmap of indices in version_info whose value was changed.  */
 235   bitmap relevant;
 236
 237   /* The maximum invariant id.  */
 238   unsigned max_inv_id;
 239
 240   /* The uses of induction variables.  */
 241   VEC(iv_use_p,heap) *iv_uses;
 242
 243   /* The candidates.  */
 244   VEC(iv_cand_p,heap) *iv_candidates;
 245
 246   /* A bitmap of important candidates.  */
 247   bitmap important_candidates;
 248
 249   /* Whether to consider just related and important candidates when replacing a
 250      use.  */
 251   bool consider_all_candidates;
 252 };
 253
 254 /* An assignment of iv candidates to uses.  */
 255
 256 struct iv_ca
 257 {
 258   /* The number of uses covered by the assignment.  */
 259   unsigned upto;
 260
 261   /* Number of uses that cannot be expressed by the candidates in the set.  */
 262   unsigned bad_uses;
 263
 264   /* Candidate assigned to a use, together with the related costs.  */
 265   struct cost_pair **cand_for_use;
 266
 267   /* Number of times each candidate is used.  */
 268   unsigned *n_cand_uses;
 269
 270   /* The candidates used.  */
 271   bitmap cands;
 272
 273   /* The number of candidates in the set.  */
 274   unsigned n_cands;
 275
 276   /* Total number of registers needed.  */
 277   unsigned n_regs;
 278
 279   /* Total cost of expressing uses.  */
 280   comp_cost cand_use_cost;
 281
 282   /* Total cost of candidates.  */
 283   unsigned cand_cost;
 284
 285   /* Number of times each invariant is used.  */
 286   unsigned *n_invariant_uses;
 287
 288   /* Total cost of the assignment.  */
 289   comp_cost cost;
 290 };
 291
 292 /* Difference of two iv candidate assignments.  */
 293
 294 struct iv_ca_delta
 295 {
 296   /* Changed use.  */
 297   struct iv_use *use;
 298
 299   /* An old assignment (for rollback purposes).  */
 300   struct cost_pair *old_cp;
 301
 302   /* A new assignment.  */
 303   struct cost_pair *new_cp;
 304
 305   /* Next change in the list.  */
 306   struct iv_ca_delta *next_change;
 307 };
 308
 309 /* Bound on number of candidates below that all candidates are considered.  */
 310
 311 #define CONSIDER_ALL_CANDIDATES_BOUND \
 312   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 313
 314 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 315    optimizing such a loop would help, and it would take ages).  */
 316
 317 #define MAX_CONSIDERED_USES \
 318   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 319
 320 /* If there are at most this number of ivs in the set, try removing unnecessary
 321    ivs from the set always.  */
 322
 323 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 324   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 325
 326 /* The list of trees for that the decl_rtl field must be reset is stored
 327    here.  */
 328
 329 static VEC(tree,heap) *decl_rtl_to_reset;
 330
 331 /* Number of uses recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_uses (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_use_p, data->iv_uses);
 337 }
 338
 339 /* Ith use recorded in DATA.  */
 340
 341 static inline struct iv_use *
 342 iv_use (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_use_p, data->iv_uses, i);
 345 }
 346
 347 /* Number of candidates recorded in DATA.  */
 348
 349 static inline unsigned
 350 n_iv_cands (struct ivopts_data *data)
 351 {
 352   return VEC_length (iv_cand_p, data->iv_candidates);
 353 }
 354
 355 /* Ith candidate recorded in DATA.  */
 356
 357 static inline struct iv_cand *
 358 iv_cand (struct ivopts_data *data, unsigned i)
 359 {
 360   return VEC_index (iv_cand_p, data->iv_candidates, i);
 361 }
 362
 363 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 364
 365 edge
 366 single_dom_exit (struct loop *loop)
 367 {
 368   edge exit = single_exit (loop);
 369
 370   if (!exit)
 371     return NULL;
 372
 373   if (!just_once_each_iteration_p (loop, exit->src))
 374     return NULL;
 375
 376   return exit;
 377 }
 378
 379 /* Dumps information about the induction variable IV to FILE.  */
 380
 381 extern void dump_iv (FILE *, struct iv *);
 382 void
 383 dump_iv (FILE *file, struct iv *iv)
 384 {
 385   if (iv->ssa_name)
 386     {
 387       fprintf (file, "ssa name ");
 388       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 389       fprintf (file, "\n");
 390     }
 391
 392   fprintf (file, "  type ");
 393   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 394   fprintf (file, "\n");
 395
 396   if (iv->step)
 397     {
 398       fprintf (file, "  base ");
 399       print_generic_expr (file, iv->base, TDF_SLIM);
 400       fprintf (file, "\n");
 401
 402       fprintf (file, "  step ");
 403       print_generic_expr (file, iv->step, TDF_SLIM);
 404       fprintf (file, "\n");
 405     }
 406   else
 407     {
 408       fprintf (file, "  invariant ");
 409       print_generic_expr (file, iv->base, TDF_SLIM);
 410       fprintf (file, "\n");
 411     }
 412
 413   if (iv->base_object)
 414     {
 415       fprintf (file, "  base object ");
 416       print_generic_expr (file, iv->base_object, TDF_SLIM);
 417       fprintf (file, "\n");
 418     }
 419
 420   if (iv->biv_p)
 421     fprintf (file, "  is a biv\n");
 422 }
 423
 424 /* Dumps information about the USE to FILE.  */
 425
 426 extern void dump_use (FILE *, struct iv_use *);
 427 void
 428 dump_use (FILE *file, struct iv_use *use)
 429 {
 430   fprintf (file, "use %d\n", use->id);
 431
 432   switch (use->type)
 433     {
 434     case USE_NONLINEAR_EXPR:
 435       fprintf (file, "  generic\n");
 436       break;
 437
 438     case USE_ADDRESS:
 439       fprintf (file, "  address\n");
 440       break;
 441
 442     case USE_COMPARE:
 443       fprintf (file, "  compare\n");
 444       break;
 445
 446     default:
 447       gcc_unreachable ();
 448     }
 449
 450   fprintf (file, "  in statement ");
 451   print_generic_expr (file, use->stmt, TDF_SLIM);
 452   fprintf (file, "\n");
 453
 454   fprintf (file, "  at position ");
 455   if (use->op_p)
 456     print_generic_expr (file, *use->op_p, TDF_SLIM);
 457   fprintf (file, "\n");
 458
 459   dump_iv (file, use->iv);
 460
 461   if (use->related_cands)
 462     {
 463       fprintf (file, "  related candidates ");
 464       dump_bitmap (file, use->related_cands);
 465     }
 466 }
 467
 468 /* Dumps information about the uses to FILE.  */
 469
 470 extern void dump_uses (FILE *, struct ivopts_data *);
 471 void
 472 dump_uses (FILE *file, struct ivopts_data *data)
 473 {
 474   unsigned i;
 475   struct iv_use *use;
 476
 477   for (i = 0; i < n_iv_uses (data); i++)
 478     {
 479       use = iv_use (data, i);
 480
 481       dump_use (file, use);
 482       fprintf (file, "\n");
 483     }
 484 }
 485
 486 /* Dumps information about induction variable candidate CAND to FILE.  */
 487
 488 extern void dump_cand (FILE *, struct iv_cand *);
 489 void
 490 dump_cand (FILE *file, struct iv_cand *cand)
 491 {
 492   struct iv *iv = cand->iv;
 493
 494   fprintf (file, "candidate %d%s\n",
 495            cand->id, cand->important ? " (important)" : "");
 496
 497   if (cand->depends_on)
 498     {
 499       fprintf (file, "  depends on ");
 500       dump_bitmap (file, cand->depends_on);
 501     }
 502
 503   if (!iv)
 504     {
 505       fprintf (file, "  final value replacement\n");
 506       return;
 507     }
 508
 509   switch (cand->pos)
 510     {
 511     case IP_NORMAL:
 512       fprintf (file, "  incremented before exit test\n");
 513       break;
 514
 515     case IP_END:
 516       fprintf (file, "  incremented at end\n");
 517       break;
 518
 519     case IP_ORIGINAL:
 520       fprintf (file, "  original biv\n");
 521       break;
 522     }
 523
 524   dump_iv (file, iv);
 525 }
 526
 527 /* Returns the info for ssa version VER.  */
 528
 529 static inline struct version_info *
 530 ver_info (struct ivopts_data *data, unsigned ver)
 531 {
 532   return data->version_info + ver;
 533 }
 534
 535 /* Returns the info for ssa name NAME.  */
 536
 537 static inline struct version_info *
 538 name_info (struct ivopts_data *data, tree name)
 539 {
 540   return ver_info (data, SSA_NAME_VERSION (name));
 541 }
 542
 543 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 544    emitted in LOOP.  */
 545
 546 static bool
 547 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 548 {
 549   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 550
 551   gcc_assert (bb);
 552
 553   if (sbb == loop->latch)
 554     return true;
 555
 556   if (sbb != bb)
 557     return false;
 558
 559   return stmt == last_stmt (bb);
 560 }
 561
 562 /* Returns true if STMT if after the place where the original induction
 563    variable CAND is incremented.  */
 564
 565 static bool
 566 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 567 {
 568   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 569   basic_block stmt_bb = bb_for_stmt (stmt);
 570   block_stmt_iterator bsi;
 571
 572   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 573     return false;
 574
 575   if (stmt_bb != cand_bb)
 576     return true;
 577
 578   /* Scan the block from the end, since the original ivs are usually
 579      incremented at the end of the loop body.  */
 580   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 581     {
 582       if (bsi_stmt (bsi) == cand->incremented_at)
 583         return false;
 584       if (bsi_stmt (bsi) == stmt)
 585         return true;
 586     }
 587 }
 588
 589 /* Returns true if STMT if after the place where the induction variable
 590    CAND is incremented in LOOP.  */
 591
 592 static bool
 593 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 594 {
 595   switch (cand->pos)
 596     {
 597     case IP_END:
 598       return false;
 599
 600     case IP_NORMAL:
 601       return stmt_after_ip_normal_pos (loop, stmt);
 602
 603     case IP_ORIGINAL:
 604       return stmt_after_ip_original_pos (cand, stmt);
 605
 606     default:
 607       gcc_unreachable ();
 608     }
 609 }
 610
 611 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 612
 613 static bool
 614 abnormal_ssa_name_p (tree exp)
 615 {
 616   if (!exp)
 617     return false;
 618
 619   if (TREE_CODE (exp) != SSA_NAME)
 620     return false;
 621
 622   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 623 }
 624
 625 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 626    abnormal phi node.  Callback for for_each_index.  */
 627
 628 static bool
 629 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 630                                   void *data ATTRIBUTE_UNUSED)
 631 {
 632   if (TREE_CODE (base) == ARRAY_REF)
 633     {
 634       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 635         return false;
 636       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 637         return false;
 638     }
 639
 640   return !abnormal_ssa_name_p (*index);
 641 }
 642
 643 /* Returns true if EXPR contains a ssa name that occurs in an
 644    abnormal phi node.  */
 645
 646 bool
 647 contains_abnormal_ssa_name_p (tree expr)
 648 {
 649   enum tree_code code;
 650   enum tree_code_class codeclass;
 651
 652   if (!expr)
 653     return false;
 654
 655   code = TREE_CODE (expr);
 656   codeclass = TREE_CODE_CLASS (code);
 657
 658   if (code == SSA_NAME)
 659     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 660
 661   if (code == INTEGER_CST
 662       || is_gimple_min_invariant (expr))
 663     return false;
 664
 665   if (code == ADDR_EXPR)
 666     return !for_each_index (&TREE_OPERAND (expr, 0),
 667                             idx_contains_abnormal_ssa_name_p,
 668                             NULL);
 669
 670   switch (codeclass)
 671     {
 672     case tcc_binary:
 673     case tcc_comparison:
 674       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 675         return true;
 676
 677       /* Fallthru.  */
 678     case tcc_unary:
 679       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 680         return true;
 681
 682       break;
 683
 684     default:
 685       gcc_unreachable ();
 686     }
 687
 688   return false;
 689 }
 690
 691 /*  Returns tree describing number of iterations determined from
 692     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 693
 694 static tree
 695 niter_for_exit (struct ivopts_data *data, edge exit)
 696 {
 697   struct tree_niter_desc desc;
 698   tree niter;
 699   void **slot;
 700
 701   if (!data->niters)
 702     {
 703       data->niters = pointer_map_create ();
 704       slot = NULL;
 705     }
 706   else
 707     slot = pointer_map_contains (data->niters, exit);
 708
 709   if (!slot)
 710     {
 711       /* Try to determine number of iterations.  We must know it
 712          unconditionally (i.e., without possibility of # of iterations
 713          being zero).  Also, we cannot safely work with ssa names that
 714          appear in phi nodes on abnormal edges, so that we do not create
 715          overlapping life ranges for them (PR 27283).  */
 716       if (number_of_iterations_exit (data->current_loop,
 717                                      exit, &desc, true)
 718           && integer_zerop (desc.may_be_zero)
 719           && !contains_abnormal_ssa_name_p (desc.niter))
 720         niter = desc.niter;
 721       else
 722         niter = NULL_TREE;
 723
 724       *pointer_map_insert (data->niters, exit) = niter;
 725     }
 726   else
 727     niter = (tree) *slot;
 728
 729   return niter;
 730 }
 731
 732 /* Returns tree describing number of iterations determined from
 733    single dominating exit of DATA->current_loop, or NULL if something
 734    goes wrong.  */
 735
 736 static tree
 737 niter_for_single_dom_exit (struct ivopts_data *data)
 738 {
 739   edge exit = single_dom_exit (data->current_loop);
 740
 741   if (!exit)
 742     return NULL;
 743
 744   return niter_for_exit (data, exit);
 745 }
 746
 747 /* Initializes data structures used by the iv optimization pass, stored
 748    in DATA.  */
 749
 750 static void
 751 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 752 {
 753   data->version_info_size = 2 * num_ssa_names;
 754   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 755   data->relevant = BITMAP_ALLOC (NULL);
 756   data->important_candidates = BITMAP_ALLOC (NULL);
 757   data->max_inv_id = 0;
 758   data->niters = NULL;
 759   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 760   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 761   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 762 }
 763
 764 /* Returns a memory object to that EXPR points.  In case we are able to
 765    determine that it does not point to any such object, NULL is returned.  */
 766
 767 static tree
 768 determine_base_object (tree expr)
 769 {
 770   enum tree_code code = TREE_CODE (expr);
 771   tree base, obj;
 772
 773   /* If this is a pointer casted to any type, we need to determine
 774      the base object for the pointer; so handle conversions before
 775      throwing away non-pointer expressions.  */
 776   if (CONVERT_EXPR_P (expr))
 777     return determine_base_object (TREE_OPERAND (expr, 0));
 778
 779   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 780     return NULL_TREE;
 781
 782   switch (code)
 783     {
 784     case INTEGER_CST:
 785       return NULL_TREE;
 786
 787     case ADDR_EXPR:
 788       obj = TREE_OPERAND (expr, 0);
 789       base = get_base_address (obj);
 790
 791       if (!base)
 792         return expr;
 793
 794       if (TREE_CODE (base) == INDIRECT_REF)
 795         return determine_base_object (TREE_OPERAND (base, 0));
 796
 797       return fold_convert (ptr_type_node,
 798                            build_fold_addr_expr (base));
 799
 800     case POINTER_PLUS_EXPR:
 801       return determine_base_object (TREE_OPERAND (expr, 0));
 802
 803     case PLUS_EXPR:
 804     case MINUS_EXPR:
 805       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 806       gcc_unreachable ();
 807
 808     default:
 809       return fold_convert (ptr_type_node, expr);
 810     }
 811 }
 812
 813 /* Allocates an induction variable with given initial value BASE and step STEP
 814    for loop LOOP.  */
 815
 816 static struct iv *
 817 alloc_iv (tree base, tree step)
 818 {
 819   struct iv *iv = XCNEW (struct iv);
 820   gcc_assert (step != NULL_TREE);
 821
 822   iv->base = base;
 823   iv->base_object = determine_base_object (base);
 824   iv->step = step;
 825   iv->biv_p = false;
 826   iv->have_use_for = false;
 827   iv->use_id = 0;
 828   iv->ssa_name = NULL_TREE;
 829
 830   return iv;
 831 }
 832
 833 /* Sets STEP and BASE for induction variable IV.  */
 834
 835 static void
 836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 837 {
 838   struct version_info *info = name_info (data, iv);
 839
 840   gcc_assert (!info->iv);
 841
 842   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 843   info->iv = alloc_iv (base, step);
 844   info->iv->ssa_name = iv;
 845 }
 846
 847 /* Finds induction variable declaration for VAR.  */
 848
 849 static struct iv *
 850 get_iv (struct ivopts_data *data, tree var)
 851 {
 852   basic_block bb;
 853   tree type = TREE_TYPE (var);
 854
 855   if (!POINTER_TYPE_P (type)
 856       && !INTEGRAL_TYPE_P (type))
 857     return NULL;
 858
 859   if (!name_info (data, var)->iv)
 860     {
 861       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 862
 863       if (!bb
 864           || !flow_bb_inside_loop_p (data->current_loop, bb))
 865         set_iv (data, var, var, build_int_cst (type, 0));
 866     }
 867
 868   return name_info (data, var)->iv;
 869 }
 870
 871 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 872    not define a simple affine biv with nonzero step.  */
 873
 874 static tree
 875 determine_biv_step (tree phi)
 876 {
 877   struct loop *loop = bb_for_stmt (phi)->loop_father;
 878   tree name = PHI_RESULT (phi);
 879   affine_iv iv;
 880
 881   if (!is_gimple_reg (name))
 882     return NULL_TREE;
 883
 884   if (!simple_iv (loop, phi, name, &iv, true))
 885     return NULL_TREE;
 886
 887   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 888 }
 889
 890 /* Finds basic ivs.  */
 891
 892 static bool
 893 find_bivs (struct ivopts_data *data)
 894 {
 895   tree phi, step, type, base;
 896   bool found = false;
 897   struct loop *loop = data->current_loop;
 898
 899   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 900     {
 901       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 902         continue;
 903
 904       step = determine_biv_step (phi);
 905       if (!step)
 906         continue;
 907
 908       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 909       base = expand_simple_operations (base);
 910       if (contains_abnormal_ssa_name_p (base)
 911           || contains_abnormal_ssa_name_p (step))
 912         continue;
 913
 914       type = TREE_TYPE (PHI_RESULT (phi));
 915       base = fold_convert (type, base);
 916       if (step)
 917         {
 918           if (POINTER_TYPE_P (type))
 919             step = fold_convert (sizetype, step);
 920           else
 921             step = fold_convert (type, step);
 922         }
 923
 924       set_iv (data, PHI_RESULT (phi), base, step);
 925       found = true;
 926     }
 927
 928   return found;
 929 }
 930
 931 /* Marks basic ivs.  */
 932
 933 static void
 934 mark_bivs (struct ivopts_data *data)
 935 {
 936   tree phi, var;
 937   struct iv *iv, *incr_iv;
 938   struct loop *loop = data->current_loop;
 939   basic_block incr_bb;
 940
 941   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 942     {
 943       iv = get_iv (data, PHI_RESULT (phi));
 944       if (!iv)
 945         continue;
 946
 947       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 948       incr_iv = get_iv (data, var);
 949       if (!incr_iv)
 950         continue;
 951
 952       /* If the increment is in the subloop, ignore it.  */
 953       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 954       if (incr_bb->loop_father != data->current_loop
 955           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 956         continue;
 957
 958       iv->biv_p = true;
 959       incr_iv->biv_p = true;
 960     }
 961 }
 962
 963 /* Checks whether STMT defines a linear induction variable and stores its
 964    parameters to IV.  */
 965
 966 static bool
 967 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 968 {
 969   tree lhs;
 970   struct loop *loop = data->current_loop;
 971
 972   iv->base = NULL_TREE;
 973   iv->step = NULL_TREE;
 974
 975   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 976     return false;
 977
 978   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 979   if (TREE_CODE (lhs) != SSA_NAME)
 980     return false;
 981
 982   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 983     return false;
 984   iv->base = expand_simple_operations (iv->base);
 985
 986   if (contains_abnormal_ssa_name_p (iv->base)
 987       || contains_abnormal_ssa_name_p (iv->step))
 988     return false;
 989
 990   return true;
 991 }
 992
 993 /* Finds general ivs in statement STMT.  */
 994
 995 static void
 996 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 997 {
 998   affine_iv iv;
 999
1000   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1001     return;
1002
1003   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
1004 }
1005
1006 /* Finds general ivs in basic block BB.  */
1007
1008 static void
1009 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1010 {
1011   block_stmt_iterator bsi;
1012
1013   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1014     find_givs_in_stmt (data, bsi_stmt (bsi));
1015 }
1016
1017 /* Finds general ivs.  */
1018
1019 static void
1020 find_givs (struct ivopts_data *data)
1021 {
1022   struct loop *loop = data->current_loop;
1023   basic_block *body = get_loop_body_in_dom_order (loop);
1024   unsigned i;
1025
1026   for (i = 0; i < loop->num_nodes; i++)
1027     find_givs_in_bb (data, body[i]);
1028   free (body);
1029 }
1030
1031 /* For each ssa name defined in LOOP determines whether it is an induction
1032    variable and if so, its initial value and step.  */
1033
1034 static bool
1035 find_induction_variables (struct ivopts_data *data)
1036 {
1037   unsigned i;
1038   bitmap_iterator bi;
1039
1040   if (!find_bivs (data))
1041     return false;
1042
1043   find_givs (data);
1044   mark_bivs (data);
1045
1046   if (dump_file && (dump_flags & TDF_DETAILS))
1047     {
1048       tree niter = niter_for_single_dom_exit (data);
1049
1050       if (niter)
1051         {
1052           fprintf (dump_file, "  number of iterations ");
1053           print_generic_expr (dump_file, niter, TDF_SLIM);
1054           fprintf (dump_file, "\n\n");
1055         };
1056
1057       fprintf (dump_file, "Induction variables:\n\n");
1058
1059       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1060         {
1061           if (ver_info (data, i)->iv)
1062             dump_iv (dump_file, ver_info (data, i)->iv);
1063         }
1064     }
1065
1066   return true;
1067 }
1068
1069 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1070
1071 static struct iv_use *
1072 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1073             tree stmt, enum use_type use_type)
1074 {
1075   struct iv_use *use = XCNEW (struct iv_use);
1076
1077   use->id = n_iv_uses (data);
1078   use->type = use_type;
1079   use->iv = iv;
1080   use->stmt = stmt;
1081   use->op_p = use_p;
1082   use->related_cands = BITMAP_ALLOC (NULL);
1083
1084   /* To avoid showing ssa name in the dumps, if it was not reset by the
1085      caller.  */
1086   iv->ssa_name = NULL_TREE;
1087
1088   if (dump_file && (dump_flags & TDF_DETAILS))
1089     dump_use (dump_file, use);
1090
1091   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1092
1093   return use;
1094 }
1095
1096 /* Checks whether OP is a loop-level invariant and if so, records it.
1097    NONLINEAR_USE is true if the invariant is used in a way we do not
1098    handle specially.  */
1099
1100 static void
1101 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1102 {
1103   basic_block bb;
1104   struct version_info *info;
1105
1106   if (TREE_CODE (op) != SSA_NAME
1107       || !is_gimple_reg (op))
1108     return;
1109
1110   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1111   if (bb
1112       && flow_bb_inside_loop_p (data->current_loop, bb))
1113     return;
1114
1115   info = name_info (data, op);
1116   info->name = op;
1117   info->has_nonlin_use |= nonlinear_use;
1118   if (!info->inv_id)
1119     info->inv_id = ++data->max_inv_id;
1120   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1121 }
1122
1123 /* Checks whether the use OP is interesting and if so, records it.  */
1124
1125 static struct iv_use *
1126 find_interesting_uses_op (struct ivopts_data *data, tree op)
1127 {
1128   struct iv *iv;
1129   struct iv *civ;
1130   tree stmt;
1131   struct iv_use *use;
1132
1133   if (TREE_CODE (op) != SSA_NAME)
1134     return NULL;
1135
1136   iv = get_iv (data, op);
1137   if (!iv)
1138     return NULL;
1139
1140   if (iv->have_use_for)
1141     {
1142       use = iv_use (data, iv->use_id);
1143
1144       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1145       return use;
1146     }
1147
1148   if (integer_zerop (iv->step))
1149     {
1150       record_invariant (data, op, true);
1151       return NULL;
1152     }
1153   iv->have_use_for = true;
1154
1155   civ = XNEW (struct iv);
1156   *civ = *iv;
1157
1158   stmt = SSA_NAME_DEF_STMT (op);
1159   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1160               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1161
1162   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1163   iv->use_id = use->id;
1164
1165   return use;
1166 }
1167
1168 /* Given a condition *COND_P, checks whether it is a compare of an induction
1169    variable and an invariant.  If this is the case, CONTROL_VAR is set
1170    to location of the iv, BOUND to the location of the invariant,
1171    IV_VAR and IV_BOUND are set to the corresponding induction variable
1172    descriptions, and true is returned.  If this is not the case,
1173    CONTROL_VAR and BOUND are set to the arguments of the condition and
1174    false is returned.  */
1175
1176 static bool
1177 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1178                        tree **control_var, tree **bound,
1179                        struct iv **iv_var, struct iv **iv_bound)
1180 {
1181   /* The nodes returned when COND has just one operand.  Note that you should
1182      not modify anything in BOUND or IV_BOUND because of this.  */
1183   static struct iv const_iv;
1184   static tree zero;
1185   tree cond = *cond_p;
1186   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1187   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1188   bool ret = false;
1189
1190   zero = integer_zero_node;
1191   const_iv.step = integer_zero_node;
1192
1193   if (TREE_CODE (cond) == SSA_NAME)
1194     {
1195       op0 = cond_p;
1196       iv0 = get_iv (data, cond);
1197       ret = (iv0 && !integer_zerop (iv0->step));
1198       goto end;
1199     }
1200
1201   if (!COMPARISON_CLASS_P (cond))
1202     {
1203       op0 = cond_p;
1204       goto end;
1205     }
1206
1207   op0 = &TREE_OPERAND (cond, 0);
1208   op1 = &TREE_OPERAND (cond, 1);
1209   if (TREE_CODE (*op0) == SSA_NAME)
1210     iv0 = get_iv (data, *op0);
1211   if (TREE_CODE (*op1) == SSA_NAME)
1212     iv1 = get_iv (data, *op1);
1213
1214   /* Exactly one of the compared values must be an iv, and the other one must
1215      be an invariant.  */
1216   if (!iv0 || !iv1)
1217     goto end;
1218
1219   if (integer_zerop (iv0->step))
1220     {
1221       /* Control variable may be on the other side.  */
1222       tmp_op = op0; op0 = op1; op1 = tmp_op;
1223       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1224     }
1225   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1226
1227 end:
1228   if (control_var)
1229     *control_var = op0;;
1230   if (iv_var)
1231     *iv_var = iv0;;
1232   if (bound)
1233     *bound = op1;
1234   if (iv_bound)
1235     *iv_bound = iv1;
1236
1237   return ret;
1238 }
1239
1240 /* Checks whether the condition *COND_P in STMT is interesting
1241    and if so, records it.  */
1242
1243 static void
1244 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1245 {
1246   tree *var_p, *bound_p;
1247   struct iv *var_iv, *civ;
1248
1249   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1250     {
1251       find_interesting_uses_op (data, *var_p);
1252       find_interesting_uses_op (data, *bound_p);
1253       return;
1254     }
1255
1256   civ = XNEW (struct iv);
1257   *civ = *var_iv;
1258   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1259 }
1260
1261 /* Returns true if expression EXPR is obviously invariant in LOOP,
1262    i.e. if all its operands are defined outside of the LOOP.  LOOP
1263    should not be the function body.  */
1264
1265 bool
1266 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1267 {
1268   basic_block def_bb;
1269   unsigned i, len;
1270
1271   gcc_assert (loop_depth (loop) > 0);
1272
1273   if (is_gimple_min_invariant (expr))
1274     return true;
1275
1276   if (TREE_CODE (expr) == SSA_NAME)
1277     {
1278       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1279       if (def_bb
1280           && flow_bb_inside_loop_p (loop, def_bb))
1281         return false;
1282
1283       return true;
1284     }
1285
1286   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1287     return false;
1288
1289   len = TREE_OPERAND_LENGTH (expr);
1290   for (i = 0; i < len; i++)
1291     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1292       return false;
1293
1294   return true;
1295 }
1296
1297 /* Cumulates the steps of indices into DATA and replaces their values with the
1298    initial ones.  Returns false when the value of the index cannot be determined.
1299    Callback for for_each_index.  */
1300
1301 struct ifs_ivopts_data
1302 {
1303   struct ivopts_data *ivopts_data;
1304   tree stmt;
1305   tree step;
1306 };
1307
1308 static bool
1309 idx_find_step (tree base, tree *idx, void *data)
1310 {
1311   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1312   struct iv *iv;
1313   tree step, iv_base, iv_step, lbound, off;
1314   struct loop *loop = dta->ivopts_data->current_loop;
1315
1316   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1317       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1318     return false;
1319
1320   /* If base is a component ref, require that the offset of the reference
1321      be invariant.  */
1322   if (TREE_CODE (base) == COMPONENT_REF)
1323     {
1324       off = component_ref_field_offset (base);
1325       return expr_invariant_in_loop_p (loop, off);
1326     }
1327
1328   /* If base is array, first check whether we will be able to move the
1329      reference out of the loop (in order to take its address in strength
1330      reduction).  In order for this to work we need both lower bound
1331      and step to be loop invariants.  */
1332   if (TREE_CODE (base) == ARRAY_REF)
1333     {
1334       step = array_ref_element_size (base);
1335       lbound = array_ref_low_bound (base);
1336
1337       if (!expr_invariant_in_loop_p (loop, step)
1338           || !expr_invariant_in_loop_p (loop, lbound))
1339         return false;
1340     }
1341
1342   if (TREE_CODE (*idx) != SSA_NAME)
1343     return true;
1344
1345   iv = get_iv (dta->ivopts_data, *idx);
1346   if (!iv)
1347     return false;
1348
1349   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1350           *&x[0], which is not folded and does not trigger the
1351           ARRAY_REF path below.  */
1352   *idx = iv->base;
1353
1354   if (integer_zerop (iv->step))
1355     return true;
1356
1357   if (TREE_CODE (base) == ARRAY_REF)
1358     {
1359       step = array_ref_element_size (base);
1360
1361       /* We only handle addresses whose step is an integer constant.  */
1362       if (TREE_CODE (step) != INTEGER_CST)
1363         return false;
1364     }
1365   else
1366     /* The step for pointer arithmetics already is 1 byte.  */
1367     step = build_int_cst (sizetype, 1);
1368
1369   iv_base = iv->base;
1370   iv_step = iv->step;
1371   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1372                             sizetype, &iv_base, &iv_step, dta->stmt,
1373                             false))
1374     {
1375       /* The index might wrap.  */
1376       return false;
1377     }
1378
1379   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1380   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1381
1382   return true;
1383 }
1384
1385 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1386    object is passed to it in DATA.  */
1387
1388 static bool
1389 idx_record_use (tree base, tree *idx,
1390                 void *vdata)
1391 {
1392   struct ivopts_data *data = (struct ivopts_data *) vdata;
1393   find_interesting_uses_op (data, *idx);
1394   if (TREE_CODE (base) == ARRAY_REF)
1395     {
1396       find_interesting_uses_op (data, array_ref_element_size (base));
1397       find_interesting_uses_op (data, array_ref_low_bound (base));
1398     }
1399   return true;
1400 }
1401
1402 /* If we can prove that TOP = cst * BOT for some constant cst,
1403    store cst to MUL and return true.  Otherwise return false.
1404    The returned value is always sign-extended, regardless of the
1405    signedness of TOP and BOT.  */
1406
1407 static bool
1408 constant_multiple_of (tree top, tree bot, double_int *mul)
1409 {
1410   tree mby;
1411   enum tree_code code;
1412   double_int res, p0, p1;
1413   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1414
1415   STRIP_NOPS (top);
1416   STRIP_NOPS (bot);
1417
1418   if (operand_equal_p (top, bot, 0))
1419     {
1420       *mul = double_int_one;
1421       return true;
1422     }
1423
1424   code = TREE_CODE (top);
1425   switch (code)
1426     {
1427     case MULT_EXPR:
1428       mby = TREE_OPERAND (top, 1);
1429       if (TREE_CODE (mby) != INTEGER_CST)
1430         return false;
1431
1432       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1433         return false;
1434
1435       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1436                               precision);
1437       return true;
1438
1439     case PLUS_EXPR:
1440     case MINUS_EXPR:
1441       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1442           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1443         return false;
1444
1445       if (code == MINUS_EXPR)
1446         p1 = double_int_neg (p1);
1447       *mul = double_int_sext (double_int_add (p0, p1), precision);
1448       return true;
1449
1450     case INTEGER_CST:
1451       if (TREE_CODE (bot) != INTEGER_CST)
1452         return false;
1453
1454       p0 = double_int_sext (tree_to_double_int (top), precision);
1455       p1 = double_int_sext (tree_to_double_int (bot), precision);
1456       if (double_int_zero_p (p1))
1457         return false;
1458       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1459                               precision);
1460       return double_int_zero_p (res);
1461
1462     default:
1463       return false;
1464     }
1465 }
1466
1467 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1468
1469 static bool
1470 may_be_unaligned_p (tree ref, tree step)
1471 {
1472   tree base;
1473   tree base_type;
1474   HOST_WIDE_INT bitsize;
1475   HOST_WIDE_INT bitpos;
1476   tree toffset;
1477   enum machine_mode mode;
1478   int unsignedp, volatilep;
1479   unsigned base_align;
1480
1481   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1482      thus they are not misaligned.  */
1483   if (TREE_CODE (ref) == TARGET_MEM_REF)
1484     return false;
1485
1486   /* The test below is basically copy of what expr.c:normal_inner_ref
1487      does to check whether the object must be loaded by parts when
1488      STRICT_ALIGNMENT is true.  */
1489   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1490                               &unsignedp, &volatilep, true);
1491   base_type = TREE_TYPE (base);
1492   base_align = TYPE_ALIGN (base_type);
1493
1494   if (mode != BLKmode)
1495     {
1496       double_int mul;
1497       tree al = build_int_cst (TREE_TYPE (step),
1498                                GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1499
1500       if (base_align < GET_MODE_ALIGNMENT (mode)
1501           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1502           || bitpos % BITS_PER_UNIT != 0)
1503         return true;
1504
1505       if (!constant_multiple_of (step, al, &mul))
1506         return true;
1507     }
1508
1509   return false;
1510 }
1511
1512 /* Return true if EXPR may be non-addressable.   */
1513
1514 static bool
1515 may_be_nonaddressable_p (tree expr)
1516 {
1517   switch (TREE_CODE (expr))
1518     {
1519     case TARGET_MEM_REF:
1520       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1521          target, thus they are always addressable.  */
1522       return false;
1523
1524     case COMPONENT_REF:
1525       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1526              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1527
1528     case VIEW_CONVERT_EXPR:
1529       /* This kind of view-conversions may wrap non-addressable objects
1530          and make them look addressable.  After some processing the
1531          non-addressability may be uncovered again, causing ADDR_EXPRs
1532          of inappropriate objects to be built.  */
1533       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1534           || is_gimple_min_invariant (TREE_OPERAND (expr, 0)))
1535         return true;
1536
1537       /* ... fall through ... */
1538
1539     case ARRAY_REF:
1540     case ARRAY_RANGE_REF:
1541       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1542
1543     CASE_CONVERT:
1544       return true;
1545
1546     default:
1547       break;
1548     }
1549
1550   return false;
1551 }
1552
1553 /* Finds addresses in *OP_P inside STMT.  */
1554
1555 static void
1556 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1557 {
1558   tree base = *op_p, step = build_int_cst (sizetype, 0);
1559   struct iv *civ;
1560   struct ifs_ivopts_data ifs_ivopts_data;
1561
1562   /* Do not play with volatile memory references.  A bit too conservative,
1563      perhaps, but safe.  */
1564   if (stmt_ann (stmt)->has_volatile_ops)
1565     goto fail;
1566
1567   /* Ignore bitfields for now.  Not really something terribly complicated
1568      to handle.  TODO.  */
1569   if (TREE_CODE (base) == BIT_FIELD_REF)
1570     goto fail;
1571
1572   base = unshare_expr (base);
1573
1574   if (TREE_CODE (base) == TARGET_MEM_REF)
1575     {
1576       tree type = build_pointer_type (TREE_TYPE (base));
1577       tree astep;
1578
1579       if (TMR_BASE (base)
1580           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1581         {
1582           civ = get_iv (data, TMR_BASE (base));
1583           if (!civ)
1584             goto fail;
1585
1586           TMR_BASE (base) = civ->base;
1587           step = civ->step;
1588         }
1589       if (TMR_INDEX (base)
1590           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1591         {
1592           civ = get_iv (data, TMR_INDEX (base));
1593           if (!civ)
1594             goto fail;
1595
1596           TMR_INDEX (base) = civ->base;
1597           astep = civ->step;
1598
1599           if (astep)
1600             {
1601               if (TMR_STEP (base))
1602                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1603
1604               step = fold_build2 (PLUS_EXPR, type, step, astep);
1605             }
1606         }
1607
1608       if (integer_zerop (step))
1609         goto fail;
1610       base = tree_mem_ref_addr (type, base);
1611     }
1612   else
1613     {
1614       ifs_ivopts_data.ivopts_data = data;
1615       ifs_ivopts_data.stmt = stmt;
1616       ifs_ivopts_data.step = build_int_cst (sizetype, 0);
1617       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1618           || integer_zerop (ifs_ivopts_data.step))
1619         goto fail;
1620       step = ifs_ivopts_data.step;
1621
1622       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1623       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1624
1625       /* Check that the base expression is addressable.  This needs
1626          to be done after substituting bases of IVs into it.  */
1627       if (may_be_nonaddressable_p (base))
1628         goto fail;
1629
1630       /* Moreover, on strict alignment platforms, check that it is
1631          sufficiently aligned.  */
1632       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1633         goto fail;
1634
1635       base = build_fold_addr_expr (base);
1636
1637       /* Substituting bases of IVs into the base expression might
1638          have caused folding opportunities.  */
1639       if (TREE_CODE (base) == ADDR_EXPR)
1640         {
1641           tree *ref = &TREE_OPERAND (base, 0);
1642           while (handled_component_p (*ref))
1643             ref = &TREE_OPERAND (*ref, 0);
1644           if (TREE_CODE (*ref) == INDIRECT_REF)
1645             *ref = fold_indirect_ref (*ref);
1646         }
1647     }
1648
1649   civ = alloc_iv (base, step);
1650   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1651   return;
1652
1653 fail:
1654   for_each_index (op_p, idx_record_use, data);
1655 }
1656
1657 /* Finds and records invariants used in STMT.  */
1658
1659 static void
1660 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1661 {
1662   ssa_op_iter iter;
1663   use_operand_p use_p;
1664   tree op;
1665
1666   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1667     {
1668       op = USE_FROM_PTR (use_p);
1669       record_invariant (data, op, false);
1670     }
1671 }
1672
1673 /* Finds interesting uses of induction variables in the statement STMT.  */
1674
1675 static void
1676 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1677 {
1678   struct iv *iv;
1679   tree op, lhs, rhs;
1680   ssa_op_iter iter;
1681   use_operand_p use_p;
1682
1683   find_invariants_stmt (data, stmt);
1684
1685   if (TREE_CODE (stmt) == COND_EXPR)
1686     {
1687       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1688       return;
1689     }
1690
1691   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1692     {
1693       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1694       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1695
1696       if (TREE_CODE (lhs) == SSA_NAME)
1697         {
1698           /* If the statement defines an induction variable, the uses are not
1699              interesting by themselves.  */
1700
1701           iv = get_iv (data, lhs);
1702
1703           if (iv && !integer_zerop (iv->step))
1704             return;
1705         }
1706
1707       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1708         {
1709         case tcc_comparison:
1710           find_interesting_uses_cond (data, stmt,
1711                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1712           return;
1713
1714         case tcc_reference:
1715           find_interesting_uses_address (data, stmt,
1716                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1717           if (REFERENCE_CLASS_P (lhs))
1718             find_interesting_uses_address (data, stmt,
1719                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1720           return;
1721
1722         default: ;
1723         }
1724
1725       if (REFERENCE_CLASS_P (lhs)
1726           && is_gimple_val (rhs))
1727         {
1728           find_interesting_uses_address (data, stmt,
1729                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1730           find_interesting_uses_op (data, rhs);
1731           return;
1732         }
1733
1734       /* TODO -- we should also handle address uses of type
1735
1736          memory = call (whatever);
1737
1738          and
1739
1740          call (memory).  */
1741     }
1742
1743   if (TREE_CODE (stmt) == PHI_NODE
1744       && bb_for_stmt (stmt) == data->current_loop->header)
1745     {
1746       lhs = PHI_RESULT (stmt);
1747       iv = get_iv (data, lhs);
1748
1749       if (iv && !integer_zerop (iv->step))
1750         return;
1751     }
1752
1753   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1754     {
1755       op = USE_FROM_PTR (use_p);
1756
1757       if (TREE_CODE (op) != SSA_NAME)
1758         continue;
1759
1760       iv = get_iv (data, op);
1761       if (!iv)
1762         continue;
1763
1764       find_interesting_uses_op (data, op);
1765     }
1766 }
1767
1768 /* Finds interesting uses of induction variables outside of loops
1769    on loop exit edge EXIT.  */
1770
1771 static void
1772 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1773 {
1774   tree phi, def;
1775
1776   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1777     {
1778       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1779       if (is_gimple_reg (def))
1780         find_interesting_uses_op (data, def);
1781     }
1782 }
1783
1784 /* Finds uses of the induction variables that are interesting.  */
1785
1786 static void
1787 find_interesting_uses (struct ivopts_data *data)
1788 {
1789   basic_block bb;
1790   block_stmt_iterator bsi;
1791   tree phi;
1792   basic_block *body = get_loop_body (data->current_loop);
1793   unsigned i;
1794   struct version_info *info;
1795   edge e;
1796
1797   if (dump_file && (dump_flags & TDF_DETAILS))
1798     fprintf (dump_file, "Uses:\n\n");
1799
1800   for (i = 0; i < data->current_loop->num_nodes; i++)
1801     {
1802       edge_iterator ei;
1803       bb = body[i];
1804
1805       FOR_EACH_EDGE (e, ei, bb->succs)
1806         if (e->dest != EXIT_BLOCK_PTR
1807             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1808           find_interesting_uses_outside (data, e);
1809
1810       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1811         find_interesting_uses_stmt (data, phi);
1812       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1813         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1814     }
1815
1816   if (dump_file && (dump_flags & TDF_DETAILS))
1817     {
1818       bitmap_iterator bi;
1819
1820       fprintf (dump_file, "\n");
1821
1822       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1823         {
1824           info = ver_info (data, i);
1825           if (info->inv_id)
1826             {
1827               fprintf (dump_file, "  ");
1828               print_generic_expr (dump_file, info->name, TDF_SLIM);
1829               fprintf (dump_file, " is invariant (%d)%s\n",
1830                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1831             }
1832         }
1833
1834       fprintf (dump_file, "\n");
1835     }
1836
1837   free (body);
1838 }
1839
1840 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1841    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1842    we are at the top-level of the processed address.  */
1843
1844 static tree
1845 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1846                 unsigned HOST_WIDE_INT *offset)
1847 {
1848   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1849   enum tree_code code;
1850   tree type, orig_type = TREE_TYPE (expr);
1851   unsigned HOST_WIDE_INT off0, off1, st;
1852   tree orig_expr = expr;
1853
1854   STRIP_NOPS (expr);
1855
1856   type = TREE_TYPE (expr);
1857   code = TREE_CODE (expr);
1858   *offset = 0;
1859
1860   switch (code)
1861     {
1862     case INTEGER_CST:
1863       if (!cst_and_fits_in_hwi (expr)
1864           || integer_zerop (expr))
1865         return orig_expr;
1866
1867       *offset = int_cst_value (expr);
1868       return build_int_cst (orig_type, 0);
1869
1870     case POINTER_PLUS_EXPR:
1871     case PLUS_EXPR:
1872     case MINUS_EXPR:
1873       op0 = TREE_OPERAND (expr, 0);
1874       op1 = TREE_OPERAND (expr, 1);
1875
1876       op0 = strip_offset_1 (op0, false, false, &off0);
1877       op1 = strip_offset_1 (op1, false, false, &off1);
1878
1879       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1880       if (op0 == TREE_OPERAND (expr, 0)
1881           && op1 == TREE_OPERAND (expr, 1))
1882         return orig_expr;
1883
1884       if (integer_zerop (op1))
1885         expr = op0;
1886       else if (integer_zerop (op0))
1887         {
1888           if (code == MINUS_EXPR)
1889             expr = fold_build1 (NEGATE_EXPR, type, op1);
1890           else
1891             expr = op1;
1892         }
1893       else
1894         expr = fold_build2 (code, type, op0, op1);
1895
1896       return fold_convert (orig_type, expr);
1897
1898     case ARRAY_REF:
1899       if (!inside_addr)
1900         return orig_expr;
1901
1902       step = array_ref_element_size (expr);
1903       if (!cst_and_fits_in_hwi (step))
1904         break;
1905
1906       st = int_cst_value (step);
1907       op1 = TREE_OPERAND (expr, 1);
1908       op1 = strip_offset_1 (op1, false, false, &off1);
1909       *offset = off1 * st;
1910
1911       if (top_compref
1912           && integer_zerop (op1))
1913         {
1914           /* Strip the component reference completely.  */
1915           op0 = TREE_OPERAND (expr, 0);
1916           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1917           *offset += off0;
1918           return op0;
1919         }
1920       break;
1921
1922     case COMPONENT_REF:
1923       if (!inside_addr)
1924         return orig_expr;
1925
1926       tmp = component_ref_field_offset (expr);
1927       if (top_compref
1928           && cst_and_fits_in_hwi (tmp))
1929         {
1930           /* Strip the component reference completely.  */
1931           op0 = TREE_OPERAND (expr, 0);
1932           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1933           *offset = off0 + int_cst_value (tmp);
1934           return op0;
1935         }
1936       break;
1937
1938     case ADDR_EXPR:
1939       op0 = TREE_OPERAND (expr, 0);
1940       op0 = strip_offset_1 (op0, true, true, &off0);
1941       *offset += off0;
1942
1943       if (op0 == TREE_OPERAND (expr, 0))
1944         return orig_expr;
1945
1946       expr = build_fold_addr_expr (op0);
1947       return fold_convert (orig_type, expr);
1948
1949     case INDIRECT_REF:
1950       inside_addr = false;
1951       break;
1952
1953     default:
1954       return orig_expr;
1955     }
1956
1957   /* Default handling of expressions for that we want to recurse into
1958      the first operand.  */
1959   op0 = TREE_OPERAND (expr, 0);
1960   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1961   *offset += off0;
1962
1963   if (op0 == TREE_OPERAND (expr, 0)
1964       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1965     return orig_expr;
1966
1967   expr = copy_node (expr);
1968   TREE_OPERAND (expr, 0) = op0;
1969   if (op1)
1970     TREE_OPERAND (expr, 1) = op1;
1971
1972   /* Inside address, we might strip the top level component references,
1973      thus changing type of the expression.  Handling of ADDR_EXPR
1974      will fix that.  */
1975   expr = fold_convert (orig_type, expr);
1976
1977   return expr;
1978 }
1979
1980 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1981
1982 static tree
1983 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1984 {
1985   return strip_offset_1 (expr, false, false, offset);
1986 }
1987
1988 /* Returns variant of TYPE that can be used as base for different uses.
1989    We return unsigned type with the same precision, which avoids problems
1990    with overflows.  */
1991
1992 static tree
1993 generic_type_for (tree type)
1994 {
1995   if (POINTER_TYPE_P (type))
1996     return unsigned_type_for (type);
1997
1998   if (TYPE_UNSIGNED (type))
1999     return type;
2000
2001   return unsigned_type_for (type);
2002 }
2003
2004 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2005    the bitmap to that we should store it.  */
2006
2007 static struct ivopts_data *fd_ivopts_data;
2008 static tree
2009 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2010 {
2011   bitmap *depends_on = (bitmap *) data;
2012   struct version_info *info;
2013
2014   if (TREE_CODE (*expr_p) != SSA_NAME)
2015     return NULL_TREE;
2016   info = name_info (fd_ivopts_data, *expr_p);
2017
2018   if (!info->inv_id || info->has_nonlin_use)
2019     return NULL_TREE;
2020
2021   if (!*depends_on)
2022     *depends_on = BITMAP_ALLOC (NULL);
2023   bitmap_set_bit (*depends_on, info->inv_id);
2024
2025   return NULL_TREE;
2026 }
2027
2028 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2029    position to POS.  If USE is not NULL, the candidate is set as related to
2030    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2031    replacement of the final value of the iv by a direct computation.  */
2032
2033 static struct iv_cand *
2034 add_candidate_1 (struct ivopts_data *data,
2035                  tree base, tree step, bool important, enum iv_position pos,
2036                  struct iv_use *use, tree incremented_at)
2037 {
2038   unsigned i;
2039   struct iv_cand *cand = NULL;
2040   tree type, orig_type;
2041
2042   if (base)
2043     {
2044       orig_type = TREE_TYPE (base);
2045       type = generic_type_for (orig_type);
2046       /* Don't convert the base to the generic type for pointers as the generic
2047          type is an integer type with the same size as the pointer type.  */
2048       if (type != orig_type && !POINTER_TYPE_P (orig_type))
2049         {
2050           base = fold_convert (type, base);
2051           step = fold_convert (type, step);
2052         }
2053     }
2054
2055   for (i = 0; i < n_iv_cands (data); i++)
2056     {
2057       cand = iv_cand (data, i);
2058
2059       if (cand->pos != pos)
2060         continue;
2061
2062       if (cand->incremented_at != incremented_at)
2063         continue;
2064
2065       if (!cand->iv)
2066         {
2067           if (!base && !step)
2068             break;
2069
2070           continue;
2071         }
2072
2073       if (!base && !step)
2074         continue;
2075
2076       if (operand_equal_p (base, cand->iv->base, 0)
2077           && operand_equal_p (step, cand->iv->step, 0))
2078         break;
2079     }
2080
2081   if (i == n_iv_cands (data))
2082     {
2083       cand = XCNEW (struct iv_cand);
2084       cand->id = i;
2085
2086       if (!base && !step)
2087         cand->iv = NULL;
2088       else
2089         cand->iv = alloc_iv (base, step);
2090
2091       cand->pos = pos;
2092       if (pos != IP_ORIGINAL && cand->iv)
2093         {
2094           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2095           cand->var_after = cand->var_before;
2096         }
2097       cand->important = important;
2098       cand->incremented_at = incremented_at;
2099       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2100
2101       if (step
2102           && TREE_CODE (step) != INTEGER_CST)
2103         {
2104           fd_ivopts_data = data;
2105           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2106         }
2107
2108       if (dump_file && (dump_flags & TDF_DETAILS))
2109         dump_cand (dump_file, cand);
2110     }
2111
2112   if (important && !cand->important)
2113     {
2114       cand->important = true;
2115       if (dump_file && (dump_flags & TDF_DETAILS))
2116         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2117     }
2118
2119   if (use)
2120     {
2121       bitmap_set_bit (use->related_cands, i);
2122       if (dump_file && (dump_flags & TDF_DETAILS))
2123         fprintf (dump_file, "Candidate %d is related to use %d\n",
2124                  cand->id, use->id);
2125     }
2126
2127   return cand;
2128 }
2129
2130 /* Returns true if incrementing the induction variable at the end of the LOOP
2131    is allowed.
2132
2133    The purpose is to avoid splitting latch edge with a biv increment, thus
2134    creating a jump, possibly confusing other optimization passes and leaving
2135    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2136    is not available (so we do not have a better alternative), or if the latch
2137    edge is already nonempty.  */
2138
2139 static bool
2140 allow_ip_end_pos_p (struct loop *loop)
2141 {
2142   if (!ip_normal_pos (loop))
2143     return true;
2144
2145   if (!empty_block_p (ip_end_pos (loop)))
2146     return true;
2147
2148   return false;
2149 }
2150
2151 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2152    position to POS.  If USE is not NULL, the candidate is set as related to
2153    it.  The candidate computation is scheduled on all available positions.  */
2154
2155 static void
2156 add_candidate (struct ivopts_data *data,
2157                tree base, tree step, bool important, struct iv_use *use)
2158 {
2159   if (ip_normal_pos (data->current_loop))
2160     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2161   if (ip_end_pos (data->current_loop)
2162       && allow_ip_end_pos_p (data->current_loop))
2163     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2164 }
2165
2166 /* Add a standard "0 + 1 * iteration" iv candidate for a
2167    type with SIZE bits.  */
2168
2169 static void
2170 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2171                                      unsigned int size)
2172 {
2173   tree type = lang_hooks.types.type_for_size (size, true);
2174   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2175                  true, NULL);
2176 }
2177
2178 /* Adds standard iv candidates.  */
2179
2180 static void
2181 add_standard_iv_candidates (struct ivopts_data *data)
2182 {
2183   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2184
2185   /* The same for a double-integer type if it is still fast enough.  */
2186   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2187     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2188 }
2189
2190
2191 /* Adds candidates bases on the old induction variable IV.  */
2192
2193 static void
2194 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2195 {
2196   tree phi, def;
2197   struct iv_cand *cand;
2198
2199   add_candidate (data, iv->base, iv->step, true, NULL);
2200
2201   /* The same, but with initial value zero.  */
2202   add_candidate (data,
2203                  build_int_cst (TREE_TYPE (iv->base), 0),
2204                  iv->step, true, NULL);
2205
2206   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2207   if (TREE_CODE (phi) == PHI_NODE)
2208     {
2209       /* Additionally record the possibility of leaving the original iv
2210          untouched.  */
2211       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2212       cand = add_candidate_1 (data,
2213                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2214                               SSA_NAME_DEF_STMT (def));
2215       cand->var_before = iv->ssa_name;
2216       cand->var_after = def;
2217     }
2218 }
2219
2220 /* Adds candidates based on the old induction variables.  */
2221
2222 static void
2223 add_old_ivs_candidates (struct ivopts_data *data)
2224 {
2225   unsigned i;
2226   struct iv *iv;
2227   bitmap_iterator bi;
2228
2229   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2230     {
2231       iv = ver_info (data, i)->iv;
2232       if (iv && iv->biv_p && !integer_zerop (iv->step))
2233         add_old_iv_candidates (data, iv);
2234     }
2235 }
2236
2237 /* Adds candidates based on the value of the induction variable IV and USE.  */
2238
2239 static void
2240 add_iv_value_candidates (struct ivopts_data *data,
2241                          struct iv *iv, struct iv_use *use)
2242 {
2243   unsigned HOST_WIDE_INT offset;
2244   tree base;
2245   tree basetype;
2246
2247   add_candidate (data, iv->base, iv->step, false, use);
2248
2249   /* The same, but with initial value zero.  Make such variable important,
2250      since it is generic enough so that possibly many uses may be based
2251      on it.  */
2252   basetype = TREE_TYPE (iv->base);
2253   if (POINTER_TYPE_P (basetype))
2254     basetype = sizetype;
2255   add_candidate (data, build_int_cst (basetype, 0),
2256                  iv->step, true, use);
2257
2258   /* Third, try removing the constant offset.  */
2259   base = strip_offset (iv->base, &offset);
2260   if (offset)
2261     add_candidate (data, base, iv->step, false, use);
2262 }
2263
2264 /* Adds candidates based on the uses.  */
2265
2266 static void
2267 add_derived_ivs_candidates (struct ivopts_data *data)
2268 {
2269   unsigned i;
2270
2271   for (i = 0; i < n_iv_uses (data); i++)
2272     {
2273       struct iv_use *use = iv_use (data, i);
2274
2275       if (!use)
2276         continue;
2277
2278       switch (use->type)
2279         {
2280         case USE_NONLINEAR_EXPR:
2281         case USE_COMPARE:
2282         case USE_ADDRESS:
2283           /* Just add the ivs based on the value of the iv used here.  */
2284           add_iv_value_candidates (data, use->iv, use);
2285           break;
2286
2287         default:
2288           gcc_unreachable ();
2289         }
2290     }
2291 }
2292
2293 /* Record important candidates and add them to related_cands bitmaps
2294    if needed.  */
2295
2296 static void
2297 record_important_candidates (struct ivopts_data *data)
2298 {
2299   unsigned i;
2300   struct iv_use *use;
2301
2302   for (i = 0; i < n_iv_cands (data); i++)
2303     {
2304       struct iv_cand *cand = iv_cand (data, i);
2305
2306       if (cand->important)
2307         bitmap_set_bit (data->important_candidates, i);
2308     }
2309
2310   data->consider_all_candidates = (n_iv_cands (data)
2311                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2312
2313   if (data->consider_all_candidates)
2314     {
2315       /* We will not need "related_cands" bitmaps in this case,
2316          so release them to decrease peak memory consumption.  */
2317       for (i = 0; i < n_iv_uses (data); i++)
2318         {
2319           use = iv_use (data, i);
2320           BITMAP_FREE (use->related_cands);
2321         }
2322     }
2323   else
2324     {
2325       /* Add important candidates to the related_cands bitmaps.  */
2326       for (i = 0; i < n_iv_uses (data); i++)
2327         bitmap_ior_into (iv_use (data, i)->related_cands,
2328                          data->important_candidates);
2329     }
2330 }
2331
2332 /* Finds the candidates for the induction variables.  */
2333
2334 static void
2335 find_iv_candidates (struct ivopts_data *data)
2336 {
2337   /* Add commonly used ivs.  */
2338   add_standard_iv_candidates (data);
2339
2340   /* Add old induction variables.  */
2341   add_old_ivs_candidates (data);
2342
2343   /* Add induction variables derived from uses.  */
2344   add_derived_ivs_candidates (data);
2345
2346   /* Record the important candidates.  */
2347   record_important_candidates (data);
2348 }
2349
2350 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2351    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2352    we allocate a simple list to every use.  */
2353
2354 static void
2355 alloc_use_cost_map (struct ivopts_data *data)
2356 {
2357   unsigned i, size, s, j;
2358
2359   for (i = 0; i < n_iv_uses (data); i++)
2360     {
2361       struct iv_use *use = iv_use (data, i);
2362       bitmap_iterator bi;
2363
2364       if (data->consider_all_candidates)
2365         size = n_iv_cands (data);
2366       else
2367         {
2368           s = 0;
2369           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2370             {
2371               s++;
2372             }
2373
2374           /* Round up to the power of two, so that moduling by it is fast.  */
2375           for (size = 1; size < s; size <<= 1)
2376             continue;
2377         }
2378
2379       use->n_map_members = size;
2380       use->cost_map = XCNEWVEC (struct cost_pair, size);
2381     }
2382 }
2383
2384 /* Returns description of computation cost of expression whose runtime
2385    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2386
2387 static comp_cost
2388 new_cost (unsigned runtime, unsigned complexity)
2389 {
2390   comp_cost cost;
2391
2392   cost.cost = runtime;
2393   cost.complexity = complexity;
2394
2395   return cost;
2396 }
2397
2398 /* Adds costs COST1 and COST2.  */
2399
2400 static comp_cost
2401 add_costs (comp_cost cost1, comp_cost cost2)
2402 {
2403   cost1.cost += cost2.cost;
2404   cost1.complexity += cost2.complexity;
2405
2406   return cost1;
2407 }
2408 /* Subtracts costs COST1 and COST2.  */
2409
2410 static comp_cost
2411 sub_costs (comp_cost cost1, comp_cost cost2)
2412 {
2413   cost1.cost -= cost2.cost;
2414   cost1.complexity -= cost2.complexity;
2415
2416   return cost1;
2417 }
2418
2419 /* Returns a negative number if COST1 < COST2, a positive number if
2420    COST1 > COST2, and 0 if COST1 = COST2.  */
2421
2422 static int
2423 compare_costs (comp_cost cost1, comp_cost cost2)
2424 {
2425   if (cost1.cost == cost2.cost)
2426     return cost1.complexity - cost2.complexity;
2427
2428   return cost1.cost - cost2.cost;
2429 }
2430
2431 /* Returns true if COST is infinite.  */
2432
2433 static bool
2434 infinite_cost_p (comp_cost cost)
2435 {
2436   return cost.cost == INFTY;
2437 }
2438
2439 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2440    on invariants DEPENDS_ON and that the value used in expressing it
2441    is VALUE.*/
2442
2443 static void
2444 set_use_iv_cost (struct ivopts_data *data,
2445                  struct iv_use *use, struct iv_cand *cand,
2446                  comp_cost cost, bitmap depends_on, tree value)
2447 {
2448   unsigned i, s;
2449
2450   if (infinite_cost_p (cost))
2451     {
2452       BITMAP_FREE (depends_on);
2453       return;
2454     }
2455
2456   if (data->consider_all_candidates)
2457     {
2458       use->cost_map[cand->id].cand = cand;
2459       use->cost_map[cand->id].cost = cost;
2460       use->cost_map[cand->id].depends_on = depends_on;
2461       use->cost_map[cand->id].value = value;
2462       return;
2463     }
2464
2465   /* n_map_members is a power of two, so this computes modulo.  */
2466   s = cand->id & (use->n_map_members - 1);
2467   for (i = s; i < use->n_map_members; i++)
2468     if (!use->cost_map[i].cand)
2469       goto found;
2470   for (i = 0; i < s; i++)
2471     if (!use->cost_map[i].cand)
2472       goto found;
2473
2474   gcc_unreachable ();
2475
2476 found:
2477   use->cost_map[i].cand = cand;
2478   use->cost_map[i].cost = cost;
2479   use->cost_map[i].depends_on = depends_on;
2480   use->cost_map[i].value = value;
2481 }
2482
2483 /* Gets cost of (USE, CANDIDATE) pair.  */
2484
2485 static struct cost_pair *
2486 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2487                  struct iv_cand *cand)
2488 {
2489   unsigned i, s;
2490   struct cost_pair *ret;
2491
2492   if (!cand)
2493     return NULL;
2494
2495   if (data->consider_all_candidates)
2496     {
2497       ret = use->cost_map + cand->id;
2498       if (!ret->cand)
2499         return NULL;
2500
2501       return ret;
2502     }
2503
2504   /* n_map_members is a power of two, so this computes modulo.  */
2505   s = cand->id & (use->n_map_members - 1);
2506   for (i = s; i < use->n_map_members; i++)
2507     if (use->cost_map[i].cand == cand)
2508       return use->cost_map + i;
2509
2510   for (i = 0; i < s; i++)
2511     if (use->cost_map[i].cand == cand)
2512       return use->cost_map + i;
2513
2514   return NULL;
2515 }
2516
2517 /* Returns estimate on cost of computing SEQ.  */
2518
2519 static unsigned
2520 seq_cost (rtx seq)
2521 {
2522   unsigned cost = 0;
2523   rtx set;
2524
2525   for (; seq; seq = NEXT_INSN (seq))
2526     {
2527       set = single_set (seq);
2528       if (set)
2529         cost += rtx_cost (set, SET);
2530       else
2531         cost++;
2532     }
2533
2534   return cost;
2535 }
2536
2537 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2538 static rtx
2539 produce_memory_decl_rtl (tree obj, int *regno)
2540 {
2541   rtx x;
2542
2543   gcc_assert (obj);
2544   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2545     {
2546       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2547       x = gen_rtx_SYMBOL_REF (Pmode, name);
2548       SET_SYMBOL_REF_DECL (x, obj);
2549       x = gen_rtx_MEM (DECL_MODE (obj), x);
2550       targetm.encode_section_info (obj, x, true);
2551     }
2552   else
2553     {
2554       x = gen_raw_REG (Pmode, (*regno)++);
2555       x = gen_rtx_MEM (DECL_MODE (obj), x);
2556     }
2557
2558   return x;
2559 }
2560
2561 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2562    walk_tree.  DATA contains the actual fake register number.  */
2563
2564 static tree
2565 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2566 {
2567   tree obj = NULL_TREE;
2568   rtx x = NULL_RTX;
2569   int *regno = (int *) data;
2570
2571   switch (TREE_CODE (*expr_p))
2572     {
2573     case ADDR_EXPR:
2574       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2575            handled_component_p (*expr_p);
2576            expr_p = &TREE_OPERAND (*expr_p, 0))
2577         continue;
2578       obj = *expr_p;
2579       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2580         x = produce_memory_decl_rtl (obj, regno);
2581       break;
2582
2583     case SSA_NAME:
2584       *ws = 0;
2585       obj = SSA_NAME_VAR (*expr_p);
2586       if (!DECL_RTL_SET_P (obj))
2587         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2588       break;
2589
2590     case VAR_DECL:
2591     case PARM_DECL:
2592     case RESULT_DECL:
2593       *ws = 0;
2594       obj = *expr_p;
2595
2596       if (DECL_RTL_SET_P (obj))
2597         break;
2598
2599       if (DECL_MODE (obj) == BLKmode)
2600         x = produce_memory_decl_rtl (obj, regno);
2601       else
2602         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2603
2604       break;
2605
2606     default:
2607       break;
2608     }
2609
2610   if (x)
2611     {
2612       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2613       SET_DECL_RTL (obj, x);
2614     }
2615
2616   return NULL_TREE;
2617 }
2618
2619 /* Determines cost of the computation of EXPR.  */
2620
2621 static unsigned
2622 computation_cost (tree expr)
2623 {
2624   rtx seq, rslt;
2625   tree type = TREE_TYPE (expr);
2626   unsigned cost;
2627   /* Avoid using hard regs in ways which may be unsupported.  */
2628   int regno = LAST_VIRTUAL_REGISTER + 1;
2629
2630   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2631   start_sequence ();
2632   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2633   seq = get_insns ();
2634   end_sequence ();
2635
2636   cost = seq_cost (seq);
2637   if (MEM_P (rslt))
2638     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2639
2640   return cost;
2641 }
2642
2643 /* Returns variable containing the value of candidate CAND at statement AT.  */
2644
2645 static tree
2646 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2647 {
2648   if (stmt_after_increment (loop, cand, stmt))
2649     return cand->var_after;
2650   else
2651     return cand->var_before;
2652 }
2653
2654 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2655    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2656
2657 int
2658 tree_int_cst_sign_bit (const_tree t)
2659 {
2660   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2661   unsigned HOST_WIDE_INT w;
2662
2663   if (bitno < HOST_BITS_PER_WIDE_INT)
2664     w = TREE_INT_CST_LOW (t);
2665   else
2666     {
2667       w = TREE_INT_CST_HIGH (t);
2668       bitno -= HOST_BITS_PER_WIDE_INT;
2669     }
2670
2671   return (w >> bitno) & 1;
2672 }
2673
2674 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2675    same precision that is at least as wide as the precision of TYPE, stores
2676    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2677    type of A and B.  */
2678
2679 static tree
2680 determine_common_wider_type (tree *a, tree *b)
2681 {
2682   tree wider_type = NULL;
2683   tree suba, subb;
2684   tree atype = TREE_TYPE (*a);
2685
2686   if (CONVERT_EXPR_P (*a))
2687     {
2688       suba = TREE_OPERAND (*a, 0);
2689       wider_type = TREE_TYPE (suba);
2690       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2691         return atype;
2692     }
2693   else
2694     return atype;
2695
2696   if (CONVERT_EXPR_P (*b))
2697     {
2698       subb = TREE_OPERAND (*b, 0);
2699       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2700         return atype;
2701     }
2702   else
2703     return atype;
2704
2705   *a = suba;
2706   *b = subb;
2707   return wider_type;
2708 }
2709
2710 /* Determines the expression by that USE is expressed from induction variable
2711    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2712    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2713
2714 static bool
2715 get_computation_aff (struct loop *loop,
2716                      struct iv_use *use, struct iv_cand *cand, tree at,
2717                      struct affine_tree_combination *aff)
2718 {
2719   tree ubase = use->iv->base;
2720   tree ustep = use->iv->step;
2721   tree cbase = cand->iv->base;
2722   tree cstep = cand->iv->step, cstep_common;
2723   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2724   tree common_type, var;
2725   tree uutype;
2726   aff_tree cbase_aff, var_aff;
2727   double_int rat;
2728
2729   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2730     {
2731       /* We do not have a precision to express the values of use.  */
2732       return false;
2733     }
2734
2735   var = var_at_stmt (loop, cand, at);
2736   uutype = unsigned_type_for (utype);
2737
2738   /* If the conversion is not noop, perform it.  */
2739   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2740     {
2741       cstep = fold_convert (uutype, cstep);
2742       cbase = fold_convert (uutype, cbase);
2743       var = fold_convert (uutype, var);
2744     }
2745
2746   if (!constant_multiple_of (ustep, cstep, &rat))
2747     return false;
2748
2749   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2750      type, we achieve better folding by computing their difference in this
2751      wider type, and cast the result to UUTYPE.  We do not need to worry about
2752      overflows, as all the arithmetics will in the end be performed in UUTYPE
2753      anyway.  */
2754   common_type = determine_common_wider_type (&ubase, &cbase);
2755
2756   /* use = ubase - ratio * cbase + ratio * var.  */
2757   tree_to_aff_combination (ubase, common_type, aff);
2758   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2759   tree_to_aff_combination (var, uutype, &var_aff);
2760
2761   /* We need to shift the value if we are after the increment.  */
2762   if (stmt_after_increment (loop, cand, at))
2763     {
2764       aff_tree cstep_aff;
2765
2766       if (common_type != uutype)
2767         cstep_common = fold_convert (common_type, cstep);
2768       else
2769         cstep_common = cstep;
2770
2771       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2772       aff_combination_add (&cbase_aff, &cstep_aff);
2773     }
2774
2775   aff_combination_scale (&cbase_aff, double_int_neg (rat));
2776   aff_combination_add (aff, &cbase_aff);
2777   if (common_type != uutype)
2778     aff_combination_convert (aff, uutype);
2779
2780   aff_combination_scale (&var_aff, rat);
2781   aff_combination_add (aff, &var_aff);
2782
2783   return true;
2784 }
2785
2786 /* Determines the expression by that USE is expressed from induction variable
2787    CAND at statement AT in LOOP.  The computation is unshared.  */
2788
2789 static tree
2790 get_computation_at (struct loop *loop,
2791                     struct iv_use *use, struct iv_cand *cand, tree at)
2792 {
2793   aff_tree aff;
2794   tree type = TREE_TYPE (use->iv->base);
2795
2796   if (!get_computation_aff (loop, use, cand, at, &aff))
2797     return NULL_TREE;
2798   unshare_aff_combination (&aff);
2799   return fold_convert (type, aff_combination_to_tree (&aff));
2800 }
2801
2802 /* Determines the expression by that USE is expressed from induction variable
2803    CAND in LOOP.  The computation is unshared.  */
2804
2805 static tree
2806 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2807 {
2808   return get_computation_at (loop, use, cand, use->stmt);
2809 }
2810
2811 /* Returns cost of addition in MODE.  */
2812
2813 static unsigned
2814 add_cost (enum machine_mode mode)
2815 {
2816   static unsigned costs[NUM_MACHINE_MODES];
2817   rtx seq;
2818   unsigned cost;
2819
2820   if (costs[mode])
2821     return costs[mode];
2822
2823   start_sequence ();
2824   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2825                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2826                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2827                  NULL_RTX);
2828   seq = get_insns ();
2829   end_sequence ();
2830
2831   cost = seq_cost (seq);
2832   if (!cost)
2833     cost = 1;
2834
2835   costs[mode] = cost;
2836
2837   if (dump_file && (dump_flags & TDF_DETAILS))
2838     fprintf (dump_file, "Addition in %s costs %d\n",
2839              GET_MODE_NAME (mode), cost);
2840   return cost;
2841 }
2842
2843 /* Entry in a hashtable of already known costs for multiplication.  */
2844 struct mbc_entry
2845 {
2846   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
2847   enum machine_mode mode;       /* In mode.  */
2848   unsigned cost;                /* The cost.  */
2849 };
2850
2851 /* Counts hash value for the ENTRY.  */
2852
2853 static hashval_t
2854 mbc_entry_hash (const void *entry)
2855 {
2856   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2857
2858   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2859 }
2860
2861 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2862
2863 static int
2864 mbc_entry_eq (const void *entry1, const void *entry2)
2865 {
2866   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2867   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2868
2869   return (e1->mode == e2->mode
2870           && e1->cst == e2->cst);
2871 }
2872
2873 /* Returns cost of multiplication by constant CST in MODE.  */
2874
2875 unsigned
2876 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2877 {
2878   static htab_t costs;
2879   struct mbc_entry **cached, act;
2880   rtx seq;
2881   unsigned cost;
2882
2883   if (!costs)
2884     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2885
2886   act.mode = mode;
2887   act.cst = cst;
2888   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2889   if (*cached)
2890     return (*cached)->cost;
2891
2892   *cached = XNEW (struct mbc_entry);
2893   (*cached)->mode = mode;
2894   (*cached)->cst = cst;
2895
2896   start_sequence ();
2897   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2898                gen_int_mode (cst, mode), NULL_RTX, 0);
2899   seq = get_insns ();
2900   end_sequence ();
2901
2902   cost = seq_cost (seq);
2903
2904   if (dump_file && (dump_flags & TDF_DETAILS))
2905     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2906              (int) cst, GET_MODE_NAME (mode), cost);
2907
2908   (*cached)->cost = cost;
2909
2910   return cost;
2911 }
2912
2913 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2914    validity for a memory reference accessing memory of mode MODE.  */
2915
2916 bool
2917 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2918 {
2919 #define MAX_RATIO 128
2920   static sbitmap valid_mult[MAX_MACHINE_MODE];
2921
2922   if (!valid_mult[mode])
2923     {
2924       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2925       rtx addr;
2926       HOST_WIDE_INT i;
2927
2928       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2929       sbitmap_zero (valid_mult[mode]);
2930       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2931       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2932         {
2933           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2934           if (memory_address_p (mode, addr))
2935             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2936         }
2937
2938       if (dump_file && (dump_flags & TDF_DETAILS))
2939         {
2940           fprintf (dump_file, "  allowed multipliers:");
2941           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2942             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2943               fprintf (dump_file, " %d", (int) i);
2944           fprintf (dump_file, "\n");
2945           fprintf (dump_file, "\n");
2946         }
2947     }
2948
2949   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2950     return false;
2951
2952   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2953 }
2954
2955 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2956    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2957    variable is omitted.  Compute the cost for a memory reference that accesses
2958    a memory location of mode MEM_MODE.
2959
2960    TODO -- there must be some better way.  This all is quite crude.  */
2961
2962 static comp_cost
2963 get_address_cost (bool symbol_present, bool var_present,
2964                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2965                   enum machine_mode mem_mode)
2966 {
2967   static bool initialized[MAX_MACHINE_MODE];
2968   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2969   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2970   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2971   unsigned cost, acost, complexity;
2972   bool offset_p, ratio_p;
2973   HOST_WIDE_INT s_offset;
2974   unsigned HOST_WIDE_INT mask;
2975   unsigned bits;
2976
2977   if (!initialized[mem_mode])
2978     {
2979       HOST_WIDE_INT i;
2980       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2981       int old_cse_not_expected;
2982       unsigned sym_p, var_p, off_p, rat_p, add_c;
2983       rtx seq, addr, base;
2984       rtx reg0, reg1;
2985
2986       initialized[mem_mode] = true;
2987
2988       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2989
2990       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2991       for (i = start; i <= 1 << 20; i <<= 1)
2992         {
2993           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2994           if (!memory_address_p (mem_mode, addr))
2995             break;
2996         }
2997       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2998       off[mem_mode] = max_offset[mem_mode];
2999
3000       for (i = start; i <= 1 << 20; i <<= 1)
3001         {
3002           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3003           if (!memory_address_p (mem_mode, addr))
3004             break;
3005         }
3006       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3007
3008       if (dump_file && (dump_flags & TDF_DETAILS))
3009         {
3010           fprintf (dump_file, "get_address_cost:\n");
3011           fprintf (dump_file, "  min offset %s %d\n",
3012                    GET_MODE_NAME (mem_mode),
3013                    (int) min_offset[mem_mode]);
3014           fprintf (dump_file, "  max offset %s %d\n",
3015                    GET_MODE_NAME (mem_mode),
3016                    (int) max_offset[mem_mode]);
3017         }
3018
3019       rat[mem_mode] = 1;
3020       for (i = 2; i <= MAX_RATIO; i++)
3021         if (multiplier_allowed_in_address_p (i, mem_mode))
3022           {
3023             rat[mem_mode] = i;
3024             break;
3025           }
3026
3027       /* Compute the cost of various addressing modes.  */
3028       acost = 0;
3029       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3030       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3031
3032       for (i = 0; i < 16; i++)
3033         {
3034           sym_p = i & 1;
3035           var_p = (i >> 1) & 1;
3036           off_p = (i >> 2) & 1;
3037           rat_p = (i >> 3) & 1;
3038
3039           addr = reg0;
3040           if (rat_p)
3041             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3042                                    gen_int_mode (rat[mem_mode], Pmode));
3043
3044           if (var_p)
3045             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3046
3047           if (sym_p)
3048             {
3049               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3050               /* ??? We can run into trouble with some backends by presenting
3051                  it with symbols which haven't been properly passed through
3052                  targetm.encode_section_info.  By setting the local bit, we
3053                  enhance the probability of things working.  */
3054               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3055
3056               if (off_p)
3057                 base = gen_rtx_fmt_e (CONST, Pmode,
3058                                       gen_rtx_fmt_ee (PLUS, Pmode,
3059                                                       base,
3060                                                       gen_int_mode (off[mem_mode],
3061                                                                     Pmode)));
3062             }
3063           else if (off_p)
3064             base = gen_int_mode (off[mem_mode], Pmode);
3065           else
3066             base = NULL_RTX;
3067
3068           if (base)
3069             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3070
3071           start_sequence ();
3072           /* To avoid splitting addressing modes, pretend that no cse will
3073              follow.  */
3074           old_cse_not_expected = cse_not_expected;
3075           cse_not_expected = true;
3076           addr = memory_address (mem_mode, addr);
3077           cse_not_expected = old_cse_not_expected;
3078           seq = get_insns ();
3079           end_sequence ();
3080
3081           acost = seq_cost (seq);
3082           acost += address_cost (addr, mem_mode);
3083
3084           if (!acost)
3085             acost = 1;
3086           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3087         }
3088
3089       /* On some targets, it is quite expensive to load symbol to a register,
3090          which makes addresses that contain symbols look much more expensive.
3091          However, the symbol will have to be loaded in any case before the
3092          loop (and quite likely we have it in register already), so it does not
3093          make much sense to penalize them too heavily.  So make some final
3094          tweaks for the SYMBOL_PRESENT modes:
3095
3096          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3097          var is cheaper, use this mode with small penalty.
3098          If VAR_PRESENT is true, try whether the mode with
3099          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3100          if this is the case, use it.  */
3101       add_c = add_cost (Pmode);
3102       for (i = 0; i < 8; i++)
3103         {
3104           var_p = i & 1;
3105           off_p = (i >> 1) & 1;
3106           rat_p = (i >> 2) & 1;
3107
3108           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3109           if (var_p)
3110             acost += add_c;
3111
3112           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3113             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3114         }
3115
3116       if (dump_file && (dump_flags & TDF_DETAILS))
3117         {
3118           fprintf (dump_file, "Address costs:\n");
3119
3120           for (i = 0; i < 16; i++)
3121             {
3122               sym_p = i & 1;
3123               var_p = (i >> 1) & 1;
3124               off_p = (i >> 2) & 1;
3125               rat_p = (i >> 3) & 1;
3126
3127               fprintf (dump_file, "  ");
3128               if (sym_p)
3129                 fprintf (dump_file, "sym + ");
3130               if (var_p)
3131                 fprintf (dump_file, "var + ");
3132               if (off_p)
3133                 fprintf (dump_file, "cst + ");
3134               if (rat_p)
3135                 fprintf (dump_file, "rat * ");
3136
3137               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3138               fprintf (dump_file, "index costs %d\n", acost);
3139             }
3140           fprintf (dump_file, "\n");
3141         }
3142     }
3143
3144   bits = GET_MODE_BITSIZE (Pmode);
3145   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3146   offset &= mask;
3147   if ((offset >> (bits - 1) & 1))
3148     offset |= ~mask;
3149   s_offset = offset;
3150
3151   cost = 0;
3152   offset_p = (s_offset != 0
3153               && min_offset[mem_mode] <= s_offset
3154               && s_offset <= max_offset[mem_mode]);
3155   ratio_p = (ratio != 1
3156              && multiplier_allowed_in_address_p (ratio, mem_mode));
3157
3158   if (ratio != 1 && !ratio_p)
3159     cost += multiply_by_cost (ratio, Pmode);
3160
3161   if (s_offset && !offset_p && !symbol_present)
3162     cost += add_cost (Pmode);
3163
3164   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3165   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3166   return new_cost (cost + acost, complexity);
3167 }
3168
3169 /* Estimates cost of forcing expression EXPR into a variable.  */
3170
3171 static comp_cost
3172 force_expr_to_var_cost (tree expr)
3173 {
3174   static bool costs_initialized = false;
3175   static unsigned integer_cost;
3176   static unsigned symbol_cost;
3177   static unsigned address_cost;
3178   tree op0, op1;
3179   comp_cost cost0, cost1, cost;
3180   enum machine_mode mode;
3181
3182   if (!costs_initialized)
3183     {
3184       tree type = build_pointer_type (integer_type_node);
3185       tree var, addr;
3186       rtx x;
3187
3188       var = create_tmp_var_raw (integer_type_node, "test_var");
3189       TREE_STATIC (var) = 1;
3190       x = produce_memory_decl_rtl (var, NULL);
3191       SET_DECL_RTL (var, x);
3192
3193       integer_cost = computation_cost (build_int_cst (integer_type_node,
3194                                                       2000));
3195
3196       addr = build1 (ADDR_EXPR, type, var);
3197       symbol_cost = computation_cost (addr) + 1;
3198
3199       address_cost
3200         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3201                                     addr,
3202                                     build_int_cst (sizetype, 2000))) + 1;
3203       if (dump_file && (dump_flags & TDF_DETAILS))
3204         {
3205           fprintf (dump_file, "force_expr_to_var_cost:\n");
3206           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3207           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3208           fprintf (dump_file, "  address %d\n", (int) address_cost);
3209           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3210           fprintf (dump_file, "\n");
3211         }
3212
3213       costs_initialized = true;
3214     }
3215
3216   STRIP_NOPS (expr);
3217
3218   if (SSA_VAR_P (expr))
3219     return zero_cost;
3220
3221   if (is_gimple_min_invariant (expr))
3222     {
3223       if (TREE_CODE (expr) == INTEGER_CST)
3224         return new_cost (integer_cost, 0);
3225
3226       if (TREE_CODE (expr) == ADDR_EXPR)
3227         {
3228           tree obj = TREE_OPERAND (expr, 0);
3229
3230           if (TREE_CODE (obj) == VAR_DECL
3231               || TREE_CODE (obj) == PARM_DECL
3232               || TREE_CODE (obj) == RESULT_DECL)
3233             return new_cost (symbol_cost, 0);
3234         }
3235
3236       return new_cost (address_cost, 0);
3237     }
3238
3239   switch (TREE_CODE (expr))
3240     {
3241     case POINTER_PLUS_EXPR:
3242     case PLUS_EXPR:
3243     case MINUS_EXPR:
3244     case MULT_EXPR:
3245       op0 = TREE_OPERAND (expr, 0);
3246       op1 = TREE_OPERAND (expr, 1);
3247       STRIP_NOPS (op0);
3248       STRIP_NOPS (op1);
3249
3250       if (is_gimple_val (op0))
3251         cost0 = zero_cost;
3252       else
3253         cost0 = force_expr_to_var_cost (op0);
3254
3255       if (is_gimple_val (op1))
3256         cost1 = zero_cost;
3257       else
3258         cost1 = force_expr_to_var_cost (op1);
3259
3260       break;
3261
3262     default:
3263       /* Just an arbitrary value, FIXME.  */
3264       return new_cost (target_spill_cost, 0);
3265     }
3266
3267   mode = TYPE_MODE (TREE_TYPE (expr));
3268   switch (TREE_CODE (expr))
3269     {
3270     case POINTER_PLUS_EXPR:
3271     case PLUS_EXPR:
3272     case MINUS_EXPR:
3273       cost = new_cost (add_cost (mode), 0);
3274       break;
3275
3276     case MULT_EXPR:
3277       if (cst_and_fits_in_hwi (op0))
3278         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3279       else if (cst_and_fits_in_hwi (op1))
3280         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3281       else
3282         return new_cost (target_spill_cost, 0);
3283       break;
3284
3285     default:
3286       gcc_unreachable ();
3287     }
3288
3289   cost = add_costs (cost, cost0);
3290   cost = add_costs (cost, cost1);
3291
3292   /* Bound the cost by target_spill_cost.  The parts of complicated
3293      computations often are either loop invariant or at least can
3294      be shared between several iv uses, so letting this grow without
3295      limits would not give reasonable results.  */
3296   if (cost.cost > target_spill_cost)
3297     cost.cost = target_spill_cost;
3298
3299   return cost;
3300 }
3301
3302 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3303    invariants the computation depends on.  */
3304
3305 static comp_cost
3306 force_var_cost (struct ivopts_data *data,
3307                 tree expr, bitmap *depends_on)
3308 {
3309   if (depends_on)
3310     {
3311       fd_ivopts_data = data;
3312       walk_tree (&expr, find_depends, depends_on, NULL);
3313     }
3314
3315   return force_expr_to_var_cost (expr);
3316 }
3317
3318 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3319    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3320    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3321    invariants the computation depends on.  */
3322
3323 static comp_cost
3324 split_address_cost (struct ivopts_data *data,
3325                     tree addr, bool *symbol_present, bool *var_present,
3326                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3327 {
3328   tree core;
3329   HOST_WIDE_INT bitsize;
3330   HOST_WIDE_INT bitpos;
3331   tree toffset;
3332   enum machine_mode mode;
3333   int unsignedp, volatilep;
3334
3335   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3336                               &unsignedp, &volatilep, false);
3337
3338   if (toffset != 0
3339       || bitpos % BITS_PER_UNIT != 0
3340       || TREE_CODE (core) != VAR_DECL)
3341     {
3342       *symbol_present = false;
3343       *var_present = true;
3344       fd_ivopts_data = data;
3345       walk_tree (&addr, find_depends, depends_on, NULL);
3346       return new_cost (target_spill_cost, 0);
3347     }
3348
3349   *offset += bitpos / BITS_PER_UNIT;
3350   if (TREE_STATIC (core)
3351       || DECL_EXTERNAL (core))
3352     {
3353       *symbol_present = true;
3354       *var_present = false;
3355       return zero_cost;
3356     }
3357
3358   *symbol_present = false;
3359   *var_present = true;
3360   return zero_cost;
3361 }
3362
3363 /* Estimates cost of expressing difference of addresses E1 - E2 as
3364    var + symbol + offset.  The value of offset is added to OFFSET,
3365    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3366    part is missing.  DEPENDS_ON is a set of the invariants the computation
3367    depends on.  */
3368
3369 static comp_cost
3370 ptr_difference_cost (struct ivopts_data *data,
3371                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3372                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3373 {
3374   HOST_WIDE_INT diff = 0;
3375   comp_cost cost;
3376
3377   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3378
3379   if (ptr_difference_const (e1, e2, &diff))
3380     {
3381       *offset += diff;
3382       *symbol_present = false;
3383       *var_present = false;
3384       return zero_cost;
3385     }
3386
3387   if (integer_zerop (e2))
3388     return split_address_cost (data, TREE_OPERAND (e1, 0),
3389                                symbol_present, var_present, offset, depends_on);
3390
3391   *symbol_present = false;
3392   *var_present = true;
3393
3394   cost = force_var_cost (data, e1, depends_on);
3395   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3396   cost.cost += add_cost (Pmode);
3397
3398   return cost;
3399 }
3400
3401 /* Estimates cost of expressing difference E1 - E2 as
3402    var + symbol + offset.  The value of offset is added to OFFSET,
3403    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3404    part is missing.  DEPENDS_ON is a set of the invariants the computation
3405    depends on.  */
3406
3407 static comp_cost
3408 difference_cost (struct ivopts_data *data,
3409                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3410                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3411 {
3412   comp_cost cost;
3413   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3414   unsigned HOST_WIDE_INT off1, off2;
3415
3416   e1 = strip_offset (e1, &off1);
3417   e2 = strip_offset (e2, &off2);
3418   *offset += off1 - off2;
3419
3420   STRIP_NOPS (e1);
3421   STRIP_NOPS (e2);
3422
3423   if (TREE_CODE (e1) == ADDR_EXPR)
3424     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3425                                 depends_on);
3426   *symbol_present = false;
3427
3428   if (operand_equal_p (e1, e2, 0))
3429     {
3430       *var_present = false;
3431       return zero_cost;
3432     }
3433   *var_present = true;
3434   if (integer_zerop (e2))
3435     return force_var_cost (data, e1, depends_on);
3436
3437   if (integer_zerop (e1))
3438     {
3439       cost = force_var_cost (data, e2, depends_on);
3440       cost.cost += multiply_by_cost (-1, mode);
3441
3442       return cost;
3443     }
3444
3445   cost = force_var_cost (data, e1, depends_on);
3446   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3447   cost.cost += add_cost (mode);
3448
3449   return cost;
3450 }
3451
3452 /* Determines the cost of the computation by that USE is expressed
3453    from induction variable CAND.  If ADDRESS_P is true, we just need
3454    to create an address from it, otherwise we want to get it into
3455    register.  A set of invariants we depend on is stored in
3456    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3457
3458 static comp_cost
3459 get_computation_cost_at (struct ivopts_data *data,
3460                          struct iv_use *use, struct iv_cand *cand,
3461                          bool address_p, bitmap *depends_on, tree at)
3462 {
3463   tree ubase = use->iv->base, ustep = use->iv->step;
3464   tree cbase, cstep;
3465   tree utype = TREE_TYPE (ubase), ctype;
3466   unsigned HOST_WIDE_INT cstepi, offset = 0;
3467   HOST_WIDE_INT ratio, aratio;
3468   bool var_present, symbol_present;
3469   comp_cost cost;
3470   unsigned n_sums;
3471   double_int rat;
3472
3473   *depends_on = NULL;
3474
3475   /* Only consider real candidates.  */
3476   if (!cand->iv)
3477     return infinite_cost;
3478
3479   cbase = cand->iv->base;
3480   cstep = cand->iv->step;
3481   ctype = TREE_TYPE (cbase);
3482
3483   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3484     {
3485       /* We do not have a precision to express the values of use.  */
3486       return infinite_cost;
3487     }
3488
3489   if (address_p)
3490     {
3491       /* Do not try to express address of an object with computation based
3492          on address of a different object.  This may cause problems in rtl
3493          level alias analysis (that does not expect this to be happening,
3494          as this is illegal in C), and would be unlikely to be useful
3495          anyway.  */
3496       if (use->iv->base_object
3497           && cand->iv->base_object
3498           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3499         return infinite_cost;
3500     }
3501
3502   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3503     {
3504       /* TODO -- add direct handling of this case.  */
3505       goto fallback;
3506     }
3507
3508   /* CSTEPI is removed from the offset in case statement is after the
3509      increment.  If the step is not constant, we use zero instead.
3510      This is a bit imprecise (there is the extra addition), but
3511      redundancy elimination is likely to transform the code so that
3512      it uses value of the variable before increment anyway,
3513      so it is not that much unrealistic.  */
3514   if (cst_and_fits_in_hwi (cstep))
3515     cstepi = int_cst_value (cstep);
3516   else
3517     cstepi = 0;
3518
3519   if (!constant_multiple_of (ustep, cstep, &rat))
3520     return infinite_cost;
3521
3522   if (double_int_fits_in_shwi_p (rat))
3523     ratio = double_int_to_shwi (rat);
3524   else
3525     return infinite_cost;
3526
3527   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3528      or ratio == 1, it is better to handle this like
3529
3530      ubase - ratio * cbase + ratio * var
3531
3532      (also holds in the case ratio == -1, TODO.  */
3533
3534   if (cst_and_fits_in_hwi (cbase))
3535     {
3536       offset = - ratio * int_cst_value (cbase);
3537       cost = difference_cost (data,
3538                               ubase, build_int_cst (utype, 0),
3539                               &symbol_present, &var_present, &offset,
3540                               depends_on);
3541     }
3542   else if (ratio == 1)
3543     {
3544       cost = difference_cost (data,
3545                               ubase, cbase,
3546                               &symbol_present, &var_present, &offset,
3547                               depends_on);
3548     }
3549   else
3550     {
3551       cost = force_var_cost (data, cbase, depends_on);
3552       cost.cost += add_cost (TYPE_MODE (ctype));
3553       cost = add_costs (cost,
3554                         difference_cost (data,
3555                                          ubase, build_int_cst (utype, 0),
3556                                          &symbol_present, &var_present,
3557                                          &offset, depends_on));
3558     }
3559
3560   /* If we are after the increment, the value of the candidate is higher by
3561      one iteration.  */
3562   if (stmt_after_increment (data->current_loop, cand, at))
3563     offset -= ratio * cstepi;
3564
3565   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3566      (symbol/var/const parts may be omitted).  If we are looking for an address,
3567      find the cost of addressing this.  */
3568   if (address_p)
3569     return add_costs (cost, get_address_cost (symbol_present, var_present,
3570                                 offset, ratio,
3571                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3572
3573   /* Otherwise estimate the costs for computing the expression.  */
3574   aratio = ratio > 0 ? ratio : -ratio;
3575   if (!symbol_present && !var_present && !offset)
3576     {
3577       if (ratio != 1)
3578         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3579
3580       return cost;
3581     }
3582
3583   if (aratio != 1)
3584     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3585
3586   n_sums = 1;
3587   if (var_present
3588       /* Symbol + offset should be compile-time computable.  */
3589       && (symbol_present || offset))
3590     n_sums++;
3591
3592   /* Having offset does not affect runtime cost in case it is added to
3593      symbol, but it increases complexity.  */
3594   if (offset)
3595     cost.complexity++;
3596
3597   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3598   return cost;
3599
3600 fallback:
3601   {
3602     /* Just get the expression, expand it and measure the cost.  */
3603     tree comp = get_computation_at (data->current_loop, use, cand, at);
3604
3605     if (!comp)
3606       return infinite_cost;
3607
3608     if (address_p)
3609       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3610
3611     return new_cost (computation_cost (comp), 0);
3612   }
3613 }
3614
3615 /* Determines the cost of the computation by that USE is expressed
3616    from induction variable CAND.  If ADDRESS_P is true, we just need
3617    to create an address from it, otherwise we want to get it into
3618    register.  A set of invariants we depend on is stored in
3619    DEPENDS_ON.  */
3620
3621 static comp_cost
3622 get_computation_cost (struct ivopts_data *data,
3623                       struct iv_use *use, struct iv_cand *cand,
3624                       bool address_p, bitmap *depends_on)
3625 {
3626   return get_computation_cost_at (data,
3627                                   use, cand, address_p, depends_on, use->stmt);
3628 }
3629
3630 /* Determines cost of basing replacement of USE on CAND in a generic
3631    expression.  */
3632
3633 static bool
3634 determine_use_iv_cost_generic (struct ivopts_data *data,
3635                                struct iv_use *use, struct iv_cand *cand)
3636 {
3637   bitmap depends_on;
3638   comp_cost cost;
3639
3640   /* The simple case first -- if we need to express value of the preserved
3641      original biv, the cost is 0.  This also prevents us from counting the
3642      cost of increment twice -- once at this use and once in the cost of
3643      the candidate.  */
3644   if (cand->pos == IP_ORIGINAL
3645       && cand->incremented_at == use->stmt)
3646     {
3647       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3648       return true;
3649     }
3650
3651   cost = get_computation_cost (data, use, cand, false, &depends_on);
3652   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3653
3654   return !infinite_cost_p (cost);
3655 }
3656
3657 /* Determines cost of basing replacement of USE on CAND in an address.  */
3658
3659 static bool
3660 determine_use_iv_cost_address (struct ivopts_data *data,
3661                                struct iv_use *use, struct iv_cand *cand)
3662 {
3663   bitmap depends_on;
3664   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3665
3666   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3667
3668   return !infinite_cost_p (cost);
3669 }
3670
3671 /* Computes value of candidate CAND at position AT in iteration NITER, and
3672    stores it to VAL.  */
3673
3674 static void
3675 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3676                aff_tree *val)
3677 {
3678   aff_tree step, delta, nit;
3679   struct iv *iv = cand->iv;
3680   tree type = TREE_TYPE (iv->base);
3681   tree steptype = type;
3682   if (POINTER_TYPE_P (type))
3683     steptype = sizetype;
3684
3685   tree_to_aff_combination (iv->step, steptype, &step);
3686   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3687   aff_combination_convert (&nit, steptype);
3688   aff_combination_mult (&nit, &step, &delta);
3689   if (stmt_after_increment (loop, cand, at))
3690     aff_combination_add (&delta, &step);
3691
3692   tree_to_aff_combination (iv->base, type, val);
3693   aff_combination_add (val, &delta);
3694 }
3695
3696 /* Returns period of induction variable iv.  */
3697
3698 static tree
3699 iv_period (struct iv *iv)
3700 {
3701   tree step = iv->step, period, type;
3702   tree pow2div;
3703
3704   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3705
3706   /* Period of the iv is gcd (step, type range).  Since type range is power
3707      of two, it suffices to determine the maximum power of two that divides
3708      step.  */
3709   pow2div = num_ending_zeros (step);
3710   type = unsigned_type_for (TREE_TYPE (step));
3711
3712   period = build_low_bits_mask (type,
3713                                 (TYPE_PRECISION (type)
3714                                  - tree_low_cst (pow2div, 1)));
3715
3716   return period;
3717 }
3718
3719 /* Returns the comparison operator used when eliminating the iv USE.  */
3720
3721 static enum tree_code
3722 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3723 {
3724   struct loop *loop = data->current_loop;
3725   basic_block ex_bb;
3726   edge exit;
3727
3728   ex_bb = bb_for_stmt (use->stmt);
3729   exit = EDGE_SUCC (ex_bb, 0);
3730   if (flow_bb_inside_loop_p (loop, exit->dest))
3731     exit = EDGE_SUCC (ex_bb, 1);
3732
3733   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3734 }
3735
3736 /* Check whether it is possible to express the condition in USE by comparison
3737    of candidate CAND.  If so, store the value compared with to BOUND.  */
3738
3739 static bool
3740 may_eliminate_iv (struct ivopts_data *data,
3741                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3742 {
3743   basic_block ex_bb;
3744   edge exit;
3745   tree nit, period;
3746   struct loop *loop = data->current_loop;
3747   aff_tree bnd;
3748   double_int period_value, max_niter;
3749
3750   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3751     return false;
3752
3753   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3754      for other conditions inside loop body.  */
3755   ex_bb = bb_for_stmt (use->stmt);
3756   if (use->stmt != last_stmt (ex_bb)
3757       || TREE_CODE (use->stmt) != COND_EXPR)
3758     return false;
3759   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3760     return false;
3761
3762   exit = EDGE_SUCC (ex_bb, 0);
3763   if (flow_bb_inside_loop_p (loop, exit->dest))
3764     exit = EDGE_SUCC (ex_bb, 1);
3765   if (flow_bb_inside_loop_p (loop, exit->dest))
3766     return false;
3767
3768   nit = niter_for_exit (data, exit);
3769   if (!nit)
3770     return false;
3771
3772   /* Determine whether we may use the variable to test whether niter iterations
3773      elapsed.  This is the case iff the period of the induction variable is
3774      greater than the number of iterations.  */
3775   period = iv_period (cand->iv);
3776   if (!period)
3777     return false;
3778
3779   /* Compare the period with the estimate on the number of iterations of the
3780      loop.  */
3781   if (!estimated_loop_iterations (loop, true, &max_niter))
3782     return false;
3783   period_value = tree_to_double_int (period);
3784   if (double_int_ucmp (period_value, max_niter) <= 0)
3785     return false;
3786
3787   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3788   *bound = aff_combination_to_tree (&bnd);
3789   return true;
3790 }
3791
3792 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3793
3794 static bool
3795 determine_use_iv_cost_condition (struct ivopts_data *data,
3796                                  struct iv_use *use, struct iv_cand *cand)
3797 {
3798   tree bound = NULL_TREE;
3799   struct iv *cmp_iv;
3800   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3801   comp_cost elim_cost, express_cost, cost;
3802   bool ok;
3803
3804   /* Only consider real candidates.  */
3805   if (!cand->iv)
3806     {
3807       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3808       return false;
3809     }
3810
3811   /* Try iv elimination.  */
3812   if (may_eliminate_iv (data, use, cand, &bound))
3813     {
3814       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3815       /* The bound is a loop invariant, so it will be only computed
3816          once.  */
3817       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3818     }
3819   else
3820     elim_cost = infinite_cost;
3821
3822   /* Try expressing the original giv.  If it is compared with an invariant,
3823      note that we cannot get rid of it.  */
3824   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3825   gcc_assert (ok);
3826
3827   express_cost = get_computation_cost (data, use, cand, false,
3828                                        &depends_on_express);
3829   fd_ivopts_data = data;
3830   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3831
3832   /* Choose the better approach.  */
3833   if (compare_costs (elim_cost, express_cost) < 0)
3834     {
3835       cost = elim_cost;
3836       depends_on = depends_on_elim;
3837       depends_on_elim = NULL;
3838     }
3839   else
3840     {
3841       cost = express_cost;
3842       depends_on = depends_on_express;
3843       depends_on_express = NULL;
3844       bound = NULL_TREE;
3845     }
3846
3847   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3848
3849   if (depends_on_elim)
3850     BITMAP_FREE (depends_on_elim);
3851   if (depends_on_express)
3852     BITMAP_FREE (depends_on_express);
3853
3854   return !infinite_cost_p (cost);
3855 }
3856
3857 /* Determines cost of basing replacement of USE on CAND.  Returns false
3858    if USE cannot be based on CAND.  */
3859
3860 static bool
3861 determine_use_iv_cost (struct ivopts_data *data,
3862                        struct iv_use *use, struct iv_cand *cand)
3863 {
3864   switch (use->type)
3865     {
3866     case USE_NONLINEAR_EXPR:
3867       return determine_use_iv_cost_generic (data, use, cand);
3868
3869     case USE_ADDRESS:
3870       return determine_use_iv_cost_address (data, use, cand);
3871
3872     case USE_COMPARE:
3873       return determine_use_iv_cost_condition (data, use, cand);
3874
3875     default:
3876       gcc_unreachable ();
3877     }
3878 }
3879
3880 /* Determines costs of basing the use of the iv on an iv candidate.  */
3881
3882 static void
3883 determine_use_iv_costs (struct ivopts_data *data)
3884 {
3885   unsigned i, j;
3886   struct iv_use *use;
3887   struct iv_cand *cand;
3888   bitmap to_clear = BITMAP_ALLOC (NULL);
3889
3890   alloc_use_cost_map (data);
3891
3892   for (i = 0; i < n_iv_uses (data); i++)
3893     {
3894       use = iv_use (data, i);
3895
3896       if (data->consider_all_candidates)
3897         {
3898           for (j = 0; j < n_iv_cands (data); j++)
3899             {
3900               cand = iv_cand (data, j);
3901               determine_use_iv_cost (data, use, cand);
3902             }
3903         }
3904       else
3905         {
3906           bitmap_iterator bi;
3907
3908           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3909             {
3910               cand = iv_cand (data, j);
3911               if (!determine_use_iv_cost (data, use, cand))
3912                 bitmap_set_bit (to_clear, j);
3913             }
3914
3915           /* Remove the candidates for that the cost is infinite from
3916              the list of related candidates.  */
3917           bitmap_and_compl_into (use->related_cands, to_clear);
3918           bitmap_clear (to_clear);
3919         }
3920     }
3921
3922   BITMAP_FREE (to_clear);
3923
3924   if (dump_file && (dump_flags & TDF_DETAILS))
3925     {
3926       fprintf (dump_file, "Use-candidate costs:\n");
3927
3928       for (i = 0; i < n_iv_uses (data); i++)
3929         {
3930           use = iv_use (data, i);
3931
3932           fprintf (dump_file, "Use %d:\n", i);
3933           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3934           for (j = 0; j < use->n_map_members; j++)
3935             {
3936               if (!use->cost_map[j].cand
3937                   || infinite_cost_p (use->cost_map[j].cost))
3938                 continue;
3939
3940               fprintf (dump_file, "  %d\t%d\t%d\t",
3941                        use->cost_map[j].cand->id,
3942                        use->cost_map[j].cost.cost,
3943                        use->cost_map[j].cost.complexity);
3944               if (use->cost_map[j].depends_on)
3945                 bitmap_print (dump_file,
3946                               use->cost_map[j].depends_on, "","");
3947               fprintf (dump_file, "\n");
3948             }
3949
3950           fprintf (dump_file, "\n");
3951         }
3952       fprintf (dump_file, "\n");
3953     }
3954 }
3955
3956 /* Determines cost of the candidate CAND.  */
3957
3958 static void
3959 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3960 {
3961   comp_cost cost_base;
3962   unsigned cost, cost_step;
3963   tree base;
3964
3965   if (!cand->iv)
3966     {
3967       cand->cost = 0;
3968       return;
3969     }
3970
3971   /* There are two costs associated with the candidate -- its increment
3972      and its initialization.  The second is almost negligible for any loop
3973      that rolls enough, so we take it just very little into account.  */
3974
3975   base = cand->iv->base;
3976   cost_base = force_var_cost (data, base, NULL);
3977   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3978
3979   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3980
3981   /* Prefer the original ivs unless we may gain something by replacing it.
3982      The reason is to make debugging simpler; so this is not relevant for
3983      artificial ivs created by other optimization passes.  */
3984   if (cand->pos != IP_ORIGINAL
3985       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3986     cost++;
3987
3988   /* Prefer not to insert statements into latch unless there are some
3989      already (so that we do not create unnecessary jumps).  */
3990   if (cand->pos == IP_END
3991       && empty_block_p (ip_end_pos (data->current_loop)))
3992     cost++;
3993
3994   cand->cost = cost;
3995 }
3996
3997 /* Determines costs of computation of the candidates.  */
3998
3999 static void
4000 determine_iv_costs (struct ivopts_data *data)
4001 {
4002   unsigned i;
4003
4004   if (dump_file && (dump_flags & TDF_DETAILS))
4005     {
4006       fprintf (dump_file, "Candidate costs:\n");
4007       fprintf (dump_file, "  cand\tcost\n");
4008     }
4009
4010   for (i = 0; i < n_iv_cands (data); i++)
4011     {
4012       struct iv_cand *cand = iv_cand (data, i);
4013
4014       determine_iv_cost (data, cand);
4015
4016       if (dump_file && (dump_flags & TDF_DETAILS))
4017         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4018     }
4019
4020   if (dump_file && (dump_flags & TDF_DETAILS))
4021     fprintf (dump_file, "\n");
4022 }
4023
4024 /* Calculates cost for having SIZE induction variables.  */
4025
4026 static unsigned
4027 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4028 {
4029   /* We add size to the cost, so that we prefer eliminating ivs
4030      if possible.  */
4031   return size + estimate_reg_pressure_cost (size, data->regs_used);
4032 }
4033
4034 /* For each size of the induction variable set determine the penalty.  */
4035
4036 static void
4037 determine_set_costs (struct ivopts_data *data)
4038 {
4039   unsigned j, n;
4040   tree phi, op;
4041   struct loop *loop = data->current_loop;
4042   bitmap_iterator bi;
4043
4044   /* We use the following model (definitely improvable, especially the
4045      cost function -- TODO):
4046
4047      We estimate the number of registers available (using MD data), name it A.
4048
4049      We estimate the number of registers used by the loop, name it U.  This
4050      number is obtained as the number of loop phi nodes (not counting virtual
4051      registers and bivs) + the number of variables from outside of the loop.
4052
4053      We set a reserve R (free regs that are used for temporary computations,
4054      etc.).  For now the reserve is a constant 3.
4055
4056      Let I be the number of induction variables.
4057
4058      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4059         make a lot of ivs without a reason).
4060      -- if A - R < U + I <= A, the cost is I * PRES_COST
4061      -- if U + I > A, the cost is I * PRES_COST and
4062         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4063
4064   if (dump_file && (dump_flags & TDF_DETAILS))
4065     {
4066       fprintf (dump_file, "Global costs:\n");
4067       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4068       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4069       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4070     }
4071
4072   n = 0;
4073   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4074     {
4075       op = PHI_RESULT (phi);
4076
4077       if (!is_gimple_reg (op))
4078         continue;
4079
4080       if (get_iv (data, op))
4081         continue;
4082
4083       n++;
4084     }
4085
4086   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4087     {
4088       struct version_info *info = ver_info (data, j);
4089
4090       if (info->inv_id && info->has_nonlin_use)
4091         n++;
4092     }
4093
4094   data->regs_used = n;
4095   if (dump_file && (dump_flags & TDF_DETAILS))
4096     fprintf (dump_file, "  regs_used %d\n", n);
4097
4098   if (dump_file && (dump_flags & TDF_DETAILS))
4099     {
4100       fprintf (dump_file, "  cost for size:\n");
4101       fprintf (dump_file, "  ivs\tcost\n");
4102       for (j = 0; j <= 2 * target_avail_regs; j++)
4103         fprintf (dump_file, "  %d\t%d\n", j,
4104                  ivopts_global_cost_for_size (data, j));
4105       fprintf (dump_file, "\n");
4106     }
4107 }
4108
4109 /* Returns true if A is a cheaper cost pair than B.  */
4110
4111 static bool
4112 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4113 {
4114   int cmp;
4115
4116   if (!a)
4117     return false;
4118
4119   if (!b)
4120     return true;
4121
4122   cmp = compare_costs (a->cost, b->cost);
4123   if (cmp < 0)
4124     return true;
4125
4126   if (cmp > 0)
4127     return false;
4128
4129   /* In case the costs are the same, prefer the cheaper candidate.  */
4130   if (a->cand->cost < b->cand->cost)
4131     return true;
4132
4133   return false;
4134 }
4135
4136 /* Computes the cost field of IVS structure.  */
4137
4138 static void
4139 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4140 {
4141   comp_cost cost = ivs->cand_use_cost;
4142   cost.cost += ivs->cand_cost;
4143   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4144
4145   ivs->cost = cost;
4146 }
4147
4148 /* Remove invariants in set INVS to set IVS.  */
4149
4150 static void
4151 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4152 {
4153   bitmap_iterator bi;
4154   unsigned iid;
4155
4156   if (!invs)
4157     return;
4158
4159   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4160     {
4161       ivs->n_invariant_uses[iid]--;
4162       if (ivs->n_invariant_uses[iid] == 0)
4163         ivs->n_regs--;
4164     }
4165 }
4166
4167 /* Set USE not to be expressed by any candidate in IVS.  */
4168
4169 static void
4170 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4171                  struct iv_use *use)
4172 {
4173   unsigned uid = use->id, cid;
4174   struct cost_pair *cp;
4175
4176   cp = ivs->cand_for_use[uid];
4177   if (!cp)
4178     return;
4179   cid = cp->cand->id;
4180
4181   ivs->bad_uses++;
4182   ivs->cand_for_use[uid] = NULL;
4183   ivs->n_cand_uses[cid]--;
4184
4185   if (ivs->n_cand_uses[cid] == 0)
4186     {
4187       bitmap_clear_bit (ivs->cands, cid);
4188       /* Do not count the pseudocandidates.  */
4189       if (cp->cand->iv)
4190         ivs->n_regs--;
4191       ivs->n_cands--;
4192       ivs->cand_cost -= cp->cand->cost;
4193
4194       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4195     }
4196
4197   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4198
4199   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4200   iv_ca_recount_cost (data, ivs);
4201 }
4202
4203 /* Add invariants in set INVS to set IVS.  */
4204
4205 static void
4206 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4207 {
4208   bitmap_iterator bi;
4209   unsigned iid;
4210
4211   if (!invs)
4212     return;
4213
4214   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4215     {
4216       ivs->n_invariant_uses[iid]++;
4217       if (ivs->n_invariant_uses[iid] == 1)
4218         ivs->n_regs++;
4219     }
4220 }
4221
4222 /* Set cost pair for USE in set IVS to CP.  */
4223
4224 static void
4225 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4226               struct iv_use *use, struct cost_pair *cp)
4227 {
4228   unsigned uid = use->id, cid;
4229
4230   if (ivs->cand_for_use[uid] == cp)
4231     return;
4232
4233   if (ivs->cand_for_use[uid])
4234     iv_ca_set_no_cp (data, ivs, use);
4235
4236   if (cp)
4237     {
4238       cid = cp->cand->id;
4239
4240       ivs->bad_uses--;
4241       ivs->cand_for_use[uid] = cp;
4242       ivs->n_cand_uses[cid]++;
4243       if (ivs->n_cand_uses[cid] == 1)
4244         {
4245           bitmap_set_bit (ivs->cands, cid);
4246           /* Do not count the pseudocandidates.  */
4247           if (cp->cand->iv)
4248             ivs->n_regs++;
4249           ivs->n_cands++;
4250           ivs->cand_cost += cp->cand->cost;
4251
4252           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4253         }
4254
4255       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4256       iv_ca_set_add_invariants (ivs, cp->depends_on);
4257       iv_ca_recount_cost (data, ivs);
4258     }
4259 }
4260
4261 /* Extend set IVS by expressing USE by some of the candidates in it
4262    if possible.  */
4263
4264 static void
4265 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4266                struct iv_use *use)
4267 {
4268   struct cost_pair *best_cp = NULL, *cp;
4269   bitmap_iterator bi;
4270   unsigned i;
4271
4272   gcc_assert (ivs->upto >= use->id);
4273
4274   if (ivs->upto == use->id)
4275     {
4276       ivs->upto++;
4277       ivs->bad_uses++;
4278     }
4279
4280   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4281     {
4282       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4283
4284       if (cheaper_cost_pair (cp, best_cp))
4285         best_cp = cp;
4286     }
4287
4288   iv_ca_set_cp (data, ivs, use, best_cp);
4289 }
4290
4291 /* Get cost for assignment IVS.  */
4292
4293 static comp_cost
4294 iv_ca_cost (struct iv_ca *ivs)
4295 {
4296   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4297 }
4298
4299 /* Returns true if all dependences of CP are among invariants in IVS.  */
4300
4301 static bool
4302 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4303 {
4304   unsigned i;
4305   bitmap_iterator bi;
4306
4307   if (!cp->depends_on)
4308     return true;
4309
4310   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4311     {
4312       if (ivs->n_invariant_uses[i] == 0)
4313         return false;
4314     }
4315
4316   return true;
4317 }
4318
4319 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4320    it before NEXT_CHANGE.  */
4321
4322 static struct iv_ca_delta *
4323 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4324                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4325 {
4326   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4327
4328   change->use = use;
4329   change->old_cp = old_cp;
4330   change->new_cp = new_cp;
4331   change->next_change = next_change;
4332
4333   return change;
4334 }
4335
4336 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4337    are rewritten.  */
4338
4339 static struct iv_ca_delta *
4340 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4341 {
4342   struct iv_ca_delta *last;
4343
4344   if (!l2)
4345     return l1;
4346
4347   if (!l1)
4348     return l2;
4349
4350   for (last = l1; last->next_change; last = last->next_change)
4351     continue;
4352   last->next_change = l2;
4353
4354   return l1;
4355 }
4356
4357 /* Returns candidate by that USE is expressed in IVS.  */
4358
4359 static struct cost_pair *
4360 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4361 {
4362   return ivs->cand_for_use[use->id];
4363 }
4364
4365 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4366
4367 static struct iv_ca_delta *
4368 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4369 {
4370   struct iv_ca_delta *act, *next, *prev = NULL;
4371   struct cost_pair *tmp;
4372
4373   for (act = delta; act; act = next)
4374     {
4375       next = act->next_change;
4376       act->next_change = prev;
4377       prev = act;
4378
4379       tmp = act->old_cp;
4380       act->old_cp = act->new_cp;
4381       act->new_cp = tmp;
4382     }
4383
4384   return prev;
4385 }
4386
4387 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4388    reverted instead.  */
4389
4390 static void
4391 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4392                     struct iv_ca_delta *delta, bool forward)
4393 {
4394   struct cost_pair *from, *to;
4395   struct iv_ca_delta *act;
4396
4397   if (!forward)
4398     delta = iv_ca_delta_reverse (delta);
4399
4400   for (act = delta; act; act = act->next_change)
4401     {
4402       from = act->old_cp;
4403       to = act->new_cp;
4404       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4405       iv_ca_set_cp (data, ivs, act->use, to);
4406     }
4407
4408   if (!forward)
4409     iv_ca_delta_reverse (delta);
4410 }
4411
4412 /* Returns true if CAND is used in IVS.  */
4413
4414 static bool
4415 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4416 {
4417   return ivs->n_cand_uses[cand->id] > 0;
4418 }
4419
4420 /* Returns number of induction variable candidates in the set IVS.  */
4421
4422 static unsigned
4423 iv_ca_n_cands (struct iv_ca *ivs)
4424 {
4425   return ivs->n_cands;
4426 }
4427
4428 /* Free the list of changes DELTA.  */
4429
4430 static void
4431 iv_ca_delta_free (struct iv_ca_delta **delta)
4432 {
4433   struct iv_ca_delta *act, *next;
4434
4435   for (act = *delta; act; act = next)
4436     {
4437       next = act->next_change;
4438       free (act);
4439     }
4440
4441   *delta = NULL;
4442 }
4443
4444 /* Allocates new iv candidates assignment.  */
4445
4446 static struct iv_ca *
4447 iv_ca_new (struct ivopts_data *data)
4448 {
4449   struct iv_ca *nw = XNEW (struct iv_ca);
4450
4451   nw->upto = 0;
4452   nw->bad_uses = 0;
4453   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4454   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4455   nw->cands = BITMAP_ALLOC (NULL);
4456   nw->n_cands = 0;
4457   nw->n_regs = 0;
4458   nw->cand_use_cost = zero_cost;
4459   nw->cand_cost = 0;
4460   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4461   nw->cost = zero_cost;
4462
4463   return nw;
4464 }
4465
4466 /* Free memory occupied by the set IVS.  */
4467
4468 static void
4469 iv_ca_free (struct iv_ca **ivs)
4470 {
4471   free ((*ivs)->cand_for_use);
4472   free ((*ivs)->n_cand_uses);
4473   BITMAP_FREE ((*ivs)->cands);
4474   free ((*ivs)->n_invariant_uses);
4475   free (*ivs);
4476   *ivs = NULL;
4477 }
4478
4479 /* Dumps IVS to FILE.  */
4480
4481 static void
4482 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4483 {
4484   const char *pref = "  invariants ";
4485   unsigned i;
4486   comp_cost cost = iv_ca_cost (ivs);
4487
4488   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4489   bitmap_print (file, ivs->cands, "  candidates ","\n");
4490
4491   for (i = 1; i <= data->max_inv_id; i++)
4492     if (ivs->n_invariant_uses[i])
4493       {
4494         fprintf (file, "%s%d", pref, i);
4495         pref = ", ";
4496       }
4497   fprintf (file, "\n");
4498 }
4499
4500 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4501    new set, and store differences in DELTA.  Number of induction variables
4502    in the new set is stored to N_IVS.  */
4503
4504 static comp_cost
4505 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4506               struct iv_cand *cand, struct iv_ca_delta **delta,
4507               unsigned *n_ivs)
4508 {
4509   unsigned i;
4510   comp_cost cost;
4511   struct iv_use *use;
4512   struct cost_pair *old_cp, *new_cp;
4513
4514   *delta = NULL;
4515   for (i = 0; i < ivs->upto; i++)
4516     {
4517       use = iv_use (data, i);
4518       old_cp = iv_ca_cand_for_use (ivs, use);
4519
4520       if (old_cp
4521           && old_cp->cand == cand)
4522         continue;
4523
4524       new_cp = get_use_iv_cost (data, use, cand);
4525       if (!new_cp)
4526         continue;
4527
4528       if (!iv_ca_has_deps (ivs, new_cp))
4529         continue;
4530
4531       if (!cheaper_cost_pair (new_cp, old_cp))
4532         continue;
4533
4534       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4535     }
4536
4537   iv_ca_delta_commit (data, ivs, *delta, true);
4538   cost = iv_ca_cost (ivs);
4539   if (n_ivs)
4540     *n_ivs = iv_ca_n_cands (ivs);
4541   iv_ca_delta_commit (data, ivs, *delta, false);
4542
4543   return cost;
4544 }
4545
4546 /* Try narrowing set IVS by removing CAND.  Return the cost of
4547    the new set and store the differences in DELTA.  */
4548
4549 static comp_cost
4550 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4551               struct iv_cand *cand, struct iv_ca_delta **delta)
4552 {
4553   unsigned i, ci;
4554   struct iv_use *use;
4555   struct cost_pair *old_cp, *new_cp, *cp;
4556   bitmap_iterator bi;
4557   struct iv_cand *cnd;
4558   comp_cost cost;
4559
4560   *delta = NULL;
4561   for (i = 0; i < n_iv_uses (data); i++)
4562     {
4563       use = iv_use (data, i);
4564
4565       old_cp = iv_ca_cand_for_use (ivs, use);
4566       if (old_cp->cand != cand)
4567         continue;
4568
4569       new_cp = NULL;
4570
4571       if (data->consider_all_candidates)
4572         {
4573           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4574             {
4575               if (ci == cand->id)
4576                 continue;
4577
4578               cnd = iv_cand (data, ci);
4579
4580               cp = get_use_iv_cost (data, use, cnd);
4581               if (!cp)
4582                 continue;
4583               if (!iv_ca_has_deps (ivs, cp))
4584                 continue;
4585
4586               if (!cheaper_cost_pair (cp, new_cp))
4587                 continue;
4588
4589               new_cp = cp;
4590             }
4591         }
4592       else
4593         {
4594           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4595             {
4596               if (ci == cand->id)
4597                 continue;
4598
4599               cnd = iv_cand (data, ci);
4600
4601               cp = get_use_iv_cost (data, use, cnd);
4602               if (!cp)
4603                 continue;
4604               if (!iv_ca_has_deps (ivs, cp))
4605                 continue;
4606
4607               if (!cheaper_cost_pair (cp, new_cp))
4608                 continue;
4609
4610               new_cp = cp;
4611             }
4612         }
4613
4614       if (!new_cp)
4615         {
4616           iv_ca_delta_free (delta);
4617           return infinite_cost;
4618         }
4619
4620       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4621     }
4622
4623   iv_ca_delta_commit (data, ivs, *delta, true);
4624   cost = iv_ca_cost (ivs);
4625   iv_ca_delta_commit (data, ivs, *delta, false);
4626
4627   return cost;
4628 }
4629
4630 /* Try optimizing the set of candidates IVS by removing candidates different
4631    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4632    differences in DELTA.  */
4633
4634 static comp_cost
4635 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4636              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4637 {
4638   bitmap_iterator bi;
4639   struct iv_ca_delta *act_delta, *best_delta;
4640   unsigned i;
4641   comp_cost best_cost, acost;
4642   struct iv_cand *cand;
4643
4644   best_delta = NULL;
4645   best_cost = iv_ca_cost (ivs);
4646
4647   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4648     {
4649       cand = iv_cand (data, i);
4650
4651       if (cand == except_cand)
4652         continue;
4653
4654       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4655
4656       if (compare_costs (acost, best_cost) < 0)
4657         {
4658           best_cost = acost;
4659           iv_ca_delta_free (&best_delta);
4660           best_delta = act_delta;
4661         }
4662       else
4663         iv_ca_delta_free (&act_delta);
4664     }
4665
4666   if (!best_delta)
4667     {
4668       *delta = NULL;
4669       return best_cost;
4670     }
4671
4672   /* Recurse to possibly remove other unnecessary ivs.  */
4673   iv_ca_delta_commit (data, ivs, best_delta, true);
4674   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4675   iv_ca_delta_commit (data, ivs, best_delta, false);
4676   *delta = iv_ca_delta_join (best_delta, *delta);
4677   return best_cost;
4678 }
4679
4680 /* Tries to extend the sets IVS in the best possible way in order
4681    to express the USE.  */
4682
4683 static bool
4684 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4685                   struct iv_use *use)
4686 {
4687   comp_cost best_cost, act_cost;
4688   unsigned i;
4689   bitmap_iterator bi;
4690   struct iv_cand *cand;
4691   struct iv_ca_delta *best_delta = NULL, *act_delta;
4692   struct cost_pair *cp;
4693
4694   iv_ca_add_use (data, ivs, use);
4695   best_cost = iv_ca_cost (ivs);
4696
4697   cp = iv_ca_cand_for_use (ivs, use);
4698   if (cp)
4699     {
4700       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4701       iv_ca_set_no_cp (data, ivs, use);
4702     }
4703
4704   /* First try important candidates not based on any memory object.  Only if
4705      this fails, try the specific ones.  Rationale -- in loops with many
4706      variables the best choice often is to use just one generic biv.  If we
4707      added here many ivs specific to the uses, the optimization algorithm later
4708      would be likely to get stuck in a local minimum, thus causing us to create
4709      too many ivs.  The approach from few ivs to more seems more likely to be
4710      successful -- starting from few ivs, replacing an expensive use by a
4711      specific iv should always be a win.  */
4712   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4713     {
4714       cand = iv_cand (data, i);
4715
4716       if (cand->iv->base_object != NULL_TREE)
4717         continue;
4718
4719       if (iv_ca_cand_used_p (ivs, cand))
4720         continue;
4721
4722       cp = get_use_iv_cost (data, use, cand);
4723       if (!cp)
4724         continue;
4725
4726       iv_ca_set_cp (data, ivs, use, cp);
4727       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4728       iv_ca_set_no_cp (data, ivs, use);
4729       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4730
4731       if (compare_costs (act_cost, best_cost) < 0)
4732         {
4733           best_cost = act_cost;
4734
4735           iv_ca_delta_free (&best_delta);
4736           best_delta = act_delta;
4737         }
4738       else
4739         iv_ca_delta_free (&act_delta);
4740     }
4741
4742   if (infinite_cost_p (best_cost))
4743     {
4744       for (i = 0; i < use->n_map_members; i++)
4745         {
4746           cp = use->cost_map + i;
4747           cand = cp->cand;
4748           if (!cand)
4749             continue;
4750
4751           /* Already tried this.  */
4752           if (cand->important && cand->iv->base_object == NULL_TREE)
4753             continue;
4754
4755           if (iv_ca_cand_used_p (ivs, cand))
4756             continue;
4757
4758           act_delta = NULL;
4759           iv_ca_set_cp (data, ivs, use, cp);
4760           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4761           iv_ca_set_no_cp (data, ivs, use);
4762           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4763                                        cp, act_delta);
4764
4765           if (compare_costs (act_cost, best_cost) < 0)
4766             {
4767               best_cost = act_cost;
4768
4769               if (best_delta)
4770                 iv_ca_delta_free (&best_delta);
4771               best_delta = act_delta;
4772             }
4773           else
4774             iv_ca_delta_free (&act_delta);
4775         }
4776     }
4777
4778   iv_ca_delta_commit (data, ivs, best_delta, true);
4779   iv_ca_delta_free (&best_delta);
4780
4781   return !infinite_cost_p (best_cost);
4782 }
4783
4784 /* Finds an initial assignment of candidates to uses.  */
4785
4786 static struct iv_ca *
4787 get_initial_solution (struct ivopts_data *data)
4788 {
4789   struct iv_ca *ivs = iv_ca_new (data);
4790   unsigned i;
4791
4792   for (i = 0; i < n_iv_uses (data); i++)
4793     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4794       {
4795         iv_ca_free (&ivs);
4796         return NULL;
4797       }
4798
4799   return ivs;
4800 }
4801
4802 /* Tries to improve set of induction variables IVS.  */
4803
4804 static bool
4805 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4806 {
4807   unsigned i, n_ivs;
4808   comp_cost acost, best_cost = iv_ca_cost (ivs);
4809   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4810   struct iv_cand *cand;
4811
4812   /* Try extending the set of induction variables by one.  */
4813   for (i = 0; i < n_iv_cands (data); i++)
4814     {
4815       cand = iv_cand (data, i);
4816
4817       if (iv_ca_cand_used_p (ivs, cand))
4818         continue;
4819
4820       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4821       if (!act_delta)
4822         continue;
4823
4824       /* If we successfully added the candidate and the set is small enough,
4825          try optimizing it by removing other candidates.  */
4826       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4827         {
4828           iv_ca_delta_commit (data, ivs, act_delta, true);
4829           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4830           iv_ca_delta_commit (data, ivs, act_delta, false);
4831           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4832         }
4833
4834       if (compare_costs (acost, best_cost) < 0)
4835         {
4836           best_cost = acost;
4837           iv_ca_delta_free (&best_delta);
4838           best_delta = act_delta;
4839         }
4840       else
4841         iv_ca_delta_free (&act_delta);
4842     }
4843
4844   if (!best_delta)
4845     {
4846       /* Try removing the candidates from the set instead.  */
4847       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4848
4849       /* Nothing more we can do.  */
4850       if (!best_delta)
4851         return false;
4852     }
4853
4854   iv_ca_delta_commit (data, ivs, best_delta, true);
4855   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4856   iv_ca_delta_free (&best_delta);
4857   return true;
4858 }
4859
4860 /* Attempts to find the optimal set of induction variables.  We do simple
4861    greedy heuristic -- we try to replace at most one candidate in the selected
4862    solution and remove the unused ivs while this improves the cost.  */
4863
4864 static struct iv_ca *
4865 find_optimal_iv_set (struct ivopts_data *data)
4866 {
4867   unsigned i;
4868   struct iv_ca *set;
4869   struct iv_use *use;
4870
4871   /* Get the initial solution.  */
4872   set = get_initial_solution (data);
4873   if (!set)
4874     {
4875       if (dump_file && (dump_flags & TDF_DETAILS))
4876         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4877       return NULL;
4878     }
4879
4880   if (dump_file && (dump_flags & TDF_DETAILS))
4881     {
4882       fprintf (dump_file, "Initial set of candidates:\n");
4883       iv_ca_dump (data, dump_file, set);
4884     }
4885
4886   while (try_improve_iv_set (data, set))
4887     {
4888       if (dump_file && (dump_flags & TDF_DETAILS))
4889         {
4890           fprintf (dump_file, "Improved to:\n");
4891           iv_ca_dump (data, dump_file, set);
4892         }
4893     }
4894
4895   if (dump_file && (dump_flags & TDF_DETAILS))
4896     {
4897       comp_cost cost = iv_ca_cost (set);
4898       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4899     }
4900
4901   for (i = 0; i < n_iv_uses (data); i++)
4902     {
4903       use = iv_use (data, i);
4904       use->selected = iv_ca_cand_for_use (set, use)->cand;
4905     }
4906
4907   return set;
4908 }
4909
4910 /* Creates a new induction variable corresponding to CAND.  */
4911
4912 static void
4913 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4914 {
4915   block_stmt_iterator incr_pos;
4916   tree base;
4917   bool after = false;
4918
4919   if (!cand->iv)
4920     return;
4921
4922   switch (cand->pos)
4923     {
4924     case IP_NORMAL:
4925       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4926       break;
4927
4928     case IP_END:
4929       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4930       after = true;
4931       break;
4932
4933     case IP_ORIGINAL:
4934       /* Mark that the iv is preserved.  */
4935       name_info (data, cand->var_before)->preserve_biv = true;
4936       name_info (data, cand->var_after)->preserve_biv = true;
4937
4938       /* Rewrite the increment so that it uses var_before directly.  */
4939       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4940
4941       return;
4942     }
4943
4944   gimple_add_tmp_var (cand->var_before);
4945   add_referenced_var (cand->var_before);
4946
4947   base = unshare_expr (cand->iv->base);
4948
4949   create_iv (base, unshare_expr (cand->iv->step),
4950              cand->var_before, data->current_loop,
4951              &incr_pos, after, &cand->var_before, &cand->var_after);
4952 }
4953
4954 /* Creates new induction variables described in SET.  */
4955
4956 static void
4957 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4958 {
4959   unsigned i;
4960   struct iv_cand *cand;
4961   bitmap_iterator bi;
4962
4963   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4964     {
4965       cand = iv_cand (data, i);
4966       create_new_iv (data, cand);
4967     }
4968 }
4969
4970 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4971    is true, remove also the ssa name defined by the statement.  */
4972
4973 static void
4974 remove_statement (tree stmt, bool including_defined_name)
4975 {
4976   if (TREE_CODE (stmt) == PHI_NODE)
4977     {
4978       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4979     }
4980   else
4981     {
4982       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4983
4984       bsi_remove (&bsi, true);
4985       release_defs (stmt);
4986     }
4987 }
4988
4989 /* Rewrites USE (definition of iv used in a nonlinear expression)
4990    using candidate CAND.  */
4991
4992 static void
4993 rewrite_use_nonlinear_expr (struct ivopts_data *data,
4994                             struct iv_use *use, struct iv_cand *cand)
4995 {
4996   tree comp;
4997   tree op, tgt, ass;
4998   block_stmt_iterator bsi;
4999
5000   /* An important special case -- if we are asked to express value of
5001      the original iv by itself, just exit; there is no need to
5002      introduce a new computation (that might also need casting the
5003      variable to unsigned and back).  */
5004   if (cand->pos == IP_ORIGINAL
5005       && cand->incremented_at == use->stmt)
5006     {
5007       tree step, ctype, utype;
5008       enum tree_code incr_code = PLUS_EXPR;
5009
5010       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5011       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5012
5013       step = cand->iv->step;
5014       ctype = TREE_TYPE (step);
5015       utype = TREE_TYPE (cand->var_after);
5016       if (TREE_CODE (step) == NEGATE_EXPR)
5017         {
5018           incr_code = MINUS_EXPR;
5019           step = TREE_OPERAND (step, 0);
5020         }
5021
5022       /* Check whether we may leave the computation unchanged.
5023          This is the case only if it does not rely on other
5024          computations in the loop -- otherwise, the computation
5025          we rely upon may be removed in remove_unused_ivs,
5026          thus leading to ICE.  */
5027       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5028       if (TREE_CODE (op) == PLUS_EXPR
5029           || TREE_CODE (op) == MINUS_EXPR
5030           || TREE_CODE (op) == POINTER_PLUS_EXPR)
5031         {
5032           if (TREE_OPERAND (op, 0) == cand->var_before)
5033             op = TREE_OPERAND (op, 1);
5034           else if (TREE_CODE (op) != MINUS_EXPR
5035                    && TREE_OPERAND (op, 1) == cand->var_before)
5036             op = TREE_OPERAND (op, 0);
5037           else
5038             op = NULL_TREE;
5039         }
5040       else
5041         op = NULL_TREE;
5042
5043       if (op
5044           && (TREE_CODE (op) == INTEGER_CST
5045               || operand_equal_p (op, step, 0)))
5046         return;
5047
5048       /* Otherwise, add the necessary computations to express
5049          the iv.  */
5050       op = fold_convert (ctype, cand->var_before);
5051       comp = fold_convert (utype,
5052                            build2 (incr_code, ctype, op,
5053                                    unshare_expr (step)));
5054     }
5055   else
5056     {
5057       comp = get_computation (data->current_loop, use, cand);
5058       gcc_assert (comp != NULL_TREE);
5059     }
5060
5061   switch (TREE_CODE (use->stmt))
5062     {
5063     case PHI_NODE:
5064       tgt = PHI_RESULT (use->stmt);
5065
5066       /* If we should keep the biv, do not replace it.  */
5067       if (name_info (data, tgt)->preserve_biv)
5068         return;
5069
5070       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5071       break;
5072
5073     case GIMPLE_MODIFY_STMT:
5074       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5075       bsi = bsi_for_stmt (use->stmt);
5076       break;
5077
5078     default:
5079       gcc_unreachable ();
5080     }
5081
5082   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5083                                  true, BSI_SAME_STMT);
5084
5085   if (TREE_CODE (use->stmt) == PHI_NODE)
5086     {
5087       ass = build_gimple_modify_stmt (tgt, op);
5088       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5089       remove_statement (use->stmt, false);
5090       SSA_NAME_DEF_STMT (tgt) = ass;
5091     }
5092   else
5093     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5094 }
5095
5096 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5097    for_each_index.  */
5098
5099 static bool
5100 idx_remove_ssa_names (tree base, tree *idx,
5101                       void *data ATTRIBUTE_UNUSED)
5102 {
5103   tree *op;
5104
5105   if (TREE_CODE (*idx) == SSA_NAME)
5106     *idx = SSA_NAME_VAR (*idx);
5107
5108   if (TREE_CODE (base) == ARRAY_REF)
5109     {
5110       op = &TREE_OPERAND (base, 2);
5111       if (*op
5112           && TREE_CODE (*op) == SSA_NAME)
5113         *op = SSA_NAME_VAR (*op);
5114       op = &TREE_OPERAND (base, 3);
5115       if (*op
5116           && TREE_CODE (*op) == SSA_NAME)
5117         *op = SSA_NAME_VAR (*op);
5118     }
5119
5120   return true;
5121 }
5122
5123 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5124
5125 static tree
5126 unshare_and_remove_ssa_names (tree ref)
5127 {
5128   ref = unshare_expr (ref);
5129   for_each_index (&ref, idx_remove_ssa_names, NULL);
5130
5131   return ref;
5132 }
5133
5134 /* Extract the alias analysis info for the memory reference REF.  There are
5135    several ways how this information may be stored and what precisely is
5136    its semantics depending on the type of the reference, but there always is
5137    somewhere hidden one _DECL node that is used to determine the set of
5138    virtual operands for the reference.  The code below deciphers this jungle
5139    and extracts this single useful piece of information.  */
5140
5141 static tree
5142 get_ref_tag (tree ref, tree orig)
5143 {
5144   tree var = get_base_address (ref);
5145   tree aref = NULL_TREE, tag, sv;
5146   HOST_WIDE_INT offset, size, maxsize;
5147
5148   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5149     {
5150       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5151       if (ref)
5152         break;
5153     }
5154
5155   if (!var)
5156     return NULL_TREE;
5157
5158   if (TREE_CODE (var) == INDIRECT_REF)
5159     {
5160       /* If the base is a dereference of a pointer, first check its name memory
5161          tag.  If it does not have one, use its symbol memory tag.  */
5162       var = TREE_OPERAND (var, 0);
5163       if (TREE_CODE (var) != SSA_NAME)
5164         return NULL_TREE;
5165
5166       if (SSA_NAME_PTR_INFO (var))
5167         {
5168           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5169           if (tag)
5170             return tag;
5171         }
5172
5173       var = SSA_NAME_VAR (var);
5174       tag = symbol_mem_tag (var);
5175       gcc_assert (tag != NULL_TREE);
5176       return tag;
5177     }
5178   else
5179     {
5180       if (!DECL_P (var))
5181         return NULL_TREE;
5182
5183       tag = symbol_mem_tag (var);
5184       if (tag)
5185         return tag;
5186
5187       return var;
5188     }
5189 }
5190
5191 /* Copies the reference information from OLD_REF to NEW_REF.  */
5192
5193 static void
5194 copy_ref_info (tree new_ref, tree old_ref)
5195 {
5196   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5197     copy_mem_ref_info (new_ref, old_ref);
5198   else
5199     {
5200       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5201       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5202     }
5203 }
5204
5205 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5206
5207 static void
5208 rewrite_use_address (struct ivopts_data *data,
5209                      struct iv_use *use, struct iv_cand *cand)
5210 {
5211   aff_tree aff;
5212   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5213   tree ref;
5214   bool ok;
5215
5216   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5217   gcc_assert (ok);
5218   unshare_aff_combination (&aff);
5219
5220   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5221   copy_ref_info (ref, *use->op_p);
5222   *use->op_p = ref;
5223 }
5224
5225 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5226    candidate CAND.  */
5227
5228 static void
5229 rewrite_use_compare (struct ivopts_data *data,
5230                      struct iv_use *use, struct iv_cand *cand)
5231 {
5232   tree comp, *var_p, op, bound;
5233   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5234   enum tree_code compare;
5235   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5236   bool ok;
5237
5238   bound = cp->value;
5239   if (bound)
5240     {
5241       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5242       tree var_type = TREE_TYPE (var);
5243
5244       compare = iv_elimination_compare (data, use);
5245       bound = unshare_expr (fold_convert (var_type, bound));
5246       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5247                                      true, BSI_SAME_STMT);
5248
5249       *use->op_p = build2 (compare, boolean_type_node, var, op);
5250       return;
5251     }
5252
5253   /* The induction variable elimination failed; just express the original
5254      giv.  */
5255   comp = get_computation (data->current_loop, use, cand);
5256   gcc_assert (comp != NULL_TREE);
5257
5258   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5259   gcc_assert (ok);
5260
5261   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5262                                      true, BSI_SAME_STMT);
5263 }
5264
5265 /* Rewrites USE using candidate CAND.  */
5266
5267 static void
5268 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5269 {
5270   push_stmt_changes (&use->stmt);
5271
5272   switch (use->type)
5273     {
5274       case USE_NONLINEAR_EXPR:
5275         rewrite_use_nonlinear_expr (data, use, cand);
5276         break;
5277
5278       case USE_ADDRESS:
5279         rewrite_use_address (data, use, cand);
5280         break;
5281
5282       case USE_COMPARE:
5283         rewrite_use_compare (data, use, cand);
5284         break;
5285
5286       default:
5287         gcc_unreachable ();
5288     }
5289
5290   pop_stmt_changes (&use->stmt);
5291 }
5292
5293 /* Rewrite the uses using the selected induction variables.  */
5294
5295 static void
5296 rewrite_uses (struct ivopts_data *data)
5297 {
5298   unsigned i;
5299   struct iv_cand *cand;
5300   struct iv_use *use;
5301
5302   for (i = 0; i < n_iv_uses (data); i++)
5303     {
5304       use = iv_use (data, i);
5305       cand = use->selected;
5306       gcc_assert (cand);
5307
5308       rewrite_use (data, use, cand);
5309     }
5310 }
5311
5312 /* Removes the ivs that are not used after rewriting.  */
5313
5314 static void
5315 remove_unused_ivs (struct ivopts_data *data)
5316 {
5317   unsigned j;
5318   bitmap_iterator bi;
5319
5320   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5321     {
5322       struct version_info *info;
5323
5324       info = ver_info (data, j);
5325       if (info->iv
5326           && !integer_zerop (info->iv->step)
5327           && !info->inv_id
5328           && !info->iv->have_use_for
5329           && !info->preserve_biv)
5330         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5331     }
5332 }
5333
5334 /* Frees data allocated by the optimization of a single loop.  */
5335
5336 static void
5337 free_loop_data (struct ivopts_data *data)
5338 {
5339   unsigned i, j;
5340   bitmap_iterator bi;
5341   tree obj;
5342
5343   if (data->niters)
5344     {
5345       pointer_map_destroy (data->niters);
5346       data->niters = NULL;
5347     }
5348
5349   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5350     {
5351       struct version_info *info;
5352
5353       info = ver_info (data, i);
5354       if (info->iv)
5355         free (info->iv);
5356       info->iv = NULL;
5357       info->has_nonlin_use = false;
5358       info->preserve_biv = false;
5359       info->inv_id = 0;
5360     }
5361   bitmap_clear (data->relevant);
5362   bitmap_clear (data->important_candidates);
5363
5364   for (i = 0; i < n_iv_uses (data); i++)
5365     {
5366       struct iv_use *use = iv_use (data, i);
5367
5368       free (use->iv);
5369       BITMAP_FREE (use->related_cands);
5370       for (j = 0; j < use->n_map_members; j++)
5371         if (use->cost_map[j].depends_on)
5372           BITMAP_FREE (use->cost_map[j].depends_on);
5373       free (use->cost_map);
5374       free (use);
5375     }
5376   VEC_truncate (iv_use_p, data->iv_uses, 0);
5377
5378   for (i = 0; i < n_iv_cands (data); i++)
5379     {
5380       struct iv_cand *cand = iv_cand (data, i);
5381
5382       if (cand->iv)
5383         free (cand->iv);
5384       if (cand->depends_on)
5385         BITMAP_FREE (cand->depends_on);
5386       free (cand);
5387     }
5388   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5389
5390   if (data->version_info_size < num_ssa_names)
5391     {
5392       data->version_info_size = 2 * num_ssa_names;
5393       free (data->version_info);
5394       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5395     }
5396
5397   data->max_inv_id = 0;
5398
5399   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5400     SET_DECL_RTL (obj, NULL_RTX);
5401
5402   VEC_truncate (tree, decl_rtl_to_reset, 0);
5403 }
5404
5405 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5406    loop tree.  */
5407
5408 static void
5409 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5410 {
5411   free_loop_data (data);
5412   free (data->version_info);
5413   BITMAP_FREE (data->relevant);
5414   BITMAP_FREE (data->important_candidates);
5415
5416   VEC_free (tree, heap, decl_rtl_to_reset);
5417   VEC_free (iv_use_p, heap, data->iv_uses);
5418   VEC_free (iv_cand_p, heap, data->iv_candidates);
5419 }
5420
5421 /* Optimizes the LOOP.  Returns true if anything changed.  */
5422
5423 static bool
5424 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5425 {
5426   bool changed = false;
5427   struct iv_ca *iv_ca;
5428   edge exit;
5429
5430   gcc_assert (!data->niters);
5431   data->current_loop = loop;
5432
5433   if (dump_file && (dump_flags & TDF_DETAILS))
5434     {
5435       fprintf (dump_file, "Processing loop %d\n", loop->num);
5436
5437       exit = single_dom_exit (loop);
5438       if (exit)
5439         {
5440           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5441                    exit->src->index, exit->dest->index);
5442           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5443           fprintf (dump_file, "\n");
5444         }
5445
5446       fprintf (dump_file, "\n");
5447     }
5448
5449   /* For each ssa name determines whether it behaves as an induction variable
5450      in some loop.  */
5451   if (!find_induction_variables (data))
5452     goto finish;
5453
5454   /* Finds interesting uses (item 1).  */
5455   find_interesting_uses (data);
5456   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5457     goto finish;
5458
5459   /* Finds candidates for the induction variables (item 2).  */
5460   find_iv_candidates (data);
5461
5462   /* Calculates the costs (item 3, part 1).  */
5463   determine_use_iv_costs (data);
5464   determine_iv_costs (data);
5465   determine_set_costs (data);
5466
5467   /* Find the optimal set of induction variables (item 3, part 2).  */
5468   iv_ca = find_optimal_iv_set (data);
5469   if (!iv_ca)
5470     goto finish;
5471   changed = true;
5472
5473   /* Create the new induction variables (item 4, part 1).  */
5474   create_new_ivs (data, iv_ca);
5475   iv_ca_free (&iv_ca);
5476
5477   /* Rewrite the uses (item 4, part 2).  */
5478   rewrite_uses (data);
5479
5480   /* Remove the ivs that are unused after rewriting.  */
5481   remove_unused_ivs (data);
5482
5483   /* We have changed the structure of induction variables; it might happen
5484      that definitions in the scev database refer to some of them that were
5485      eliminated.  */
5486   scev_reset ();
5487
5488 finish:
5489   free_loop_data (data);
5490
5491   return changed;
5492 }
5493
5494 /* Main entry point.  Optimizes induction variables in loops.  */
5495
5496 void
5497 tree_ssa_iv_optimize (void)
5498 {
5499   struct loop *loop;
5500   struct ivopts_data data;
5501   loop_iterator li;
5502
5503   tree_ssa_iv_optimize_init (&data);
5504
5505   /* Optimize the loops starting with the innermost ones.  */
5506   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5507     {
5508       if (dump_file && (dump_flags & TDF_DETAILS))
5509         flow_loop_dump (loop, dump_file, NULL, 1);
5510
5511       tree_ssa_iv_optimize_loop (&data, loop);
5512     }
5513
5514   tree_ssa_iv_optimize_finalize (&data);
5515 }