gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 2, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING.  If not, write to the Free
  18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  19 02110-1301, USA.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "rtl.h"
  71 #include "tm_p.h"
  72 #include "hard-reg-set.h"
  73 #include "basic-block.h"
  74 #include "output.h"
  75 #include "diagnostic.h"
  76 #include "tree-flow.h"
  77 #include "tree-dump.h"
  78 #include "timevar.h"
  79 #include "cfgloop.h"
  80 #include "varray.h"
  81 #include "expr.h"
  82 #include "tree-pass.h"
  83 #include "ggc.h"
  84 #include "insn-config.h"
  85 #include "recog.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92
  93 /* The infinite cost.  */
  94 #define INFTY 10000000
  95
  96 /* The expected number of loop iterations.  TODO -- use profiling instead of
  97    this.  */
  98 #define AVG_LOOP_NITER(LOOP) 5
  99
 100
 101 /* Representation of the induction variable.  */
 102 struct iv
 103 {
 104   tree base;            /* Initial value of the iv.  */
 105   tree base_object;     /* A memory object to that the induction variable points.  */
 106   tree step;            /* Step of the iv (constant only).  */
 107   tree ssa_name;        /* The ssa name with the value.  */
 108   bool biv_p;           /* Is it a biv?  */
 109   bool have_use_for;    /* Do we already have a use for it?  */
 110   unsigned use_id;      /* The identifier in the use if it is the case.  */
 111 };
 112
 113 /* Per-ssa version information (induction variable descriptions, etc.).  */
 114 struct version_info
 115 {
 116   tree name;            /* The ssa name.  */
 117   struct iv *iv;        /* Induction variable description.  */
 118   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 119                            an expression that is not an induction variable.  */
 120   unsigned inv_id;      /* Id of an invariant.  */
 121   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 122 };
 123
 124 /* Types of uses.  */
 125 enum use_type
 126 {
 127   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 128   USE_ADDRESS,          /* Use in an address.  */
 129   USE_COMPARE           /* Use is a compare.  */
 130 };
 131
 132 /* The candidate - cost pair.  */
 133 struct cost_pair
 134 {
 135   struct iv_cand *cand; /* The candidate.  */
 136   unsigned cost;        /* The cost.  */
 137   bitmap depends_on;    /* The list of invariants that have to be
 138                            preserved.  */
 139   tree value;           /* For final value elimination, the expression for
 140                            the final value of the iv.  For iv elimination,
 141                            the new bound to compare with.  */
 142 };
 143
 144 /* Use.  */
 145 struct iv_use
 146 {
 147   unsigned id;          /* The id of the use.  */
 148   enum use_type type;   /* Type of the use.  */
 149   struct iv *iv;        /* The induction variable it is based on.  */
 150   tree stmt;            /* Statement in that it occurs.  */
 151   tree *op_p;           /* The place where it occurs.  */
 152   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 153                            important ones.  */
 154
 155   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 156   struct cost_pair *cost_map;
 157                         /* The costs wrto the iv candidates.  */
 158
 159   struct iv_cand *selected;
 160                         /* The selected candidate.  */
 161 };
 162
 163 /* The position where the iv is computed.  */
 164 enum iv_position
 165 {
 166   IP_NORMAL,            /* At the end, just before the exit condition.  */
 167   IP_END,               /* At the end of the latch block.  */
 168   IP_ORIGINAL           /* The original biv.  */
 169 };
 170
 171 /* The induction variable candidate.  */
 172 struct iv_cand
 173 {
 174   unsigned id;          /* The number of the candidate.  */
 175   bool important;       /* Whether this is an "important" candidate, i.e. such
 176                            that it should be considered by all uses.  */
 177   enum iv_position pos; /* Where it is computed.  */
 178   tree incremented_at;  /* For original biv, the statement where it is
 179                            incremented.  */
 180   tree var_before;      /* The variable used for it before increment.  */
 181   tree var_after;       /* The variable used for it after increment.  */
 182   struct iv *iv;        /* The value of the candidate.  NULL for
 183                            "pseudocandidate" used to indicate the possibility
 184                            to replace the final value of an iv by direct
 185                            computation of the value.  */
 186   unsigned cost;        /* Cost of the candidate.  */
 187   bitmap depends_on;    /* The list of invariants that are used in step of the
 188                            biv.  */
 189 };
 190
 191 /* The data used by the induction variable optimizations.  */
 192
 193 typedef struct iv_use *iv_use_p;
 194 DEF_VEC_P(iv_use_p);
 195 DEF_VEC_ALLOC_P(iv_use_p,heap);
 196
 197 typedef struct iv_cand *iv_cand_p;
 198 DEF_VEC_P(iv_cand_p);
 199 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 200
 201 struct ivopts_data
 202 {
 203   /* The currently optimized loop.  */
 204   struct loop *current_loop;
 205
 206   /* Number of registers used in it.  */
 207   unsigned regs_used;
 208
 209   /* Numbers of iterations for all exits of the current loop.  */
 210   htab_t niters;
 211
 212   /* The size of version_info array allocated.  */
 213   unsigned version_info_size;
 214
 215   /* The array of information for the ssa names.  */
 216   struct version_info *version_info;
 217
 218   /* The bitmap of indices in version_info whose value was changed.  */
 219   bitmap relevant;
 220
 221   /* The maximum invariant id.  */
 222   unsigned max_inv_id;
 223
 224   /* The uses of induction variables.  */
 225   VEC(iv_use_p,heap) *iv_uses;
 226
 227   /* The candidates.  */
 228   VEC(iv_cand_p,heap) *iv_candidates;
 229
 230   /* A bitmap of important candidates.  */
 231   bitmap important_candidates;
 232
 233   /* Whether to consider just related and important candidates when replacing a
 234      use.  */
 235   bool consider_all_candidates;
 236 };
 237
 238 /* An assignment of iv candidates to uses.  */
 239
 240 struct iv_ca
 241 {
 242   /* The number of uses covered by the assignment.  */
 243   unsigned upto;
 244
 245   /* Number of uses that cannot be expressed by the candidates in the set.  */
 246   unsigned bad_uses;
 247
 248   /* Candidate assigned to a use, together with the related costs.  */
 249   struct cost_pair **cand_for_use;
 250
 251   /* Number of times each candidate is used.  */
 252   unsigned *n_cand_uses;
 253
 254   /* The candidates used.  */
 255   bitmap cands;
 256
 257   /* The number of candidates in the set.  */
 258   unsigned n_cands;
 259
 260   /* Total number of registers needed.  */
 261   unsigned n_regs;
 262
 263   /* Total cost of expressing uses.  */
 264   unsigned cand_use_cost;
 265
 266   /* Total cost of candidates.  */
 267   unsigned cand_cost;
 268
 269   /* Number of times each invariant is used.  */
 270   unsigned *n_invariant_uses;
 271
 272   /* Total cost of the assignment.  */
 273   unsigned cost;
 274 };
 275
 276 /* Difference of two iv candidate assignments.  */
 277
 278 struct iv_ca_delta
 279 {
 280   /* Changed use.  */
 281   struct iv_use *use;
 282
 283   /* An old assignment (for rollback purposes).  */
 284   struct cost_pair *old_cp;
 285
 286   /* A new assignment.  */
 287   struct cost_pair *new_cp;
 288
 289   /* Next change in the list.  */
 290   struct iv_ca_delta *next_change;
 291 };
 292
 293 /* Bound on number of candidates below that all candidates are considered.  */
 294
 295 #define CONSIDER_ALL_CANDIDATES_BOUND \
 296   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 297
 298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 299    optimizing such a loop would help, and it would take ages).  */
 300
 301 #define MAX_CONSIDERED_USES \
 302   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 303
 304 /* If there are at most this number of ivs in the set, try removing unnecessary
 305    ivs from the set always.  */
 306
 307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 308   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 309
 310 /* The list of trees for that the decl_rtl field must be reset is stored
 311    here.  */
 312
 313 static VEC(tree,heap) *decl_rtl_to_reset;
 314
 315 /* Number of uses recorded in DATA.  */
 316
 317 static inline unsigned
 318 n_iv_uses (struct ivopts_data *data)
 319 {
 320   return VEC_length (iv_use_p, data->iv_uses);
 321 }
 322
 323 /* Ith use recorded in DATA.  */
 324
 325 static inline struct iv_use *
 326 iv_use (struct ivopts_data *data, unsigned i)
 327 {
 328   return VEC_index (iv_use_p, data->iv_uses, i);
 329 }
 330
 331 /* Number of candidates recorded in DATA.  */
 332
 333 static inline unsigned
 334 n_iv_cands (struct ivopts_data *data)
 335 {
 336   return VEC_length (iv_cand_p, data->iv_candidates);
 337 }
 338
 339 /* Ith candidate recorded in DATA.  */
 340
 341 static inline struct iv_cand *
 342 iv_cand (struct ivopts_data *data, unsigned i)
 343 {
 344   return VEC_index (iv_cand_p, data->iv_candidates, i);
 345 }
 346
 347 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 348
 349 edge
 350 single_dom_exit (struct loop *loop)
 351 {
 352   edge exit = loop->single_exit;
 353
 354   if (!exit)
 355     return NULL;
 356
 357   if (!just_once_each_iteration_p (loop, exit->src))
 358     return NULL;
 359
 360   return exit;
 361 }
 362
 363 /* Dumps information about the induction variable IV to FILE.  */
 364
 365 extern void dump_iv (FILE *, struct iv *);
 366 void
 367 dump_iv (FILE *file, struct iv *iv)
 368 {
 369   if (iv->ssa_name)
 370     {
 371       fprintf (file, "ssa name ");
 372       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 373       fprintf (file, "\n");
 374     }
 375
 376   fprintf (file, "  type ");
 377   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 378   fprintf (file, "\n");
 379
 380   if (iv->step)
 381     {
 382       fprintf (file, "  base ");
 383       print_generic_expr (file, iv->base, TDF_SLIM);
 384       fprintf (file, "\n");
 385
 386       fprintf (file, "  step ");
 387       print_generic_expr (file, iv->step, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390   else
 391     {
 392       fprintf (file, "  invariant ");
 393       print_generic_expr (file, iv->base, TDF_SLIM);
 394       fprintf (file, "\n");
 395     }
 396
 397   if (iv->base_object)
 398     {
 399       fprintf (file, "  base object ");
 400       print_generic_expr (file, iv->base_object, TDF_SLIM);
 401       fprintf (file, "\n");
 402     }
 403
 404   if (iv->biv_p)
 405     fprintf (file, "  is a biv\n");
 406 }
 407
 408 /* Dumps information about the USE to FILE.  */
 409
 410 extern void dump_use (FILE *, struct iv_use *);
 411 void
 412 dump_use (FILE *file, struct iv_use *use)
 413 {
 414   fprintf (file, "use %d\n", use->id);
 415
 416   switch (use->type)
 417     {
 418     case USE_NONLINEAR_EXPR:
 419       fprintf (file, "  generic\n");
 420       break;
 421
 422     case USE_ADDRESS:
 423       fprintf (file, "  address\n");
 424       break;
 425
 426     case USE_COMPARE:
 427       fprintf (file, "  compare\n");
 428       break;
 429
 430     default:
 431       gcc_unreachable ();
 432     }
 433
 434   fprintf (file, "  in statement ");
 435   print_generic_expr (file, use->stmt, TDF_SLIM);
 436   fprintf (file, "\n");
 437
 438   fprintf (file, "  at position ");
 439   if (use->op_p)
 440     print_generic_expr (file, *use->op_p, TDF_SLIM);
 441   fprintf (file, "\n");
 442
 443   dump_iv (file, use->iv);
 444
 445   if (use->related_cands)
 446     {
 447       fprintf (file, "  related candidates ");
 448       dump_bitmap (file, use->related_cands);
 449     }
 450 }
 451
 452 /* Dumps information about the uses to FILE.  */
 453
 454 extern void dump_uses (FILE *, struct ivopts_data *);
 455 void
 456 dump_uses (FILE *file, struct ivopts_data *data)
 457 {
 458   unsigned i;
 459   struct iv_use *use;
 460
 461   for (i = 0; i < n_iv_uses (data); i++)
 462     {
 463       use = iv_use (data, i);
 464
 465       dump_use (file, use);
 466       fprintf (file, "\n");
 467     }
 468 }
 469
 470 /* Dumps information about induction variable candidate CAND to FILE.  */
 471
 472 extern void dump_cand (FILE *, struct iv_cand *);
 473 void
 474 dump_cand (FILE *file, struct iv_cand *cand)
 475 {
 476   struct iv *iv = cand->iv;
 477
 478   fprintf (file, "candidate %d%s\n",
 479            cand->id, cand->important ? " (important)" : "");
 480
 481   if (cand->depends_on)
 482     {
 483       fprintf (file, "  depends on ");
 484       dump_bitmap (file, cand->depends_on);
 485     }
 486
 487   if (!iv)
 488     {
 489       fprintf (file, "  final value replacement\n");
 490       return;
 491     }
 492
 493   switch (cand->pos)
 494     {
 495     case IP_NORMAL:
 496       fprintf (file, "  incremented before exit test\n");
 497       break;
 498
 499     case IP_END:
 500       fprintf (file, "  incremented at end\n");
 501       break;
 502
 503     case IP_ORIGINAL:
 504       fprintf (file, "  original biv\n");
 505       break;
 506     }
 507
 508   dump_iv (file, iv);
 509 }
 510
 511 /* Returns the info for ssa version VER.  */
 512
 513 static inline struct version_info *
 514 ver_info (struct ivopts_data *data, unsigned ver)
 515 {
 516   return data->version_info + ver;
 517 }
 518
 519 /* Returns the info for ssa name NAME.  */
 520
 521 static inline struct version_info *
 522 name_info (struct ivopts_data *data, tree name)
 523 {
 524   return ver_info (data, SSA_NAME_VERSION (name));
 525 }
 526
 527 /* Checks whether there exists number X such that X * B = A, counting modulo
 528    2^BITS.  */
 529
 530 static bool
 531 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
 532         HOST_WIDE_INT *x)
 533 {
 534   unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
 535   unsigned HOST_WIDE_INT inv, ex, val;
 536   unsigned i;
 537
 538   a &= mask;
 539   b &= mask;
 540
 541   /* First divide the whole equation by 2 as long as possible.  */
 542   while (!(a & 1) && !(b & 1))
 543     {
 544       a >>= 1;
 545       b >>= 1;
 546       bits--;
 547       mask >>= 1;
 548     }
 549
 550   if (!(b & 1))
 551     {
 552       /* If b is still even, a is odd and there is no such x.  */
 553       return false;
 554     }
 555
 556   /* Find the inverse of b.  We compute it as
 557      b^(2^(bits - 1) - 1) (mod 2^bits).  */
 558   inv = 1;
 559   ex = b;
 560   for (i = 0; i < bits - 1; i++)
 561     {
 562       inv = (inv * ex) & mask;
 563       ex = (ex * ex) & mask;
 564     }
 565
 566   val = (a * inv) & mask;
 567
 568   gcc_assert (((val * b) & mask) == a);
 569
 570   if ((val >> (bits - 1)) & 1)
 571     val |= ~mask;
 572
 573   *x = val;
 574
 575   return true;
 576 }
 577
 578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 579    emitted in LOOP.  */
 580
 581 static bool
 582 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 583 {
 584   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 585
 586   gcc_assert (bb);
 587
 588   if (sbb == loop->latch)
 589     return true;
 590
 591   if (sbb != bb)
 592     return false;
 593
 594   return stmt == last_stmt (bb);
 595 }
 596
 597 /* Returns true if STMT if after the place where the original induction
 598    variable CAND is incremented.  */
 599
 600 static bool
 601 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 602 {
 603   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 604   basic_block stmt_bb = bb_for_stmt (stmt);
 605   block_stmt_iterator bsi;
 606
 607   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 608     return false;
 609
 610   if (stmt_bb != cand_bb)
 611     return true;
 612
 613   /* Scan the block from the end, since the original ivs are usually
 614      incremented at the end of the loop body.  */
 615   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 616     {
 617       if (bsi_stmt (bsi) == cand->incremented_at)
 618         return false;
 619       if (bsi_stmt (bsi) == stmt)
 620         return true;
 621     }
 622 }
 623
 624 /* Returns true if STMT if after the place where the induction variable
 625    CAND is incremented in LOOP.  */
 626
 627 static bool
 628 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 629 {
 630   switch (cand->pos)
 631     {
 632     case IP_END:
 633       return false;
 634
 635     case IP_NORMAL:
 636       return stmt_after_ip_normal_pos (loop, stmt);
 637
 638     case IP_ORIGINAL:
 639       return stmt_after_ip_original_pos (cand, stmt);
 640
 641     default:
 642       gcc_unreachable ();
 643     }
 644 }
 645
 646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 647
 648 static bool
 649 abnormal_ssa_name_p (tree exp)
 650 {
 651   if (!exp)
 652     return false;
 653
 654   if (TREE_CODE (exp) != SSA_NAME)
 655     return false;
 656
 657   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 658 }
 659
 660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 661    abnormal phi node.  Callback for for_each_index.  */
 662
 663 static bool
 664 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 665                                   void *data ATTRIBUTE_UNUSED)
 666 {
 667   if (TREE_CODE (base) == ARRAY_REF)
 668     {
 669       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 670         return false;
 671       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 672         return false;
 673     }
 674
 675   return !abnormal_ssa_name_p (*index);
 676 }
 677
 678 /* Returns true if EXPR contains a ssa name that occurs in an
 679    abnormal phi node.  */
 680
 681 bool
 682 contains_abnormal_ssa_name_p (tree expr)
 683 {
 684   enum tree_code code;
 685   enum tree_code_class class;
 686
 687   if (!expr)
 688     return false;
 689
 690   code = TREE_CODE (expr);
 691   class = TREE_CODE_CLASS (code);
 692
 693   if (code == SSA_NAME)
 694     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 695
 696   if (code == INTEGER_CST
 697       || is_gimple_min_invariant (expr))
 698     return false;
 699
 700   if (code == ADDR_EXPR)
 701     return !for_each_index (&TREE_OPERAND (expr, 0),
 702                             idx_contains_abnormal_ssa_name_p,
 703                             NULL);
 704
 705   switch (class)
 706     {
 707     case tcc_binary:
 708     case tcc_comparison:
 709       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 710         return true;
 711
 712       /* Fallthru.  */
 713     case tcc_unary:
 714       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 715         return true;
 716
 717       break;
 718
 719     default:
 720       gcc_unreachable ();
 721     }
 722
 723   return false;
 724 }
 725
 726 /* Element of the table in that we cache the numbers of iterations obtained
 727    from exits of the loop.  */
 728
 729 struct nfe_cache_elt
 730 {
 731   /* The edge for that the number of iterations is cached.  */
 732   edge exit;
 733
 734   /* Number of iterations corresponding to this exit, or NULL if it cannot be
 735      determined.  */
 736   tree niter;
 737 };
 738
 739 /* Hash function for nfe_cache_elt E.  */
 740
 741 static hashval_t
 742 nfe_hash (const void *e)
 743 {
 744   const struct nfe_cache_elt *elt = e;
 745
 746   return htab_hash_pointer (elt->exit);
 747 }
 748
 749 /* Equality function for nfe_cache_elt E1 and edge E2.  */
 750
 751 static int
 752 nfe_eq (const void *e1, const void *e2)
 753 {
 754   const struct nfe_cache_elt *elt1 = e1;
 755
 756   return elt1->exit == e2;
 757 }
 758
 759 /*  Returns tree describing number of iterations determined from
 760     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 761
 762 static tree
 763 niter_for_exit (struct ivopts_data *data, edge exit)
 764 {
 765   struct nfe_cache_elt *nfe_desc;
 766   struct tree_niter_desc desc;
 767   PTR *slot;
 768
 769   slot = htab_find_slot_with_hash (data->niters, exit,
 770                                    htab_hash_pointer (exit),
 771                                    INSERT);
 772
 773   if (!*slot)
 774     {
 775       nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
 776       nfe_desc->exit = exit;
 777
 778       /* Try to determine number of iterations.  We must know it
 779          unconditionally (i.e., without possibility of # of iterations
 780          being zero).  Also, we cannot safely work with ssa names that
 781          appear in phi nodes on abnormal edges, so that we do not create
 782          overlapping life ranges for them (PR 27283).  */
 783       if (number_of_iterations_exit (data->current_loop,
 784                                      exit, &desc, true)
 785           && zero_p (desc.may_be_zero)
 786           && !contains_abnormal_ssa_name_p (desc.niter))
 787         nfe_desc->niter = desc.niter;
 788       else
 789         nfe_desc->niter = NULL_TREE;
 790     }
 791   else
 792     nfe_desc = *slot;
 793
 794   return nfe_desc->niter;
 795 }
 796
 797 /* Returns tree describing number of iterations determined from
 798    single dominating exit of DATA->current_loop, or NULL if something
 799    goes wrong.  */
 800
 801 static tree
 802 niter_for_single_dom_exit (struct ivopts_data *data)
 803 {
 804   edge exit = single_dom_exit (data->current_loop);
 805
 806   if (!exit)
 807     return NULL;
 808
 809   return niter_for_exit (data, exit);
 810 }
 811
 812 /* Initializes data structures used by the iv optimization pass, stored
 813    in DATA.  */
 814
 815 static void
 816 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 817 {
 818   data->version_info_size = 2 * num_ssa_names;
 819   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 820   data->relevant = BITMAP_ALLOC (NULL);
 821   data->important_candidates = BITMAP_ALLOC (NULL);
 822   data->max_inv_id = 0;
 823   data->niters = htab_create (10, nfe_hash, nfe_eq, free);
 824   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 825   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 826   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 827 }
 828
 829 /* Returns a memory object to that EXPR points.  In case we are able to
 830    determine that it does not point to any such object, NULL is returned.  */
 831
 832 static tree
 833 determine_base_object (tree expr)
 834 {
 835   enum tree_code code = TREE_CODE (expr);
 836   tree base, obj, op0, op1;
 837
 838   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 839     return NULL_TREE;
 840
 841   switch (code)
 842     {
 843     case INTEGER_CST:
 844       return NULL_TREE;
 845
 846     case ADDR_EXPR:
 847       obj = TREE_OPERAND (expr, 0);
 848       base = get_base_address (obj);
 849
 850       if (!base)
 851         return expr;
 852
 853       if (TREE_CODE (base) == INDIRECT_REF)
 854         return determine_base_object (TREE_OPERAND (base, 0));
 855
 856       return fold_convert (ptr_type_node,
 857                            build_fold_addr_expr (base));
 858
 859     case PLUS_EXPR:
 860     case MINUS_EXPR:
 861       op0 = determine_base_object (TREE_OPERAND (expr, 0));
 862       op1 = determine_base_object (TREE_OPERAND (expr, 1));
 863
 864       if (!op1)
 865         return op0;
 866
 867       if (!op0)
 868         return (code == PLUS_EXPR
 869                 ? op1
 870                 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
 871
 872       return fold_build2 (code, ptr_type_node, op0, op1);
 873
 874     case NOP_EXPR:
 875     case CONVERT_EXPR:
 876       return determine_base_object (TREE_OPERAND (expr, 0));
 877
 878     default:
 879       return fold_convert (ptr_type_node, expr);
 880     }
 881 }
 882
 883 /* Allocates an induction variable with given initial value BASE and step STEP
 884    for loop LOOP.  */
 885
 886 static struct iv *
 887 alloc_iv (tree base, tree step)
 888 {
 889   struct iv *iv = XCNEW (struct iv);
 890
 891   if (step && integer_zerop (step))
 892     step = NULL_TREE;
 893
 894   iv->base = base;
 895   iv->base_object = determine_base_object (base);
 896   iv->step = step;
 897   iv->biv_p = false;
 898   iv->have_use_for = false;
 899   iv->use_id = 0;
 900   iv->ssa_name = NULL_TREE;
 901
 902   return iv;
 903 }
 904
 905 /* Sets STEP and BASE for induction variable IV.  */
 906
 907 static void
 908 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 909 {
 910   struct version_info *info = name_info (data, iv);
 911
 912   gcc_assert (!info->iv);
 913
 914   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 915   info->iv = alloc_iv (base, step);
 916   info->iv->ssa_name = iv;
 917 }
 918
 919 /* Finds induction variable declaration for VAR.  */
 920
 921 static struct iv *
 922 get_iv (struct ivopts_data *data, tree var)
 923 {
 924   basic_block bb;
 925
 926   if (!name_info (data, var)->iv)
 927     {
 928       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 929
 930       if (!bb
 931           || !flow_bb_inside_loop_p (data->current_loop, bb))
 932         set_iv (data, var, var, NULL_TREE);
 933     }
 934
 935   return name_info (data, var)->iv;
 936 }
 937
 938 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 939    not define a simple affine biv with nonzero step.  */
 940
 941 static tree
 942 determine_biv_step (tree phi)
 943 {
 944   struct loop *loop = bb_for_stmt (phi)->loop_father;
 945   tree name = PHI_RESULT (phi);
 946   affine_iv iv;
 947
 948   if (!is_gimple_reg (name))
 949     return NULL_TREE;
 950
 951   if (!simple_iv (loop, phi, name, &iv, true))
 952     return NULL_TREE;
 953
 954   return (zero_p (iv.step) ? NULL_TREE : iv.step);
 955 }
 956
 957 /* Finds basic ivs.  */
 958
 959 static bool
 960 find_bivs (struct ivopts_data *data)
 961 {
 962   tree phi, step, type, base;
 963   bool found = false;
 964   struct loop *loop = data->current_loop;
 965
 966   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 967     {
 968       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 969         continue;
 970
 971       step = determine_biv_step (phi);
 972       if (!step)
 973         continue;
 974
 975       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 976       base = expand_simple_operations (base);
 977       if (contains_abnormal_ssa_name_p (base)
 978           || contains_abnormal_ssa_name_p (step))
 979         continue;
 980
 981       type = TREE_TYPE (PHI_RESULT (phi));
 982       base = fold_convert (type, base);
 983       if (step)
 984         step = fold_convert (type, step);
 985
 986       set_iv (data, PHI_RESULT (phi), base, step);
 987       found = true;
 988     }
 989
 990   return found;
 991 }
 992
 993 /* Marks basic ivs.  */
 994
 995 static void
 996 mark_bivs (struct ivopts_data *data)
 997 {
 998   tree phi, var;
 999   struct iv *iv, *incr_iv;
1000   struct loop *loop = data->current_loop;
1001   basic_block incr_bb;
1002
1003   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1004     {
1005       iv = get_iv (data, PHI_RESULT (phi));
1006       if (!iv)
1007         continue;
1008
1009       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010       incr_iv = get_iv (data, var);
1011       if (!incr_iv)
1012         continue;
1013
1014       /* If the increment is in the subloop, ignore it.  */
1015       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016       if (incr_bb->loop_father != data->current_loop
1017           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1018         continue;
1019
1020       iv->biv_p = true;
1021       incr_iv->biv_p = true;
1022     }
1023 }
1024
1025 /* Checks whether STMT defines a linear induction variable and stores its
1026    parameters to IV.  */
1027
1028 static bool
1029 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1030 {
1031   tree lhs;
1032   struct loop *loop = data->current_loop;
1033
1034   iv->base = NULL_TREE;
1035   iv->step = NULL_TREE;
1036
1037   if (TREE_CODE (stmt) != MODIFY_EXPR)
1038     return false;
1039
1040   lhs = TREE_OPERAND (stmt, 0);
1041   if (TREE_CODE (lhs) != SSA_NAME)
1042     return false;
1043
1044   if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1045     return false;
1046   iv->base = expand_simple_operations (iv->base);
1047
1048   if (contains_abnormal_ssa_name_p (iv->base)
1049       || contains_abnormal_ssa_name_p (iv->step))
1050     return false;
1051
1052   return true;
1053 }
1054
1055 /* Finds general ivs in statement STMT.  */
1056
1057 static void
1058 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1059 {
1060   affine_iv iv;
1061
1062   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1063     return;
1064
1065   set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1066 }
1067
1068 /* Finds general ivs in basic block BB.  */
1069
1070 static void
1071 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1072 {
1073   block_stmt_iterator bsi;
1074
1075   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1076     find_givs_in_stmt (data, bsi_stmt (bsi));
1077 }
1078
1079 /* Finds general ivs.  */
1080
1081 static void
1082 find_givs (struct ivopts_data *data)
1083 {
1084   struct loop *loop = data->current_loop;
1085   basic_block *body = get_loop_body_in_dom_order (loop);
1086   unsigned i;
1087
1088   for (i = 0; i < loop->num_nodes; i++)
1089     find_givs_in_bb (data, body[i]);
1090   free (body);
1091 }
1092
1093 /* For each ssa name defined in LOOP determines whether it is an induction
1094    variable and if so, its initial value and step.  */
1095
1096 static bool
1097 find_induction_variables (struct ivopts_data *data)
1098 {
1099   unsigned i;
1100   bitmap_iterator bi;
1101
1102   if (!find_bivs (data))
1103     return false;
1104
1105   find_givs (data);
1106   mark_bivs (data);
1107
1108   if (dump_file && (dump_flags & TDF_DETAILS))
1109     {
1110       tree niter = niter_for_single_dom_exit (data);
1111
1112       if (niter)
1113         {
1114           fprintf (dump_file, "  number of iterations ");
1115           print_generic_expr (dump_file, niter, TDF_SLIM);
1116           fprintf (dump_file, "\n\n");
1117         };
1118
1119       fprintf (dump_file, "Induction variables:\n\n");
1120
1121       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1122         {
1123           if (ver_info (data, i)->iv)
1124             dump_iv (dump_file, ver_info (data, i)->iv);
1125         }
1126     }
1127
1128   return true;
1129 }
1130
1131 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1132
1133 static struct iv_use *
1134 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1135             tree stmt, enum use_type use_type)
1136 {
1137   struct iv_use *use = XCNEW (struct iv_use);
1138
1139   use->id = n_iv_uses (data);
1140   use->type = use_type;
1141   use->iv = iv;
1142   use->stmt = stmt;
1143   use->op_p = use_p;
1144   use->related_cands = BITMAP_ALLOC (NULL);
1145
1146   /* To avoid showing ssa name in the dumps, if it was not reset by the
1147      caller.  */
1148   iv->ssa_name = NULL_TREE;
1149
1150   if (dump_file && (dump_flags & TDF_DETAILS))
1151     dump_use (dump_file, use);
1152
1153   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1154
1155   return use;
1156 }
1157
1158 /* Checks whether OP is a loop-level invariant and if so, records it.
1159    NONLINEAR_USE is true if the invariant is used in a way we do not
1160    handle specially.  */
1161
1162 static void
1163 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1164 {
1165   basic_block bb;
1166   struct version_info *info;
1167
1168   if (TREE_CODE (op) != SSA_NAME
1169       || !is_gimple_reg (op))
1170     return;
1171
1172   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1173   if (bb
1174       && flow_bb_inside_loop_p (data->current_loop, bb))
1175     return;
1176
1177   info = name_info (data, op);
1178   info->name = op;
1179   info->has_nonlin_use |= nonlinear_use;
1180   if (!info->inv_id)
1181     info->inv_id = ++data->max_inv_id;
1182   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1183 }
1184
1185 /* Checks whether the use OP is interesting and if so, records it.  */
1186
1187 static struct iv_use *
1188 find_interesting_uses_op (struct ivopts_data *data, tree op)
1189 {
1190   struct iv *iv;
1191   struct iv *civ;
1192   tree stmt;
1193   struct iv_use *use;
1194
1195   if (TREE_CODE (op) != SSA_NAME)
1196     return NULL;
1197
1198   iv = get_iv (data, op);
1199   if (!iv)
1200     return NULL;
1201
1202   if (iv->have_use_for)
1203     {
1204       use = iv_use (data, iv->use_id);
1205
1206       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1207       return use;
1208     }
1209
1210   if (zero_p (iv->step))
1211     {
1212       record_invariant (data, op, true);
1213       return NULL;
1214     }
1215   iv->have_use_for = true;
1216
1217   civ = XNEW (struct iv);
1218   *civ = *iv;
1219
1220   stmt = SSA_NAME_DEF_STMT (op);
1221   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1222               || TREE_CODE (stmt) == MODIFY_EXPR);
1223
1224   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1225   iv->use_id = use->id;
1226
1227   return use;
1228 }
1229
1230 /* Checks whether the condition *COND_P in STMT is interesting
1231    and if so, records it.  */
1232
1233 static void
1234 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1235 {
1236   tree *op0_p;
1237   tree *op1_p;
1238   struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1239   struct iv const_iv;
1240   tree zero = integer_zero_node;
1241
1242   const_iv.step = NULL_TREE;
1243
1244   if (TREE_CODE (*cond_p) != SSA_NAME
1245       && !COMPARISON_CLASS_P (*cond_p))
1246     return;
1247
1248   if (TREE_CODE (*cond_p) == SSA_NAME)
1249     {
1250       op0_p = cond_p;
1251       op1_p = &zero;
1252     }
1253   else
1254     {
1255       op0_p = &TREE_OPERAND (*cond_p, 0);
1256       op1_p = &TREE_OPERAND (*cond_p, 1);
1257     }
1258
1259   if (TREE_CODE (*op0_p) == SSA_NAME)
1260     iv0 = get_iv (data, *op0_p);
1261   else
1262     iv0 = &const_iv;
1263
1264   if (TREE_CODE (*op1_p) == SSA_NAME)
1265     iv1 = get_iv (data, *op1_p);
1266   else
1267     iv1 = &const_iv;
1268
1269   if (/* When comparing with non-invariant value, we may not do any senseful
1270          induction variable elimination.  */
1271       (!iv0 || !iv1)
1272       /* Eliminating condition based on two ivs would be nontrivial.
1273          ??? TODO -- it is not really important to handle this case.  */
1274       || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1275     {
1276       find_interesting_uses_op (data, *op0_p);
1277       find_interesting_uses_op (data, *op1_p);
1278       return;
1279     }
1280
1281   if (zero_p (iv0->step) && zero_p (iv1->step))
1282     {
1283       /* If both are invariants, this is a work for unswitching.  */
1284       return;
1285     }
1286
1287   civ = XNEW (struct iv);
1288   *civ = zero_p (iv0->step) ? *iv1: *iv0;
1289   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1290 }
1291
1292 /* Returns true if expression EXPR is obviously invariant in LOOP,
1293    i.e. if all its operands are defined outside of the LOOP.  */
1294
1295 bool
1296 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1297 {
1298   basic_block def_bb;
1299   unsigned i, len;
1300
1301   if (is_gimple_min_invariant (expr))
1302     return true;
1303
1304   if (TREE_CODE (expr) == SSA_NAME)
1305     {
1306       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1307       if (def_bb
1308           && flow_bb_inside_loop_p (loop, def_bb))
1309         return false;
1310
1311       return true;
1312     }
1313
1314   if (!EXPR_P (expr))
1315     return false;
1316
1317   len = TREE_CODE_LENGTH (TREE_CODE (expr));
1318   for (i = 0; i < len; i++)
1319     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1320       return false;
1321
1322   return true;
1323 }
1324
1325 /* Cumulates the steps of indices into DATA and replaces their values with the
1326    initial ones.  Returns false when the value of the index cannot be determined.
1327    Callback for for_each_index.  */
1328
1329 struct ifs_ivopts_data
1330 {
1331   struct ivopts_data *ivopts_data;
1332   tree stmt;
1333   tree *step_p;
1334 };
1335
1336 static bool
1337 idx_find_step (tree base, tree *idx, void *data)
1338 {
1339   struct ifs_ivopts_data *dta = data;
1340   struct iv *iv;
1341   tree step, iv_base, iv_step, lbound, off;
1342   struct loop *loop = dta->ivopts_data->current_loop;
1343
1344   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1345       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1346     return false;
1347
1348   /* If base is a component ref, require that the offset of the reference
1349      be invariant.  */
1350   if (TREE_CODE (base) == COMPONENT_REF)
1351     {
1352       off = component_ref_field_offset (base);
1353       return expr_invariant_in_loop_p (loop, off);
1354     }
1355
1356   /* If base is array, first check whether we will be able to move the
1357      reference out of the loop (in order to take its address in strength
1358      reduction).  In order for this to work we need both lower bound
1359      and step to be loop invariants.  */
1360   if (TREE_CODE (base) == ARRAY_REF)
1361     {
1362       step = array_ref_element_size (base);
1363       lbound = array_ref_low_bound (base);
1364
1365       if (!expr_invariant_in_loop_p (loop, step)
1366           || !expr_invariant_in_loop_p (loop, lbound))
1367         return false;
1368     }
1369
1370   if (TREE_CODE (*idx) != SSA_NAME)
1371     return true;
1372
1373   iv = get_iv (dta->ivopts_data, *idx);
1374   if (!iv)
1375     return false;
1376
1377   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1378           *&x[0], which is not folded and does not trigger the
1379           ARRAY_REF path below.  */
1380   *idx = iv->base;
1381
1382   if (!iv->step)
1383     return true;
1384
1385   if (TREE_CODE (base) == ARRAY_REF)
1386     {
1387       step = array_ref_element_size (base);
1388
1389       /* We only handle addresses whose step is an integer constant.  */
1390       if (TREE_CODE (step) != INTEGER_CST)
1391         return false;
1392     }
1393   else
1394     /* The step for pointer arithmetics already is 1 byte.  */
1395     step = build_int_cst (sizetype, 1);
1396
1397   iv_base = iv->base;
1398   iv_step = iv->step;
1399   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1400                             sizetype, &iv_base, &iv_step, dta->stmt,
1401                             false))
1402     {
1403       /* The index might wrap.  */
1404       return false;
1405     }
1406
1407   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1408
1409   if (!*dta->step_p)
1410     *dta->step_p = step;
1411   else
1412     *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1413
1414   return true;
1415 }
1416
1417 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1418    object is passed to it in DATA.  */
1419
1420 static bool
1421 idx_record_use (tree base, tree *idx,
1422                 void *data)
1423 {
1424   find_interesting_uses_op (data, *idx);
1425   if (TREE_CODE (base) == ARRAY_REF)
1426     {
1427       find_interesting_uses_op (data, array_ref_element_size (base));
1428       find_interesting_uses_op (data, array_ref_low_bound (base));
1429     }
1430   return true;
1431 }
1432
1433 /* Returns true if memory reference REF may be unaligned.  */
1434
1435 static bool
1436 may_be_unaligned_p (tree ref)
1437 {
1438   tree base;
1439   tree base_type;
1440   HOST_WIDE_INT bitsize;
1441   HOST_WIDE_INT bitpos;
1442   tree toffset;
1443   enum machine_mode mode;
1444   int unsignedp, volatilep;
1445   unsigned base_align;
1446
1447   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1448      thus they are not misaligned.  */
1449   if (TREE_CODE (ref) == TARGET_MEM_REF)
1450     return false;
1451
1452   /* The test below is basically copy of what expr.c:normal_inner_ref
1453      does to check whether the object must be loaded by parts when
1454      STRICT_ALIGNMENT is true.  */
1455   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1456                               &unsignedp, &volatilep, true);
1457   base_type = TREE_TYPE (base);
1458   base_align = TYPE_ALIGN (base_type);
1459
1460   if (mode != BLKmode
1461       && (base_align < GET_MODE_ALIGNMENT (mode)
1462           || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1463           || bitpos % BITS_PER_UNIT != 0))
1464     return true;
1465
1466   return false;
1467 }
1468
1469 /* Return true if EXPR may be non-addressable.   */
1470
1471 static bool
1472 may_be_nonaddressable_p (tree expr)
1473 {
1474   switch (TREE_CODE (expr))
1475     {
1476     case COMPONENT_REF:
1477       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1478              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1479
1480     case ARRAY_REF:
1481     case ARRAY_RANGE_REF:
1482       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1483
1484     case VIEW_CONVERT_EXPR:
1485       /* This kind of view-conversions may wrap non-addressable objects
1486          and make them look addressable.  After some processing the
1487          non-addressability may be uncovered again, causing ADDR_EXPRs
1488          of inappropriate objects to be built.  */
1489       return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1490              && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1491
1492     default:
1493       break;
1494     }
1495
1496   return false;
1497 }
1498
1499 /* Finds addresses in *OP_P inside STMT.  */
1500
1501 static void
1502 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1503 {
1504   tree base = *op_p, step = NULL;
1505   struct iv *civ;
1506   struct ifs_ivopts_data ifs_ivopts_data;
1507
1508   /* Do not play with volatile memory references.  A bit too conservative,
1509      perhaps, but safe.  */
1510   if (stmt_ann (stmt)->has_volatile_ops)
1511     goto fail;
1512
1513   /* Ignore bitfields for now.  Not really something terribly complicated
1514      to handle.  TODO.  */
1515   if (TREE_CODE (base) == BIT_FIELD_REF)
1516     goto fail;
1517
1518   if (may_be_nonaddressable_p (base))
1519     goto fail;
1520
1521   if (STRICT_ALIGNMENT
1522       && may_be_unaligned_p (base))
1523     goto fail;
1524
1525   base = unshare_expr (base);
1526
1527   if (TREE_CODE (base) == TARGET_MEM_REF)
1528     {
1529       tree type = build_pointer_type (TREE_TYPE (base));
1530       tree astep;
1531
1532       if (TMR_BASE (base)
1533           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1534         {
1535           civ = get_iv (data, TMR_BASE (base));
1536           if (!civ)
1537             goto fail;
1538
1539           TMR_BASE (base) = civ->base;
1540           step = civ->step;
1541         }
1542       if (TMR_INDEX (base)
1543           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1544         {
1545           civ = get_iv (data, TMR_INDEX (base));
1546           if (!civ)
1547             goto fail;
1548
1549           TMR_INDEX (base) = civ->base;
1550           astep = civ->step;
1551
1552           if (astep)
1553             {
1554               if (TMR_STEP (base))
1555                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1556
1557               if (step)
1558                 step = fold_build2 (PLUS_EXPR, type, step, astep);
1559               else
1560                 step = astep;
1561             }
1562         }
1563
1564       if (zero_p (step))
1565         goto fail;
1566       base = tree_mem_ref_addr (type, base);
1567     }
1568   else
1569     {
1570       ifs_ivopts_data.ivopts_data = data;
1571       ifs_ivopts_data.stmt = stmt;
1572       ifs_ivopts_data.step_p = &step;
1573       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1574           || zero_p (step))
1575         goto fail;
1576
1577       gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1578       gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1579
1580       base = build_fold_addr_expr (base);
1581
1582       /* Substituting bases of IVs into the base expression might
1583          have caused folding opportunities.  */
1584       if (TREE_CODE (base) == ADDR_EXPR)
1585         {
1586           tree *ref = &TREE_OPERAND (base, 0);
1587           while (handled_component_p (*ref))
1588             ref = &TREE_OPERAND (*ref, 0);
1589           if (TREE_CODE (*ref) == INDIRECT_REF)
1590             *ref = fold_indirect_ref (*ref);
1591         }
1592     }
1593
1594   civ = alloc_iv (base, step);
1595   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1596   return;
1597
1598 fail:
1599   for_each_index (op_p, idx_record_use, data);
1600 }
1601
1602 /* Finds and records invariants used in STMT.  */
1603
1604 static void
1605 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1606 {
1607   ssa_op_iter iter;
1608   use_operand_p use_p;
1609   tree op;
1610
1611   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1612     {
1613       op = USE_FROM_PTR (use_p);
1614       record_invariant (data, op, false);
1615     }
1616 }
1617
1618 /* Finds interesting uses of induction variables in the statement STMT.  */
1619
1620 static void
1621 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1622 {
1623   struct iv *iv;
1624   tree op, lhs, rhs;
1625   ssa_op_iter iter;
1626   use_operand_p use_p;
1627
1628   find_invariants_stmt (data, stmt);
1629
1630   if (TREE_CODE (stmt) == COND_EXPR)
1631     {
1632       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1633       return;
1634     }
1635
1636   if (TREE_CODE (stmt) == MODIFY_EXPR)
1637     {
1638       lhs = TREE_OPERAND (stmt, 0);
1639       rhs = TREE_OPERAND (stmt, 1);
1640
1641       if (TREE_CODE (lhs) == SSA_NAME)
1642         {
1643           /* If the statement defines an induction variable, the uses are not
1644              interesting by themselves.  */
1645
1646           iv = get_iv (data, lhs);
1647
1648           if (iv && !zero_p (iv->step))
1649             return;
1650         }
1651
1652       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1653         {
1654         case tcc_comparison:
1655           find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1656           return;
1657
1658         case tcc_reference:
1659           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1660           if (REFERENCE_CLASS_P (lhs))
1661             find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1662           return;
1663
1664         default: ;
1665         }
1666
1667       if (REFERENCE_CLASS_P (lhs)
1668           && is_gimple_val (rhs))
1669         {
1670           find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1671           find_interesting_uses_op (data, rhs);
1672           return;
1673         }
1674
1675       /* TODO -- we should also handle address uses of type
1676
1677          memory = call (whatever);
1678
1679          and
1680
1681          call (memory).  */
1682     }
1683
1684   if (TREE_CODE (stmt) == PHI_NODE
1685       && bb_for_stmt (stmt) == data->current_loop->header)
1686     {
1687       lhs = PHI_RESULT (stmt);
1688       iv = get_iv (data, lhs);
1689
1690       if (iv && !zero_p (iv->step))
1691         return;
1692     }
1693
1694   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1695     {
1696       op = USE_FROM_PTR (use_p);
1697
1698       if (TREE_CODE (op) != SSA_NAME)
1699         continue;
1700
1701       iv = get_iv (data, op);
1702       if (!iv)
1703         continue;
1704
1705       find_interesting_uses_op (data, op);
1706     }
1707 }
1708
1709 /* Finds interesting uses of induction variables outside of loops
1710    on loop exit edge EXIT.  */
1711
1712 static void
1713 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1714 {
1715   tree phi, def;
1716
1717   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1718     {
1719       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1720       find_interesting_uses_op (data, def);
1721     }
1722 }
1723
1724 /* Finds uses of the induction variables that are interesting.  */
1725
1726 static void
1727 find_interesting_uses (struct ivopts_data *data)
1728 {
1729   basic_block bb;
1730   block_stmt_iterator bsi;
1731   tree phi;
1732   basic_block *body = get_loop_body (data->current_loop);
1733   unsigned i;
1734   struct version_info *info;
1735   edge e;
1736
1737   if (dump_file && (dump_flags & TDF_DETAILS))
1738     fprintf (dump_file, "Uses:\n\n");
1739
1740   for (i = 0; i < data->current_loop->num_nodes; i++)
1741     {
1742       edge_iterator ei;
1743       bb = body[i];
1744
1745       FOR_EACH_EDGE (e, ei, bb->succs)
1746         if (e->dest != EXIT_BLOCK_PTR
1747             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1748           find_interesting_uses_outside (data, e);
1749
1750       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1751         find_interesting_uses_stmt (data, phi);
1752       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1753         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1754     }
1755
1756   if (dump_file && (dump_flags & TDF_DETAILS))
1757     {
1758       bitmap_iterator bi;
1759
1760       fprintf (dump_file, "\n");
1761
1762       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1763         {
1764           info = ver_info (data, i);
1765           if (info->inv_id)
1766             {
1767               fprintf (dump_file, "  ");
1768               print_generic_expr (dump_file, info->name, TDF_SLIM);
1769               fprintf (dump_file, " is invariant (%d)%s\n",
1770                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1771             }
1772         }
1773
1774       fprintf (dump_file, "\n");
1775     }
1776
1777   free (body);
1778 }
1779
1780 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
1781    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
1782    we are at the top-level of the processed address.  */
1783
1784 static tree
1785 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1786                 unsigned HOST_WIDE_INT *offset)
1787 {
1788   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1789   enum tree_code code;
1790   tree type, orig_type = TREE_TYPE (expr);
1791   unsigned HOST_WIDE_INT off0, off1, st;
1792   tree orig_expr = expr;
1793
1794   STRIP_NOPS (expr);
1795
1796   type = TREE_TYPE (expr);
1797   code = TREE_CODE (expr);
1798   *offset = 0;
1799
1800   switch (code)
1801     {
1802     case INTEGER_CST:
1803       if (!cst_and_fits_in_hwi (expr)
1804           || zero_p (expr))
1805         return orig_expr;
1806
1807       *offset = int_cst_value (expr);
1808       return build_int_cst (orig_type, 0);
1809
1810     case PLUS_EXPR:
1811     case MINUS_EXPR:
1812       op0 = TREE_OPERAND (expr, 0);
1813       op1 = TREE_OPERAND (expr, 1);
1814
1815       op0 = strip_offset_1 (op0, false, false, &off0);
1816       op1 = strip_offset_1 (op1, false, false, &off1);
1817
1818       *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1819       if (op0 == TREE_OPERAND (expr, 0)
1820           && op1 == TREE_OPERAND (expr, 1))
1821         return orig_expr;
1822
1823       if (zero_p (op1))
1824         expr = op0;
1825       else if (zero_p (op0))
1826         {
1827           if (code == PLUS_EXPR)
1828             expr = op1;
1829           else
1830             expr = fold_build1 (NEGATE_EXPR, type, op1);
1831         }
1832       else
1833         expr = fold_build2 (code, type, op0, op1);
1834
1835       return fold_convert (orig_type, expr);
1836
1837     case ARRAY_REF:
1838       if (!inside_addr)
1839         return orig_expr;
1840
1841       step = array_ref_element_size (expr);
1842       if (!cst_and_fits_in_hwi (step))
1843         break;
1844
1845       st = int_cst_value (step);
1846       op1 = TREE_OPERAND (expr, 1);
1847       op1 = strip_offset_1 (op1, false, false, &off1);
1848       *offset = off1 * st;
1849
1850       if (top_compref
1851           && zero_p (op1))
1852         {
1853           /* Strip the component reference completely.  */
1854           op0 = TREE_OPERAND (expr, 0);
1855           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1856           *offset += off0;
1857           return op0;
1858         }
1859       break;
1860
1861     case COMPONENT_REF:
1862       if (!inside_addr)
1863         return orig_expr;
1864
1865       tmp = component_ref_field_offset (expr);
1866       if (top_compref
1867           && cst_and_fits_in_hwi (tmp))
1868         {
1869           /* Strip the component reference completely.  */
1870           op0 = TREE_OPERAND (expr, 0);
1871           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1872           *offset = off0 + int_cst_value (tmp);
1873           return op0;
1874         }
1875       break;
1876
1877     case ADDR_EXPR:
1878       op0 = TREE_OPERAND (expr, 0);
1879       op0 = strip_offset_1 (op0, true, true, &off0);
1880       *offset += off0;
1881
1882       if (op0 == TREE_OPERAND (expr, 0))
1883         return orig_expr;
1884
1885       expr = build_fold_addr_expr (op0);
1886       return fold_convert (orig_type, expr);
1887
1888     case INDIRECT_REF:
1889       inside_addr = false;
1890       break;
1891
1892     default:
1893       return orig_expr;
1894     }
1895
1896   /* Default handling of expressions for that we want to recurse into
1897      the first operand.  */
1898   op0 = TREE_OPERAND (expr, 0);
1899   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1900   *offset += off0;
1901
1902   if (op0 == TREE_OPERAND (expr, 0)
1903       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1904     return orig_expr;
1905
1906   expr = copy_node (expr);
1907   TREE_OPERAND (expr, 0) = op0;
1908   if (op1)
1909     TREE_OPERAND (expr, 1) = op1;
1910
1911   /* Inside address, we might strip the top level component references,
1912      thus changing type of the expression.  Handling of ADDR_EXPR
1913      will fix that.  */
1914   expr = fold_convert (orig_type, expr);
1915
1916   return expr;
1917 }
1918
1919 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1920
1921 static tree
1922 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1923 {
1924   return strip_offset_1 (expr, false, false, offset);
1925 }
1926
1927 /* Returns variant of TYPE that can be used as base for different uses.
1928    For integer types, we return unsigned variant of the type, which
1929    avoids problems with overflows.  For pointer types, we return void *.  */
1930
1931 static tree
1932 generic_type_for (tree type)
1933 {
1934   if (POINTER_TYPE_P (type))
1935     return ptr_type_node;
1936
1937   if (TYPE_UNSIGNED (type))
1938     return type;
1939
1940   return unsigned_type_for (type);
1941 }
1942
1943 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1944    the bitmap to that we should store it.  */
1945
1946 static struct ivopts_data *fd_ivopts_data;
1947 static tree
1948 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1949 {
1950   bitmap *depends_on = data;
1951   struct version_info *info;
1952
1953   if (TREE_CODE (*expr_p) != SSA_NAME)
1954     return NULL_TREE;
1955   info = name_info (fd_ivopts_data, *expr_p);
1956
1957   if (!info->inv_id || info->has_nonlin_use)
1958     return NULL_TREE;
1959
1960   if (!*depends_on)
1961     *depends_on = BITMAP_ALLOC (NULL);
1962   bitmap_set_bit (*depends_on, info->inv_id);
1963
1964   return NULL_TREE;
1965 }
1966
1967 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
1968    position to POS.  If USE is not NULL, the candidate is set as related to
1969    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
1970    replacement of the final value of the iv by a direct computation.  */
1971
1972 static struct iv_cand *
1973 add_candidate_1 (struct ivopts_data *data,
1974                  tree base, tree step, bool important, enum iv_position pos,
1975                  struct iv_use *use, tree incremented_at)
1976 {
1977   unsigned i;
1978   struct iv_cand *cand = NULL;
1979   tree type, orig_type;
1980
1981   if (base)
1982     {
1983       orig_type = TREE_TYPE (base);
1984       type = generic_type_for (orig_type);
1985       if (type != orig_type)
1986         {
1987           base = fold_convert (type, base);
1988           if (step)
1989             step = fold_convert (type, step);
1990         }
1991     }
1992
1993   for (i = 0; i < n_iv_cands (data); i++)
1994     {
1995       cand = iv_cand (data, i);
1996
1997       if (cand->pos != pos)
1998         continue;
1999
2000       if (cand->incremented_at != incremented_at)
2001         continue;
2002
2003       if (!cand->iv)
2004         {
2005           if (!base && !step)
2006             break;
2007
2008           continue;
2009         }
2010
2011       if (!base && !step)
2012         continue;
2013
2014       if (!operand_equal_p (base, cand->iv->base, 0))
2015         continue;
2016
2017       if (zero_p (cand->iv->step))
2018         {
2019           if (zero_p (step))
2020             break;
2021         }
2022       else
2023         {
2024           if (step && operand_equal_p (step, cand->iv->step, 0))
2025             break;
2026         }
2027     }
2028
2029   if (i == n_iv_cands (data))
2030     {
2031       cand = XCNEW (struct iv_cand);
2032       cand->id = i;
2033
2034       if (!base && !step)
2035         cand->iv = NULL;
2036       else
2037         cand->iv = alloc_iv (base, step);
2038
2039       cand->pos = pos;
2040       if (pos != IP_ORIGINAL && cand->iv)
2041         {
2042           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2043           cand->var_after = cand->var_before;
2044         }
2045       cand->important = important;
2046       cand->incremented_at = incremented_at;
2047       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2048
2049       if (step
2050           && TREE_CODE (step) != INTEGER_CST)
2051         {
2052           fd_ivopts_data = data;
2053           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2054         }
2055
2056       if (dump_file && (dump_flags & TDF_DETAILS))
2057         dump_cand (dump_file, cand);
2058     }
2059
2060   if (important && !cand->important)
2061     {
2062       cand->important = true;
2063       if (dump_file && (dump_flags & TDF_DETAILS))
2064         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2065     }
2066
2067   if (use)
2068     {
2069       bitmap_set_bit (use->related_cands, i);
2070       if (dump_file && (dump_flags & TDF_DETAILS))
2071         fprintf (dump_file, "Candidate %d is related to use %d\n",
2072                  cand->id, use->id);
2073     }
2074
2075   return cand;
2076 }
2077
2078 /* Returns true if incrementing the induction variable at the end of the LOOP
2079    is allowed.
2080
2081    The purpose is to avoid splitting latch edge with a biv increment, thus
2082    creating a jump, possibly confusing other optimization passes and leaving
2083    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2084    is not available (so we do not have a better alternative), or if the latch
2085    edge is already nonempty.  */
2086
2087 static bool
2088 allow_ip_end_pos_p (struct loop *loop)
2089 {
2090   if (!ip_normal_pos (loop))
2091     return true;
2092
2093   if (!empty_block_p (ip_end_pos (loop)))
2094     return true;
2095
2096   return false;
2097 }
2098
2099 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2100    position to POS.  If USE is not NULL, the candidate is set as related to
2101    it.  The candidate computation is scheduled on all available positions.  */
2102
2103 static void
2104 add_candidate (struct ivopts_data *data,
2105                tree base, tree step, bool important, struct iv_use *use)
2106 {
2107   if (ip_normal_pos (data->current_loop))
2108     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2109   if (ip_end_pos (data->current_loop)
2110       && allow_ip_end_pos_p (data->current_loop))
2111     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2112 }
2113
2114 /* Add a standard "0 + 1 * iteration" iv candidate for a
2115    type with SIZE bits.  */
2116
2117 static void
2118 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2119                                      unsigned int size)
2120 {
2121   tree type = lang_hooks.types.type_for_size (size, true);
2122   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2123                  true, NULL);
2124 }
2125
2126 /* Adds standard iv candidates.  */
2127
2128 static void
2129 add_standard_iv_candidates (struct ivopts_data *data)
2130 {
2131   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2132
2133   /* The same for a double-integer type if it is still fast enough.  */
2134   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2135     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2136 }
2137
2138
2139 /* Adds candidates bases on the old induction variable IV.  */
2140
2141 static void
2142 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2143 {
2144   tree phi, def;
2145   struct iv_cand *cand;
2146
2147   add_candidate (data, iv->base, iv->step, true, NULL);
2148
2149   /* The same, but with initial value zero.  */
2150   add_candidate (data,
2151                  build_int_cst (TREE_TYPE (iv->base), 0),
2152                  iv->step, true, NULL);
2153
2154   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2155   if (TREE_CODE (phi) == PHI_NODE)
2156     {
2157       /* Additionally record the possibility of leaving the original iv
2158          untouched.  */
2159       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2160       cand = add_candidate_1 (data,
2161                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2162                               SSA_NAME_DEF_STMT (def));
2163       cand->var_before = iv->ssa_name;
2164       cand->var_after = def;
2165     }
2166 }
2167
2168 /* Adds candidates based on the old induction variables.  */
2169
2170 static void
2171 add_old_ivs_candidates (struct ivopts_data *data)
2172 {
2173   unsigned i;
2174   struct iv *iv;
2175   bitmap_iterator bi;
2176
2177   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2178     {
2179       iv = ver_info (data, i)->iv;
2180       if (iv && iv->biv_p && !zero_p (iv->step))
2181         add_old_iv_candidates (data, iv);
2182     }
2183 }
2184
2185 /* Adds candidates based on the value of the induction variable IV and USE.  */
2186
2187 static void
2188 add_iv_value_candidates (struct ivopts_data *data,
2189                          struct iv *iv, struct iv_use *use)
2190 {
2191   unsigned HOST_WIDE_INT offset;
2192   tree base;
2193
2194   add_candidate (data, iv->base, iv->step, false, use);
2195
2196   /* The same, but with initial value zero.  Make such variable important,
2197      since it is generic enough so that possibly many uses may be based
2198      on it.  */
2199   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2200                  iv->step, true, use);
2201
2202   /* Third, try removing the constant offset.  */
2203   base = strip_offset (iv->base, &offset);
2204   if (offset)
2205     add_candidate (data, base, iv->step, false, use);
2206 }
2207
2208 /* Adds candidates based on the uses.  */
2209
2210 static void
2211 add_derived_ivs_candidates (struct ivopts_data *data)
2212 {
2213   unsigned i;
2214
2215   for (i = 0; i < n_iv_uses (data); i++)
2216     {
2217       struct iv_use *use = iv_use (data, i);
2218
2219       if (!use)
2220         continue;
2221
2222       switch (use->type)
2223         {
2224         case USE_NONLINEAR_EXPR:
2225         case USE_COMPARE:
2226         case USE_ADDRESS:
2227           /* Just add the ivs based on the value of the iv used here.  */
2228           add_iv_value_candidates (data, use->iv, use);
2229           break;
2230
2231         default:
2232           gcc_unreachable ();
2233         }
2234     }
2235 }
2236
2237 /* Record important candidates and add them to related_cands bitmaps
2238    if needed.  */
2239
2240 static void
2241 record_important_candidates (struct ivopts_data *data)
2242 {
2243   unsigned i;
2244   struct iv_use *use;
2245
2246   for (i = 0; i < n_iv_cands (data); i++)
2247     {
2248       struct iv_cand *cand = iv_cand (data, i);
2249
2250       if (cand->important)
2251         bitmap_set_bit (data->important_candidates, i);
2252     }
2253
2254   data->consider_all_candidates = (n_iv_cands (data)
2255                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2256
2257   if (data->consider_all_candidates)
2258     {
2259       /* We will not need "related_cands" bitmaps in this case,
2260          so release them to decrease peak memory consumption.  */
2261       for (i = 0; i < n_iv_uses (data); i++)
2262         {
2263           use = iv_use (data, i);
2264           BITMAP_FREE (use->related_cands);
2265         }
2266     }
2267   else
2268     {
2269       /* Add important candidates to the related_cands bitmaps.  */
2270       for (i = 0; i < n_iv_uses (data); i++)
2271         bitmap_ior_into (iv_use (data, i)->related_cands,
2272                          data->important_candidates);
2273     }
2274 }
2275
2276 /* Finds the candidates for the induction variables.  */
2277
2278 static void
2279 find_iv_candidates (struct ivopts_data *data)
2280 {
2281   /* Add commonly used ivs.  */
2282   add_standard_iv_candidates (data);
2283
2284   /* Add old induction variables.  */
2285   add_old_ivs_candidates (data);
2286
2287   /* Add induction variables derived from uses.  */
2288   add_derived_ivs_candidates (data);
2289
2290   /* Record the important candidates.  */
2291   record_important_candidates (data);
2292 }
2293
2294 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2295    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2296    we allocate a simple list to every use.  */
2297
2298 static void
2299 alloc_use_cost_map (struct ivopts_data *data)
2300 {
2301   unsigned i, size, s, j;
2302
2303   for (i = 0; i < n_iv_uses (data); i++)
2304     {
2305       struct iv_use *use = iv_use (data, i);
2306       bitmap_iterator bi;
2307
2308       if (data->consider_all_candidates)
2309         size = n_iv_cands (data);
2310       else
2311         {
2312           s = 0;
2313           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2314             {
2315               s++;
2316             }
2317
2318           /* Round up to the power of two, so that moduling by it is fast.  */
2319           for (size = 1; size < s; size <<= 1)
2320             continue;
2321         }
2322
2323       use->n_map_members = size;
2324       use->cost_map = XCNEWVEC (struct cost_pair, size);
2325     }
2326 }
2327
2328 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2329    on invariants DEPENDS_ON and that the value used in expressing it
2330    is VALUE.*/
2331
2332 static void
2333 set_use_iv_cost (struct ivopts_data *data,
2334                  struct iv_use *use, struct iv_cand *cand, unsigned cost,
2335                  bitmap depends_on, tree value)
2336 {
2337   unsigned i, s;
2338
2339   if (cost == INFTY)
2340     {
2341       BITMAP_FREE (depends_on);
2342       return;
2343     }
2344
2345   if (data->consider_all_candidates)
2346     {
2347       use->cost_map[cand->id].cand = cand;
2348       use->cost_map[cand->id].cost = cost;
2349       use->cost_map[cand->id].depends_on = depends_on;
2350       use->cost_map[cand->id].value = value;
2351       return;
2352     }
2353
2354   /* n_map_members is a power of two, so this computes modulo.  */
2355   s = cand->id & (use->n_map_members - 1);
2356   for (i = s; i < use->n_map_members; i++)
2357     if (!use->cost_map[i].cand)
2358       goto found;
2359   for (i = 0; i < s; i++)
2360     if (!use->cost_map[i].cand)
2361       goto found;
2362
2363   gcc_unreachable ();
2364
2365 found:
2366   use->cost_map[i].cand = cand;
2367   use->cost_map[i].cost = cost;
2368   use->cost_map[i].depends_on = depends_on;
2369   use->cost_map[i].value = value;
2370 }
2371
2372 /* Gets cost of (USE, CANDIDATE) pair.  */
2373
2374 static struct cost_pair *
2375 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2376                  struct iv_cand *cand)
2377 {
2378   unsigned i, s;
2379   struct cost_pair *ret;
2380
2381   if (!cand)
2382     return NULL;
2383
2384   if (data->consider_all_candidates)
2385     {
2386       ret = use->cost_map + cand->id;
2387       if (!ret->cand)
2388         return NULL;
2389
2390       return ret;
2391     }
2392
2393   /* n_map_members is a power of two, so this computes modulo.  */
2394   s = cand->id & (use->n_map_members - 1);
2395   for (i = s; i < use->n_map_members; i++)
2396     if (use->cost_map[i].cand == cand)
2397       return use->cost_map + i;
2398
2399   for (i = 0; i < s; i++)
2400     if (use->cost_map[i].cand == cand)
2401       return use->cost_map + i;
2402
2403   return NULL;
2404 }
2405
2406 /* Returns estimate on cost of computing SEQ.  */
2407
2408 static unsigned
2409 seq_cost (rtx seq)
2410 {
2411   unsigned cost = 0;
2412   rtx set;
2413
2414   for (; seq; seq = NEXT_INSN (seq))
2415     {
2416       set = single_set (seq);
2417       if (set)
2418         cost += rtx_cost (set, SET);
2419       else
2420         cost++;
2421     }
2422
2423   return cost;
2424 }
2425
2426 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2427 static rtx
2428 produce_memory_decl_rtl (tree obj, int *regno)
2429 {
2430   rtx x;
2431
2432   gcc_assert (obj);
2433   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2434     {
2435       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2436       x = gen_rtx_SYMBOL_REF (Pmode, name);
2437     }
2438   else
2439     x = gen_raw_REG (Pmode, (*regno)++);
2440
2441   return gen_rtx_MEM (DECL_MODE (obj), x);
2442 }
2443
2444 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2445    walk_tree.  DATA contains the actual fake register number.  */
2446
2447 static tree
2448 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2449 {
2450   tree obj = NULL_TREE;
2451   rtx x = NULL_RTX;
2452   int *regno = data;
2453
2454   switch (TREE_CODE (*expr_p))
2455     {
2456     case ADDR_EXPR:
2457       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2458            handled_component_p (*expr_p);
2459            expr_p = &TREE_OPERAND (*expr_p, 0))
2460         continue;
2461       obj = *expr_p;
2462       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2463         x = produce_memory_decl_rtl (obj, regno);
2464       break;
2465
2466     case SSA_NAME:
2467       *ws = 0;
2468       obj = SSA_NAME_VAR (*expr_p);
2469       if (!DECL_RTL_SET_P (obj))
2470         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2471       break;
2472
2473     case VAR_DECL:
2474     case PARM_DECL:
2475     case RESULT_DECL:
2476       *ws = 0;
2477       obj = *expr_p;
2478
2479       if (DECL_RTL_SET_P (obj))
2480         break;
2481
2482       if (DECL_MODE (obj) == BLKmode)
2483         x = produce_memory_decl_rtl (obj, regno);
2484       else
2485         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2486
2487       break;
2488
2489     default:
2490       break;
2491     }
2492
2493   if (x)
2494     {
2495       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2496       SET_DECL_RTL (obj, x);
2497     }
2498
2499   return NULL_TREE;
2500 }
2501
2502 /* Determines cost of the computation of EXPR.  */
2503
2504 static unsigned
2505 computation_cost (tree expr)
2506 {
2507   rtx seq, rslt;
2508   tree type = TREE_TYPE (expr);
2509   unsigned cost;
2510   /* Avoid using hard regs in ways which may be unsupported.  */
2511   int regno = LAST_VIRTUAL_REGISTER + 1;
2512
2513   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2514   start_sequence ();
2515   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2516   seq = get_insns ();
2517   end_sequence ();
2518
2519   cost = seq_cost (seq);
2520   if (MEM_P (rslt))
2521     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2522
2523   return cost;
2524 }
2525
2526 /* Returns variable containing the value of candidate CAND at statement AT.  */
2527
2528 static tree
2529 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2530 {
2531   if (stmt_after_increment (loop, cand, stmt))
2532     return cand->var_after;
2533   else
2534     return cand->var_before;
2535 }
2536
2537 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2538    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2539
2540 int
2541 tree_int_cst_sign_bit (tree t)
2542 {
2543   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2544   unsigned HOST_WIDE_INT w;
2545
2546   if (bitno < HOST_BITS_PER_WIDE_INT)
2547     w = TREE_INT_CST_LOW (t);
2548   else
2549     {
2550       w = TREE_INT_CST_HIGH (t);
2551       bitno -= HOST_BITS_PER_WIDE_INT;
2552     }
2553
2554   return (w >> bitno) & 1;
2555 }
2556
2557 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2558    return cst.  Otherwise return NULL_TREE.  */
2559
2560 static tree
2561 constant_multiple_of (tree type, tree top, tree bot)
2562 {
2563   tree res, mby, p0, p1;
2564   enum tree_code code;
2565   bool negate;
2566
2567   STRIP_NOPS (top);
2568   STRIP_NOPS (bot);
2569
2570   if (operand_equal_p (top, bot, 0))
2571     return build_int_cst (type, 1);
2572
2573   code = TREE_CODE (top);
2574   switch (code)
2575     {
2576     case MULT_EXPR:
2577       mby = TREE_OPERAND (top, 1);
2578       if (TREE_CODE (mby) != INTEGER_CST)
2579         return NULL_TREE;
2580
2581       res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2582       if (!res)
2583         return NULL_TREE;
2584
2585       return fold_binary_to_constant (MULT_EXPR, type, res,
2586                                       fold_convert (type, mby));
2587
2588     case PLUS_EXPR:
2589     case MINUS_EXPR:
2590       p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2591       if (!p0)
2592         return NULL_TREE;
2593       p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2594       if (!p1)
2595         return NULL_TREE;
2596
2597       return fold_binary_to_constant (code, type, p0, p1);
2598
2599     case INTEGER_CST:
2600       if (TREE_CODE (bot) != INTEGER_CST)
2601         return NULL_TREE;
2602
2603       bot = fold_convert (type, bot);
2604       top = fold_convert (type, top);
2605
2606       /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2607          the result afterwards.  */
2608       if (tree_int_cst_sign_bit (bot))
2609         {
2610           negate = true;
2611           bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2612         }
2613       else
2614         negate = false;
2615
2616       /* Ditto for TOP.  */
2617       if (tree_int_cst_sign_bit (top))
2618         {
2619           negate = !negate;
2620           top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2621         }
2622
2623       if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2624         return NULL_TREE;
2625
2626       res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2627       if (negate)
2628         res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2629       return res;
2630
2631     default:
2632       return NULL_TREE;
2633     }
2634 }
2635
2636 /* Sets COMB to CST.  */
2637
2638 static void
2639 aff_combination_const (struct affine_tree_combination *comb, tree type,
2640                        unsigned HOST_WIDE_INT cst)
2641 {
2642   unsigned prec = TYPE_PRECISION (type);
2643
2644   comb->type = type;
2645   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2646
2647   comb->n = 0;
2648   comb->rest = NULL_TREE;
2649   comb->offset = cst & comb->mask;
2650 }
2651
2652 /* Sets COMB to single element ELT.  */
2653
2654 static void
2655 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2656 {
2657   unsigned prec = TYPE_PRECISION (type);
2658
2659   comb->type = type;
2660   comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2661
2662   comb->n = 1;
2663   comb->elts[0] = elt;
2664   comb->coefs[0] = 1;
2665   comb->rest = NULL_TREE;
2666   comb->offset = 0;
2667 }
2668
2669 /* Scales COMB by SCALE.  */
2670
2671 static void
2672 aff_combination_scale (struct affine_tree_combination *comb,
2673                        unsigned HOST_WIDE_INT scale)
2674 {
2675   unsigned i, j;
2676
2677   if (scale == 1)
2678     return;
2679
2680   if (scale == 0)
2681     {
2682       aff_combination_const (comb, comb->type, 0);
2683       return;
2684     }
2685
2686   comb->offset = (scale * comb->offset) & comb->mask;
2687   for (i = 0, j = 0; i < comb->n; i++)
2688     {
2689       comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2690       comb->elts[j] = comb->elts[i];
2691       if (comb->coefs[j] != 0)
2692         j++;
2693     }
2694   comb->n = j;
2695
2696   if (comb->rest)
2697     {
2698       if (comb->n < MAX_AFF_ELTS)
2699         {
2700           comb->coefs[comb->n] = scale;
2701           comb->elts[comb->n] = comb->rest;
2702           comb->rest = NULL_TREE;
2703           comb->n++;
2704         }
2705       else
2706         comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2707                                   build_int_cst_type (comb->type, scale));
2708     }
2709 }
2710
2711 /* Adds ELT * SCALE to COMB.  */
2712
2713 static void
2714 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2715                          unsigned HOST_WIDE_INT scale)
2716 {
2717   unsigned i;
2718
2719   if (scale == 0)
2720     return;
2721
2722   for (i = 0; i < comb->n; i++)
2723     if (operand_equal_p (comb->elts[i], elt, 0))
2724       {
2725         comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2726         if (comb->coefs[i])
2727           return;
2728
2729         comb->n--;
2730         comb->coefs[i] = comb->coefs[comb->n];
2731         comb->elts[i] = comb->elts[comb->n];
2732
2733         if (comb->rest)
2734           {
2735             gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2736             comb->coefs[comb->n] = 1;
2737             comb->elts[comb->n] = comb->rest;
2738             comb->rest = NULL_TREE;
2739             comb->n++;
2740           }
2741         return;
2742       }
2743   if (comb->n < MAX_AFF_ELTS)
2744     {
2745       comb->coefs[comb->n] = scale;
2746       comb->elts[comb->n] = elt;
2747       comb->n++;
2748       return;
2749     }
2750
2751   if (scale == 1)
2752     elt = fold_convert (comb->type, elt);
2753   else
2754     elt = fold_build2 (MULT_EXPR, comb->type,
2755                        fold_convert (comb->type, elt),
2756                        build_int_cst_type (comb->type, scale));
2757
2758   if (comb->rest)
2759     comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2760   else
2761     comb->rest = elt;
2762 }
2763
2764 /* Adds COMB2 to COMB1.  */
2765
2766 static void
2767 aff_combination_add (struct affine_tree_combination *comb1,
2768                      struct affine_tree_combination *comb2)
2769 {
2770   unsigned i;
2771
2772   comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2773   for (i = 0; i < comb2->n; i++)
2774     aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2775   if (comb2->rest)
2776     aff_combination_add_elt (comb1, comb2->rest, 1);
2777 }
2778
2779 /* Splits EXPR into an affine combination of parts.  */
2780
2781 static void
2782 tree_to_aff_combination (tree expr, tree type,
2783                          struct affine_tree_combination *comb)
2784 {
2785   struct affine_tree_combination tmp;
2786   enum tree_code code;
2787   tree cst, core, toffset;
2788   HOST_WIDE_INT bitpos, bitsize;
2789   enum machine_mode mode;
2790   int unsignedp, volatilep;
2791
2792   STRIP_NOPS (expr);
2793
2794   code = TREE_CODE (expr);
2795   switch (code)
2796     {
2797     case INTEGER_CST:
2798       aff_combination_const (comb, type, int_cst_value (expr));
2799       return;
2800
2801     case PLUS_EXPR:
2802     case MINUS_EXPR:
2803       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2804       tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2805       if (code == MINUS_EXPR)
2806         aff_combination_scale (&tmp, -1);
2807       aff_combination_add (comb, &tmp);
2808       return;
2809
2810     case MULT_EXPR:
2811       cst = TREE_OPERAND (expr, 1);
2812       if (TREE_CODE (cst) != INTEGER_CST)
2813         break;
2814       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2815       aff_combination_scale (comb, int_cst_value (cst));
2816       return;
2817
2818     case NEGATE_EXPR:
2819       tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2820       aff_combination_scale (comb, -1);
2821       return;
2822
2823     case ADDR_EXPR:
2824       core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2825                                   &toffset, &mode, &unsignedp, &volatilep,
2826                                   false);
2827       if (bitpos % BITS_PER_UNIT != 0)
2828         break;
2829       aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2830       core = build_fold_addr_expr (core);
2831       if (TREE_CODE (core) == ADDR_EXPR)
2832         aff_combination_add_elt (comb, core, 1);
2833       else
2834         {
2835           tree_to_aff_combination (core, type, &tmp);
2836           aff_combination_add (comb, &tmp);
2837         }
2838       if (toffset)
2839         {
2840           tree_to_aff_combination (toffset, type, &tmp);
2841           aff_combination_add (comb, &tmp);
2842         }
2843       return;
2844
2845     default:
2846       break;
2847     }
2848
2849   aff_combination_elt (comb, type, expr);
2850 }
2851
2852 /* Creates EXPR + ELT * SCALE in TYPE.  MASK is the mask for width of TYPE.  */
2853
2854 static tree
2855 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2856                  unsigned HOST_WIDE_INT mask)
2857 {
2858   enum tree_code code;
2859
2860   scale &= mask;
2861   elt = fold_convert (type, elt);
2862
2863   if (scale == 1)
2864     {
2865       if (!expr)
2866         return elt;
2867
2868       return fold_build2 (PLUS_EXPR, type, expr, elt);
2869     }
2870
2871   if (scale == mask)
2872     {
2873       if (!expr)
2874         return fold_build1 (NEGATE_EXPR, type, elt);
2875
2876       return fold_build2 (MINUS_EXPR, type, expr, elt);
2877     }
2878
2879   if (!expr)
2880     return fold_build2 (MULT_EXPR, type, elt,
2881                         build_int_cst_type (type, scale));
2882
2883   if ((scale | (mask >> 1)) == mask)
2884     {
2885       /* Scale is negative.  */
2886       code = MINUS_EXPR;
2887       scale = (-scale) & mask;
2888     }
2889   else
2890     code = PLUS_EXPR;
2891
2892   elt = fold_build2 (MULT_EXPR, type, elt,
2893                      build_int_cst_type (type, scale));
2894   return fold_build2 (code, type, expr, elt);
2895 }
2896
2897 /* Copies the tree elements of COMB to ensure that they are not shared.  */
2898
2899 static void
2900 unshare_aff_combination (struct affine_tree_combination *comb)
2901 {
2902   unsigned i;
2903
2904   for (i = 0; i < comb->n; i++)
2905     comb->elts[i] = unshare_expr (comb->elts[i]);
2906   if (comb->rest)
2907     comb->rest = unshare_expr (comb->rest);
2908 }
2909
2910 /* Makes tree from the affine combination COMB.  */
2911
2912 static tree
2913 aff_combination_to_tree (struct affine_tree_combination *comb)
2914 {
2915   tree type = comb->type;
2916   tree expr = comb->rest;
2917   unsigned i;
2918   unsigned HOST_WIDE_INT off, sgn;
2919
2920   /* Handle the special case produced by get_computation_aff when
2921      the type does not fit in HOST_WIDE_INT.  */
2922   if (comb->n == 0 && comb->offset == 0)
2923     return fold_convert (type, expr);
2924
2925   gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2926
2927   for (i = 0; i < comb->n; i++)
2928     expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2929                             comb->mask);
2930
2931   if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2932     {
2933       /* Offset is negative.  */
2934       off = (-comb->offset) & comb->mask;
2935       sgn = comb->mask;
2936     }
2937   else
2938     {
2939       off = comb->offset;
2940       sgn = 1;
2941     }
2942   return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2943                           comb->mask);
2944 }
2945
2946 /* Determines the expression by that USE is expressed from induction variable
2947    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2948    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2949
2950 static bool
2951 get_computation_aff (struct loop *loop,
2952                      struct iv_use *use, struct iv_cand *cand, tree at,
2953                      struct affine_tree_combination *aff)
2954 {
2955   tree ubase = use->iv->base;
2956   tree ustep = use->iv->step;
2957   tree cbase = cand->iv->base;
2958   tree cstep = cand->iv->step;
2959   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2960   tree uutype;
2961   tree expr, delta;
2962   tree ratio;
2963   unsigned HOST_WIDE_INT ustepi, cstepi;
2964   HOST_WIDE_INT ratioi;
2965   struct affine_tree_combination cbase_aff, expr_aff;
2966   tree cstep_orig = cstep, ustep_orig = ustep;
2967
2968   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2969     {
2970       /* We do not have a precision to express the values of use.  */
2971       return false;
2972     }
2973
2974   expr = var_at_stmt (loop, cand, at);
2975
2976   if (TREE_TYPE (expr) != ctype)
2977     {
2978       /* This may happen with the original ivs.  */
2979       expr = fold_convert (ctype, expr);
2980     }
2981
2982   if (TYPE_UNSIGNED (utype))
2983     uutype = utype;
2984   else
2985     {
2986       uutype = unsigned_type_for (utype);
2987       ubase = fold_convert (uutype, ubase);
2988       ustep = fold_convert (uutype, ustep);
2989     }
2990
2991   if (uutype != ctype)
2992     {
2993       expr = fold_convert (uutype, expr);
2994       cbase = fold_convert (uutype, cbase);
2995       cstep = fold_convert (uutype, cstep);
2996
2997       /* If the conversion is not noop, we must take it into account when
2998          considering the value of the step.  */
2999       if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3000         cstep_orig = cstep;
3001     }
3002
3003   if (cst_and_fits_in_hwi (cstep_orig)
3004       && cst_and_fits_in_hwi (ustep_orig))
3005     {
3006       ustepi = int_cst_value (ustep_orig);
3007       cstepi = int_cst_value (cstep_orig);
3008
3009       if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3010         {
3011           /* TODO maybe consider case when ustep divides cstep and the ratio is
3012              a power of 2 (so that the division is fast to execute)?  We would
3013              need to be much more careful with overflows etc. then.  */
3014           return false;
3015         }
3016
3017       ratio = build_int_cst_type (uutype, ratioi);
3018     }
3019   else
3020     {
3021       ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3022       if (!ratio)
3023         return false;
3024
3025       /* Ratioi is only used to detect special cases when the multiplicative
3026          factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3027          we may set it to 0.  We prefer cst_and_fits_in_hwi/int_cst_value
3028          to integer_onep/integer_all_onesp, since the former ignores
3029          TREE_OVERFLOW.  */
3030       if (cst_and_fits_in_hwi (ratio))
3031         ratioi = int_cst_value (ratio);
3032       else if (integer_onep (ratio))
3033         ratioi = 1;
3034       else if (integer_all_onesp (ratio))
3035         ratioi = -1;
3036       else
3037         ratioi = 0;
3038     }
3039
3040   /* We may need to shift the value if we are after the increment.  */
3041   if (stmt_after_increment (loop, cand, at))
3042     cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3043
3044   /* use = ubase - ratio * cbase + ratio * var.
3045
3046      In general case ubase + ratio * (var - cbase) could be better (one less
3047      multiplication), but often it is possible to eliminate redundant parts
3048      of computations from (ubase - ratio * cbase) term, and if it does not
3049      happen, fold is able to apply the distributive law to obtain this form
3050      anyway.  */
3051
3052   if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3053     {
3054       /* Let's compute in trees and just return the result in AFF.  This case
3055          should not be very common, and fold itself is not that bad either,
3056          so making the aff. functions more complicated to handle this case
3057          is not that urgent.  */
3058       if (ratioi == 1)
3059         {
3060           delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3061           expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3062         }
3063       else if (ratioi == -1)
3064         {
3065           delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3066           expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3067         }
3068       else
3069         {
3070           delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3071           delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3072           expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3073           expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3074         }
3075
3076       aff->type = uutype;
3077       aff->n = 0;
3078       aff->offset = 0;
3079       aff->mask = 0;
3080       aff->rest = expr;
3081       return true;
3082     }
3083
3084   /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3085      possible to compute ratioi.  */
3086   gcc_assert (ratioi);
3087
3088   tree_to_aff_combination (ubase, uutype, aff);
3089   tree_to_aff_combination (cbase, uutype, &cbase_aff);
3090   tree_to_aff_combination (expr, uutype, &expr_aff);
3091   aff_combination_scale (&cbase_aff, -ratioi);
3092   aff_combination_scale (&expr_aff, ratioi);
3093   aff_combination_add (aff, &cbase_aff);
3094   aff_combination_add (aff, &expr_aff);
3095
3096   return true;
3097 }
3098
3099 /* Determines the expression by that USE is expressed from induction variable
3100    CAND at statement AT in LOOP.  The computation is unshared.  */
3101
3102 static tree
3103 get_computation_at (struct loop *loop,
3104                     struct iv_use *use, struct iv_cand *cand, tree at)
3105 {
3106   struct affine_tree_combination aff;
3107   tree type = TREE_TYPE (use->iv->base);
3108
3109   if (!get_computation_aff (loop, use, cand, at, &aff))
3110     return NULL_TREE;
3111   unshare_aff_combination (&aff);
3112   return fold_convert (type, aff_combination_to_tree (&aff));
3113 }
3114
3115 /* Determines the expression by that USE is expressed from induction variable
3116    CAND in LOOP.  The computation is unshared.  */
3117
3118 static tree
3119 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3120 {
3121   return get_computation_at (loop, use, cand, use->stmt);
3122 }
3123
3124 /* Returns cost of addition in MODE.  */
3125
3126 static unsigned
3127 add_cost (enum machine_mode mode)
3128 {
3129   static unsigned costs[NUM_MACHINE_MODES];
3130   rtx seq;
3131   unsigned cost;
3132
3133   if (costs[mode])
3134     return costs[mode];
3135
3136   start_sequence ();
3137   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3138                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3139                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3140                  NULL_RTX);
3141   seq = get_insns ();
3142   end_sequence ();
3143
3144   cost = seq_cost (seq);
3145   if (!cost)
3146     cost = 1;
3147
3148   costs[mode] = cost;
3149
3150   if (dump_file && (dump_flags & TDF_DETAILS))
3151     fprintf (dump_file, "Addition in %s costs %d\n",
3152              GET_MODE_NAME (mode), cost);
3153   return cost;
3154 }
3155
3156 /* Entry in a hashtable of already known costs for multiplication.  */
3157 struct mbc_entry
3158 {
3159   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3160   enum machine_mode mode;       /* In mode.  */
3161   unsigned cost;                /* The cost.  */
3162 };
3163
3164 /* Counts hash value for the ENTRY.  */
3165
3166 static hashval_t
3167 mbc_entry_hash (const void *entry)
3168 {
3169   const struct mbc_entry *e = entry;
3170
3171   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3172 }
3173
3174 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3175
3176 static int
3177 mbc_entry_eq (const void *entry1, const void *entry2)
3178 {
3179   const struct mbc_entry *e1 = entry1;
3180   const struct mbc_entry *e2 = entry2;
3181
3182   return (e1->mode == e2->mode
3183           && e1->cst == e2->cst);
3184 }
3185
3186 /* Returns cost of multiplication by constant CST in MODE.  */
3187
3188 unsigned
3189 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3190 {
3191   static htab_t costs;
3192   struct mbc_entry **cached, act;
3193   rtx seq;
3194   unsigned cost;
3195
3196   if (!costs)
3197     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3198
3199   act.mode = mode;
3200   act.cst = cst;
3201   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3202   if (*cached)
3203     return (*cached)->cost;
3204
3205   *cached = XNEW (struct mbc_entry);
3206   (*cached)->mode = mode;
3207   (*cached)->cst = cst;
3208
3209   start_sequence ();
3210   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3211                gen_int_mode (cst, mode), NULL_RTX, 0);
3212   seq = get_insns ();
3213   end_sequence ();
3214
3215   cost = seq_cost (seq);
3216
3217   if (dump_file && (dump_flags & TDF_DETAILS))
3218     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3219              (int) cst, GET_MODE_NAME (mode), cost);
3220
3221   (*cached)->cost = cost;
3222
3223   return cost;
3224 }
3225
3226 /* Returns true if multiplying by RATIO is allowed in address.  */
3227
3228 bool
3229 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3230 {
3231 #define MAX_RATIO 128
3232   static sbitmap valid_mult;
3233
3234   if (!valid_mult)
3235     {
3236       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3237       rtx addr;
3238       HOST_WIDE_INT i;
3239
3240       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3241       sbitmap_zero (valid_mult);
3242       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3243       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3244         {
3245           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3246           if (memory_address_p (Pmode, addr))
3247             SET_BIT (valid_mult, i + MAX_RATIO);
3248         }
3249
3250       if (dump_file && (dump_flags & TDF_DETAILS))
3251         {
3252           fprintf (dump_file, "  allowed multipliers:");
3253           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3254             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3255               fprintf (dump_file, " %d", (int) i);
3256           fprintf (dump_file, "\n");
3257           fprintf (dump_file, "\n");
3258         }
3259     }
3260
3261   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3262     return false;
3263
3264   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3265 }
3266
3267 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3268    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3269    variable is omitted.  The created memory accesses MODE.
3270
3271    TODO -- there must be some better way.  This all is quite crude.  */
3272
3273 static unsigned
3274 get_address_cost (bool symbol_present, bool var_present,
3275                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3276 {
3277   static bool initialized = false;
3278   static HOST_WIDE_INT rat, off;
3279   static HOST_WIDE_INT min_offset, max_offset;
3280   static unsigned costs[2][2][2][2];
3281   unsigned cost, acost;
3282   rtx seq, addr, base;
3283   bool offset_p, ratio_p;
3284   rtx reg1;
3285   HOST_WIDE_INT s_offset;
3286   unsigned HOST_WIDE_INT mask;
3287   unsigned bits;
3288
3289   if (!initialized)
3290     {
3291       HOST_WIDE_INT i;
3292       initialized = true;
3293
3294       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3295
3296       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3297       for (i = 1; i <= 1 << 20; i <<= 1)
3298         {
3299           XEXP (addr, 1) = gen_int_mode (i, Pmode);
3300           if (!memory_address_p (Pmode, addr))
3301             break;
3302         }
3303       max_offset = i >> 1;
3304       off = max_offset;
3305
3306       for (i = 1; i <= 1 << 20; i <<= 1)
3307         {
3308           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3309           if (!memory_address_p (Pmode, addr))
3310             break;
3311         }
3312       min_offset = -(i >> 1);
3313
3314       if (dump_file && (dump_flags & TDF_DETAILS))
3315         {
3316           fprintf (dump_file, "get_address_cost:\n");
3317           fprintf (dump_file, "  min offset %d\n", (int) min_offset);
3318           fprintf (dump_file, "  max offset %d\n", (int) max_offset);
3319         }
3320
3321       rat = 1;
3322       for (i = 2; i <= MAX_RATIO; i++)
3323         if (multiplier_allowed_in_address_p (i))
3324           {
3325             rat = i;
3326             break;
3327           }
3328     }
3329
3330   bits = GET_MODE_BITSIZE (Pmode);
3331   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3332   offset &= mask;
3333   if ((offset >> (bits - 1) & 1))
3334     offset |= ~mask;
3335   s_offset = offset;
3336
3337   cost = 0;
3338   offset_p = (s_offset != 0
3339               && min_offset <= s_offset && s_offset <= max_offset);
3340   ratio_p = (ratio != 1
3341              && multiplier_allowed_in_address_p (ratio));
3342
3343   if (ratio != 1 && !ratio_p)
3344     cost += multiply_by_cost (ratio, Pmode);
3345
3346   if (s_offset && !offset_p && !symbol_present)
3347     {
3348       cost += add_cost (Pmode);
3349       var_present = true;
3350     }
3351
3352   acost = costs[symbol_present][var_present][offset_p][ratio_p];
3353   if (!acost)
3354     {
3355       int old_cse_not_expected;
3356       acost = 0;
3357
3358       addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3359       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3360       if (ratio_p)
3361         addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3362
3363       if (var_present)
3364         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3365
3366       if (symbol_present)
3367         {
3368           base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3369           if (offset_p)
3370             base = gen_rtx_fmt_e (CONST, Pmode,
3371                                   gen_rtx_fmt_ee (PLUS, Pmode,
3372                                                   base,
3373                                                   gen_int_mode (off, Pmode)));
3374         }
3375       else if (offset_p)
3376         base = gen_int_mode (off, Pmode);
3377       else
3378         base = NULL_RTX;
3379
3380       if (base)
3381         addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3382
3383       start_sequence ();
3384       /* To avoid splitting addressing modes, pretend that no cse will
3385          follow.  */
3386       old_cse_not_expected = cse_not_expected;
3387       cse_not_expected = true;
3388       addr = memory_address (Pmode, addr);
3389       cse_not_expected = old_cse_not_expected;
3390       seq = get_insns ();
3391       end_sequence ();
3392
3393       acost = seq_cost (seq);
3394       acost += address_cost (addr, Pmode);
3395
3396       if (!acost)
3397         acost = 1;
3398       costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3399     }
3400
3401   return cost + acost;
3402 }
3403
3404 /* Estimates cost of forcing expression EXPR into a variable.  */
3405
3406 unsigned
3407 force_expr_to_var_cost (tree expr)
3408 {
3409   static bool costs_initialized = false;
3410   static unsigned integer_cost;
3411   static unsigned symbol_cost;
3412   static unsigned address_cost;
3413   tree op0, op1;
3414   unsigned cost0, cost1, cost;
3415   enum machine_mode mode;
3416
3417   if (!costs_initialized)
3418     {
3419       tree var = create_tmp_var_raw (integer_type_node, "test_var");
3420       rtx x = gen_rtx_MEM (DECL_MODE (var),
3421                            gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3422       tree addr;
3423       tree type = build_pointer_type (integer_type_node);
3424
3425       integer_cost = computation_cost (build_int_cst (integer_type_node,
3426                                                       2000));
3427
3428       SET_DECL_RTL (var, x);
3429       TREE_STATIC (var) = 1;
3430       addr = build1 (ADDR_EXPR, type, var);
3431       symbol_cost = computation_cost (addr) + 1;
3432
3433       address_cost
3434         = computation_cost (build2 (PLUS_EXPR, type,
3435                                     addr,
3436                                     build_int_cst (type, 2000))) + 1;
3437       if (dump_file && (dump_flags & TDF_DETAILS))
3438         {
3439           fprintf (dump_file, "force_expr_to_var_cost:\n");
3440           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3441           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3442           fprintf (dump_file, "  address %d\n", (int) address_cost);
3443           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3444           fprintf (dump_file, "\n");
3445         }
3446
3447       costs_initialized = true;
3448     }
3449
3450   STRIP_NOPS (expr);
3451
3452   if (SSA_VAR_P (expr))
3453     return 0;
3454
3455   if (TREE_INVARIANT (expr))
3456     {
3457       if (TREE_CODE (expr) == INTEGER_CST)
3458         return integer_cost;
3459
3460       if (TREE_CODE (expr) == ADDR_EXPR)
3461         {
3462           tree obj = TREE_OPERAND (expr, 0);
3463
3464           if (TREE_CODE (obj) == VAR_DECL
3465               || TREE_CODE (obj) == PARM_DECL
3466               || TREE_CODE (obj) == RESULT_DECL)
3467             return symbol_cost;
3468         }
3469
3470       return address_cost;
3471     }
3472
3473   switch (TREE_CODE (expr))
3474     {
3475     case PLUS_EXPR:
3476     case MINUS_EXPR:
3477     case MULT_EXPR:
3478       op0 = TREE_OPERAND (expr, 0);
3479       op1 = TREE_OPERAND (expr, 1);
3480       STRIP_NOPS (op0);
3481       STRIP_NOPS (op1);
3482
3483       if (is_gimple_val (op0))
3484         cost0 = 0;
3485       else
3486         cost0 = force_expr_to_var_cost (op0);
3487
3488       if (is_gimple_val (op1))
3489         cost1 = 0;
3490       else
3491         cost1 = force_expr_to_var_cost (op1);
3492
3493       break;
3494
3495     default:
3496       /* Just an arbitrary value, FIXME.  */
3497       return target_spill_cost;
3498     }
3499
3500   mode = TYPE_MODE (TREE_TYPE (expr));
3501   switch (TREE_CODE (expr))
3502     {
3503     case PLUS_EXPR:
3504     case MINUS_EXPR:
3505       cost = add_cost (mode);
3506       break;
3507
3508     case MULT_EXPR:
3509       if (cst_and_fits_in_hwi (op0))
3510         cost = multiply_by_cost (int_cst_value (op0), mode);
3511       else if (cst_and_fits_in_hwi (op1))
3512         cost = multiply_by_cost (int_cst_value (op1), mode);
3513       else
3514         return target_spill_cost;
3515       break;
3516
3517     default:
3518       gcc_unreachable ();
3519     }
3520
3521   cost += cost0;
3522   cost += cost1;
3523
3524   /* Bound the cost by target_spill_cost.  The parts of complicated
3525      computations often are either loop invariant or at least can
3526      be shared between several iv uses, so letting this grow without
3527      limits would not give reasonable results.  */
3528   return cost < target_spill_cost ? cost : target_spill_cost;
3529 }
3530
3531 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3532    invariants the computation depends on.  */
3533
3534 static unsigned
3535 force_var_cost (struct ivopts_data *data,
3536                 tree expr, bitmap *depends_on)
3537 {
3538   if (depends_on)
3539     {
3540       fd_ivopts_data = data;
3541       walk_tree (&expr, find_depends, depends_on, NULL);
3542     }
3543
3544   return force_expr_to_var_cost (expr);
3545 }
3546
3547 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3548    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3549    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3550    invariants the computation depends on.  */
3551
3552 static unsigned
3553 split_address_cost (struct ivopts_data *data,
3554                     tree addr, bool *symbol_present, bool *var_present,
3555                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3556 {
3557   tree core;
3558   HOST_WIDE_INT bitsize;
3559   HOST_WIDE_INT bitpos;
3560   tree toffset;
3561   enum machine_mode mode;
3562   int unsignedp, volatilep;
3563
3564   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3565                               &unsignedp, &volatilep, false);
3566
3567   if (toffset != 0
3568       || bitpos % BITS_PER_UNIT != 0
3569       || TREE_CODE (core) != VAR_DECL)
3570     {
3571       *symbol_present = false;
3572       *var_present = true;
3573       fd_ivopts_data = data;
3574       walk_tree (&addr, find_depends, depends_on, NULL);
3575       return target_spill_cost;
3576     }
3577
3578   *offset += bitpos / BITS_PER_UNIT;
3579   if (TREE_STATIC (core)
3580       || DECL_EXTERNAL (core))
3581     {
3582       *symbol_present = true;
3583       *var_present = false;
3584       return 0;
3585     }
3586
3587   *symbol_present = false;
3588   *var_present = true;
3589   return 0;
3590 }
3591
3592 /* Estimates cost of expressing difference of addresses E1 - E2 as
3593    var + symbol + offset.  The value of offset is added to OFFSET,
3594    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3595    part is missing.  DEPENDS_ON is a set of the invariants the computation
3596    depends on.  */
3597
3598 static unsigned
3599 ptr_difference_cost (struct ivopts_data *data,
3600                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3601                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3602 {
3603   HOST_WIDE_INT diff = 0;
3604   unsigned cost;
3605
3606   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3607
3608   if (ptr_difference_const (e1, e2, &diff))
3609     {
3610       *offset += diff;
3611       *symbol_present = false;
3612       *var_present = false;
3613       return 0;
3614     }
3615
3616   if (e2 == integer_zero_node)
3617     return split_address_cost (data, TREE_OPERAND (e1, 0),
3618                                symbol_present, var_present, offset, depends_on);
3619
3620   *symbol_present = false;
3621   *var_present = true;
3622
3623   cost = force_var_cost (data, e1, depends_on);
3624   cost += force_var_cost (data, e2, depends_on);
3625   cost += add_cost (Pmode);
3626
3627   return cost;
3628 }
3629
3630 /* Estimates cost of expressing difference E1 - E2 as
3631    var + symbol + offset.  The value of offset is added to OFFSET,
3632    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3633    part is missing.  DEPENDS_ON is a set of the invariants the computation
3634    depends on.  */
3635
3636 static unsigned
3637 difference_cost (struct ivopts_data *data,
3638                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3639                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3640 {
3641   unsigned cost;
3642   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3643   unsigned HOST_WIDE_INT off1, off2;
3644
3645   e1 = strip_offset (e1, &off1);
3646   e2 = strip_offset (e2, &off2);
3647   *offset += off1 - off2;
3648
3649   STRIP_NOPS (e1);
3650   STRIP_NOPS (e2);
3651
3652   if (TREE_CODE (e1) == ADDR_EXPR)
3653     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3654                                 depends_on);
3655   *symbol_present = false;
3656
3657   if (operand_equal_p (e1, e2, 0))
3658     {
3659       *var_present = false;
3660       return 0;
3661     }
3662   *var_present = true;
3663   if (zero_p (e2))
3664     return force_var_cost (data, e1, depends_on);
3665
3666   if (zero_p (e1))
3667     {
3668       cost = force_var_cost (data, e2, depends_on);
3669       cost += multiply_by_cost (-1, mode);
3670
3671       return cost;
3672     }
3673
3674   cost = force_var_cost (data, e1, depends_on);
3675   cost += force_var_cost (data, e2, depends_on);
3676   cost += add_cost (mode);
3677
3678   return cost;
3679 }
3680
3681 /* Determines the cost of the computation by that USE is expressed
3682    from induction variable CAND.  If ADDRESS_P is true, we just need
3683    to create an address from it, otherwise we want to get it into
3684    register.  A set of invariants we depend on is stored in
3685    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3686
3687 static unsigned
3688 get_computation_cost_at (struct ivopts_data *data,
3689                          struct iv_use *use, struct iv_cand *cand,
3690                          bool address_p, bitmap *depends_on, tree at)
3691 {
3692   tree ubase = use->iv->base, ustep = use->iv->step;
3693   tree cbase, cstep;
3694   tree utype = TREE_TYPE (ubase), ctype;
3695   unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3696   HOST_WIDE_INT ratio, aratio;
3697   bool var_present, symbol_present;
3698   unsigned cost = 0, n_sums;
3699
3700   *depends_on = NULL;
3701
3702   /* Only consider real candidates.  */
3703   if (!cand->iv)
3704     return INFTY;
3705
3706   cbase = cand->iv->base;
3707   cstep = cand->iv->step;
3708   ctype = TREE_TYPE (cbase);
3709
3710   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3711     {
3712       /* We do not have a precision to express the values of use.  */
3713       return INFTY;
3714     }
3715
3716   if (address_p)
3717     {
3718       /* Do not try to express address of an object with computation based
3719          on address of a different object.  This may cause problems in rtl
3720          level alias analysis (that does not expect this to be happening,
3721          as this is illegal in C), and would be unlikely to be useful
3722          anyway.  */
3723       if (use->iv->base_object
3724           && cand->iv->base_object
3725           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3726         return INFTY;
3727     }
3728
3729   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3730     {
3731       /* TODO -- add direct handling of this case.  */
3732       goto fallback;
3733     }
3734
3735   /* CSTEPI is removed from the offset in case statement is after the
3736      increment.  If the step is not constant, we use zero instead.
3737      This is a bit imprecise (there is the extra addition), but
3738      redundancy elimination is likely to transform the code so that
3739      it uses value of the variable before increment anyway,
3740      so it is not that much unrealistic.  */
3741   if (cst_and_fits_in_hwi (cstep))
3742     cstepi = int_cst_value (cstep);
3743   else
3744     cstepi = 0;
3745
3746   if (cst_and_fits_in_hwi (ustep)
3747       && cst_and_fits_in_hwi (cstep))
3748     {
3749       ustepi = int_cst_value (ustep);
3750
3751       if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3752         return INFTY;
3753     }
3754   else
3755     {
3756       tree rat;
3757
3758       rat = constant_multiple_of (utype, ustep, cstep);
3759
3760       if (!rat)
3761         return INFTY;
3762
3763       if (cst_and_fits_in_hwi (rat))
3764         ratio = int_cst_value (rat);
3765       else if (integer_onep (rat))
3766         ratio = 1;
3767       else if (integer_all_onesp (rat))
3768         ratio = -1;
3769       else
3770         return INFTY;
3771     }
3772
3773   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3774      or ratio == 1, it is better to handle this like
3775
3776      ubase - ratio * cbase + ratio * var
3777
3778      (also holds in the case ratio == -1, TODO.  */
3779
3780   if (cst_and_fits_in_hwi (cbase))
3781     {
3782       offset = - ratio * int_cst_value (cbase);
3783       cost += difference_cost (data,
3784                                ubase, integer_zero_node,
3785                                &symbol_present, &var_present, &offset,
3786                                depends_on);
3787     }
3788   else if (ratio == 1)
3789     {
3790       cost += difference_cost (data,
3791                                ubase, cbase,
3792                                &symbol_present, &var_present, &offset,
3793                                depends_on);
3794     }
3795   else
3796     {
3797       cost += force_var_cost (data, cbase, depends_on);
3798       cost += add_cost (TYPE_MODE (ctype));
3799       cost += difference_cost (data,
3800                                ubase, integer_zero_node,
3801                                &symbol_present, &var_present, &offset,
3802                                depends_on);
3803     }
3804
3805   /* If we are after the increment, the value of the candidate is higher by
3806      one iteration.  */
3807   if (stmt_after_increment (data->current_loop, cand, at))
3808     offset -= ratio * cstepi;
3809
3810   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3811      (symbol/var/const parts may be omitted).  If we are looking for an address,
3812      find the cost of addressing this.  */
3813   if (address_p)
3814     return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3815
3816   /* Otherwise estimate the costs for computing the expression.  */
3817   aratio = ratio > 0 ? ratio : -ratio;
3818   if (!symbol_present && !var_present && !offset)
3819     {
3820       if (ratio != 1)
3821         cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3822
3823       return cost;
3824     }
3825
3826   if (aratio != 1)
3827     cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3828
3829   n_sums = 1;
3830   if (var_present
3831       /* Symbol + offset should be compile-time computable.  */
3832       && (symbol_present || offset))
3833     n_sums++;
3834
3835   return cost + n_sums * add_cost (TYPE_MODE (ctype));
3836
3837 fallback:
3838   {
3839     /* Just get the expression, expand it and measure the cost.  */
3840     tree comp = get_computation_at (data->current_loop, use, cand, at);
3841
3842     if (!comp)
3843       return INFTY;
3844
3845     if (address_p)
3846       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3847
3848     return computation_cost (comp);
3849   }
3850 }
3851
3852 /* Determines the cost of the computation by that USE is expressed
3853    from induction variable CAND.  If ADDRESS_P is true, we just need
3854    to create an address from it, otherwise we want to get it into
3855    register.  A set of invariants we depend on is stored in
3856    DEPENDS_ON.  */
3857
3858 static unsigned
3859 get_computation_cost (struct ivopts_data *data,
3860                       struct iv_use *use, struct iv_cand *cand,
3861                       bool address_p, bitmap *depends_on)
3862 {
3863   return get_computation_cost_at (data,
3864                                   use, cand, address_p, depends_on, use->stmt);
3865 }
3866
3867 /* Determines cost of basing replacement of USE on CAND in a generic
3868    expression.  */
3869
3870 static bool
3871 determine_use_iv_cost_generic (struct ivopts_data *data,
3872                                struct iv_use *use, struct iv_cand *cand)
3873 {
3874   bitmap depends_on;
3875   unsigned cost;
3876
3877   /* The simple case first -- if we need to express value of the preserved
3878      original biv, the cost is 0.  This also prevents us from counting the
3879      cost of increment twice -- once at this use and once in the cost of
3880      the candidate.  */
3881   if (cand->pos == IP_ORIGINAL
3882       && cand->incremented_at == use->stmt)
3883     {
3884       set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3885       return true;
3886     }
3887
3888   cost = get_computation_cost (data, use, cand, false, &depends_on);
3889   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3890
3891   return cost != INFTY;
3892 }
3893
3894 /* Determines cost of basing replacement of USE on CAND in an address.  */
3895
3896 static bool
3897 determine_use_iv_cost_address (struct ivopts_data *data,
3898                                struct iv_use *use, struct iv_cand *cand)
3899 {
3900   bitmap depends_on;
3901   unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3902
3903   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3904
3905   return cost != INFTY;
3906 }
3907
3908 /* Computes value of induction variable IV in iteration NITER.  */
3909
3910 static tree
3911 iv_value (struct iv *iv, tree niter)
3912 {
3913   tree val;
3914   tree type = TREE_TYPE (iv->base);
3915
3916   niter = fold_convert (type, niter);
3917   val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3918
3919   return fold_build2 (PLUS_EXPR, type, iv->base, val);
3920 }
3921
3922 /* Computes value of candidate CAND at position AT in iteration NITER.  */
3923
3924 static tree
3925 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3926 {
3927   tree val = iv_value (cand->iv, niter);
3928   tree type = TREE_TYPE (cand->iv->base);
3929
3930   if (stmt_after_increment (loop, cand, at))
3931     val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3932
3933   return val;
3934 }
3935
3936 /* Returns period of induction variable iv.  */
3937
3938 static tree
3939 iv_period (struct iv *iv)
3940 {
3941   tree step = iv->step, period, type;
3942   tree pow2div;
3943
3944   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3945
3946   /* Period of the iv is gcd (step, type range).  Since type range is power
3947      of two, it suffices to determine the maximum power of two that divides
3948      step.  */
3949   pow2div = num_ending_zeros (step);
3950   type = unsigned_type_for (TREE_TYPE (step));
3951
3952   period = build_low_bits_mask (type,
3953                                 (TYPE_PRECISION (type)
3954                                  - tree_low_cst (pow2div, 1)));
3955
3956   return period;
3957 }
3958
3959 /* Returns the comparison operator used when eliminating the iv USE.  */
3960
3961 static enum tree_code
3962 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3963 {
3964   struct loop *loop = data->current_loop;
3965   basic_block ex_bb;
3966   edge exit;
3967
3968   ex_bb = bb_for_stmt (use->stmt);
3969   exit = EDGE_SUCC (ex_bb, 0);
3970   if (flow_bb_inside_loop_p (loop, exit->dest))
3971     exit = EDGE_SUCC (ex_bb, 1);
3972
3973   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3974 }
3975
3976 /* Check whether it is possible to express the condition in USE by comparison
3977    of candidate CAND.  If so, store the value compared with to BOUND.  */
3978
3979 static bool
3980 may_eliminate_iv (struct ivopts_data *data,
3981                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3982 {
3983   basic_block ex_bb;
3984   edge exit;
3985   tree nit, nit_type;
3986   tree wider_type, period, per_type;
3987   struct loop *loop = data->current_loop;
3988
3989   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3990     return false;
3991
3992   /* For now works only for exits that dominate the loop latch.  TODO -- extend
3993      for other conditions inside loop body.  */
3994   ex_bb = bb_for_stmt (use->stmt);
3995   if (use->stmt != last_stmt (ex_bb)
3996       || TREE_CODE (use->stmt) != COND_EXPR)
3997     return false;
3998   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3999     return false;
4000
4001   exit = EDGE_SUCC (ex_bb, 0);
4002   if (flow_bb_inside_loop_p (loop, exit->dest))
4003     exit = EDGE_SUCC (ex_bb, 1);
4004   if (flow_bb_inside_loop_p (loop, exit->dest))
4005     return false;
4006
4007   nit = niter_for_exit (data, exit);
4008   if (!nit)
4009     return false;
4010
4011   nit_type = TREE_TYPE (nit);
4012
4013   /* Determine whether we may use the variable to test whether niter iterations
4014      elapsed.  This is the case iff the period of the induction variable is
4015      greater than the number of iterations.  */
4016   period = iv_period (cand->iv);
4017   if (!period)
4018     return false;
4019   per_type = TREE_TYPE (period);
4020
4021   wider_type = TREE_TYPE (period);
4022   if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4023     wider_type = per_type;
4024   else
4025     wider_type = nit_type;
4026
4027   if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4028                                       fold_convert (wider_type, period),
4029                                       fold_convert (wider_type, nit))))
4030     return false;
4031
4032   *bound = cand_value_at (loop, cand, use->stmt, nit);
4033   return true;
4034 }
4035
4036 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4037
4038 static bool
4039 determine_use_iv_cost_condition (struct ivopts_data *data,
4040                                  struct iv_use *use, struct iv_cand *cand)
4041 {
4042   tree bound = NULL_TREE, op, cond;
4043   bitmap depends_on = NULL;
4044   unsigned cost;
4045
4046   /* Only consider real candidates.  */
4047   if (!cand->iv)
4048     {
4049       set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4050       return false;
4051     }
4052
4053   if (may_eliminate_iv (data, use, cand, &bound))
4054     {
4055       cost = force_var_cost (data, bound, &depends_on);
4056
4057       set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4058       return cost != INFTY;
4059     }
4060
4061   /* The induction variable elimination failed; just express the original
4062      giv.  If it is compared with an invariant, note that we cannot get
4063      rid of it.  */
4064   cost = get_computation_cost (data, use, cand, false, &depends_on);
4065
4066   cond = *use->op_p;
4067   if (TREE_CODE (cond) != SSA_NAME)
4068     {
4069       op = TREE_OPERAND (cond, 0);
4070       if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4071         op = TREE_OPERAND (cond, 1);
4072       if (TREE_CODE (op) == SSA_NAME)
4073         {
4074           op = get_iv (data, op)->base;
4075           fd_ivopts_data = data;
4076           walk_tree (&op, find_depends, &depends_on, NULL);
4077         }
4078     }
4079
4080   set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4081   return cost != INFTY;
4082 }
4083
4084 /* Determines cost of basing replacement of USE on CAND.  Returns false
4085    if USE cannot be based on CAND.  */
4086
4087 static bool
4088 determine_use_iv_cost (struct ivopts_data *data,
4089                        struct iv_use *use, struct iv_cand *cand)
4090 {
4091   switch (use->type)
4092     {
4093     case USE_NONLINEAR_EXPR:
4094       return determine_use_iv_cost_generic (data, use, cand);
4095
4096     case USE_ADDRESS:
4097       return determine_use_iv_cost_address (data, use, cand);
4098
4099     case USE_COMPARE:
4100       return determine_use_iv_cost_condition (data, use, cand);
4101
4102     default:
4103       gcc_unreachable ();
4104     }
4105 }
4106
4107 /* Determines costs of basing the use of the iv on an iv candidate.  */
4108
4109 static void
4110 determine_use_iv_costs (struct ivopts_data *data)
4111 {
4112   unsigned i, j;
4113   struct iv_use *use;
4114   struct iv_cand *cand;
4115   bitmap to_clear = BITMAP_ALLOC (NULL);
4116
4117   alloc_use_cost_map (data);
4118
4119   for (i = 0; i < n_iv_uses (data); i++)
4120     {
4121       use = iv_use (data, i);
4122
4123       if (data->consider_all_candidates)
4124         {
4125           for (j = 0; j < n_iv_cands (data); j++)
4126             {
4127               cand = iv_cand (data, j);
4128               determine_use_iv_cost (data, use, cand);
4129             }
4130         }
4131       else
4132         {
4133           bitmap_iterator bi;
4134
4135           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4136             {
4137               cand = iv_cand (data, j);
4138               if (!determine_use_iv_cost (data, use, cand))
4139                 bitmap_set_bit (to_clear, j);
4140             }
4141
4142           /* Remove the candidates for that the cost is infinite from
4143              the list of related candidates.  */
4144           bitmap_and_compl_into (use->related_cands, to_clear);
4145           bitmap_clear (to_clear);
4146         }
4147     }
4148
4149   BITMAP_FREE (to_clear);
4150
4151   if (dump_file && (dump_flags & TDF_DETAILS))
4152     {
4153       fprintf (dump_file, "Use-candidate costs:\n");
4154
4155       for (i = 0; i < n_iv_uses (data); i++)
4156         {
4157           use = iv_use (data, i);
4158
4159           fprintf (dump_file, "Use %d:\n", i);
4160           fprintf (dump_file, "  cand\tcost\tdepends on\n");
4161           for (j = 0; j < use->n_map_members; j++)
4162             {
4163               if (!use->cost_map[j].cand
4164                   || use->cost_map[j].cost == INFTY)
4165                 continue;
4166
4167               fprintf (dump_file, "  %d\t%d\t",
4168                        use->cost_map[j].cand->id,
4169                        use->cost_map[j].cost);
4170               if (use->cost_map[j].depends_on)
4171                 bitmap_print (dump_file,
4172                               use->cost_map[j].depends_on, "","");
4173               fprintf (dump_file, "\n");
4174             }
4175
4176           fprintf (dump_file, "\n");
4177         }
4178       fprintf (dump_file, "\n");
4179     }
4180 }
4181
4182 /* Determines cost of the candidate CAND.  */
4183
4184 static void
4185 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4186 {
4187   unsigned cost_base, cost_step;
4188   tree base;
4189
4190   if (!cand->iv)
4191     {
4192       cand->cost = 0;
4193       return;
4194     }
4195
4196   /* There are two costs associated with the candidate -- its increment
4197      and its initialization.  The second is almost negligible for any loop
4198      that rolls enough, so we take it just very little into account.  */
4199
4200   base = cand->iv->base;
4201   cost_base = force_var_cost (data, base, NULL);
4202   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4203
4204   cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4205
4206   /* Prefer the original iv unless we may gain something by replacing it;
4207      this is not really relevant for artificial ivs created by other
4208      passes.  */
4209   if (cand->pos == IP_ORIGINAL
4210       && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4211     cand->cost--;
4212
4213   /* Prefer not to insert statements into latch unless there are some
4214      already (so that we do not create unnecessary jumps).  */
4215   if (cand->pos == IP_END
4216       && empty_block_p (ip_end_pos (data->current_loop)))
4217     cand->cost++;
4218 }
4219
4220 /* Determines costs of computation of the candidates.  */
4221
4222 static void
4223 determine_iv_costs (struct ivopts_data *data)
4224 {
4225   unsigned i;
4226
4227   if (dump_file && (dump_flags & TDF_DETAILS))
4228     {
4229       fprintf (dump_file, "Candidate costs:\n");
4230       fprintf (dump_file, "  cand\tcost\n");
4231     }
4232
4233   for (i = 0; i < n_iv_cands (data); i++)
4234     {
4235       struct iv_cand *cand = iv_cand (data, i);
4236
4237       determine_iv_cost (data, cand);
4238
4239       if (dump_file && (dump_flags & TDF_DETAILS))
4240         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4241     }
4242
4243 if (dump_file && (dump_flags & TDF_DETAILS))
4244       fprintf (dump_file, "\n");
4245 }
4246
4247 /* Calculates cost for having SIZE induction variables.  */
4248
4249 static unsigned
4250 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4251 {
4252   return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4253 }
4254
4255 /* For each size of the induction variable set determine the penalty.  */
4256
4257 static void
4258 determine_set_costs (struct ivopts_data *data)
4259 {
4260   unsigned j, n;
4261   tree phi, op;
4262   struct loop *loop = data->current_loop;
4263   bitmap_iterator bi;
4264
4265   /* We use the following model (definitely improvable, especially the
4266      cost function -- TODO):
4267
4268      We estimate the number of registers available (using MD data), name it A.
4269
4270      We estimate the number of registers used by the loop, name it U.  This
4271      number is obtained as the number of loop phi nodes (not counting virtual
4272      registers and bivs) + the number of variables from outside of the loop.
4273
4274      We set a reserve R (free regs that are used for temporary computations,
4275      etc.).  For now the reserve is a constant 3.
4276
4277      Let I be the number of induction variables.
4278
4279      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4280         make a lot of ivs without a reason).
4281      -- if A - R < U + I <= A, the cost is I * PRES_COST
4282      -- if U + I > A, the cost is I * PRES_COST and
4283         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4284
4285   if (dump_file && (dump_flags & TDF_DETAILS))
4286     {
4287       fprintf (dump_file, "Global costs:\n");
4288       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4289       fprintf (dump_file, "  target_small_cost %d\n", target_small_cost);
4290       fprintf (dump_file, "  target_pres_cost %d\n", target_pres_cost);
4291       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4292     }
4293
4294   n = 0;
4295   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4296     {
4297       op = PHI_RESULT (phi);
4298
4299       if (!is_gimple_reg (op))
4300         continue;
4301
4302       if (get_iv (data, op))
4303         continue;
4304
4305       n++;
4306     }
4307
4308   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4309     {
4310       struct version_info *info = ver_info (data, j);
4311
4312       if (info->inv_id && info->has_nonlin_use)
4313         n++;
4314     }
4315
4316   data->regs_used = n;
4317   if (dump_file && (dump_flags & TDF_DETAILS))
4318     fprintf (dump_file, "  regs_used %d\n", n);
4319
4320   if (dump_file && (dump_flags & TDF_DETAILS))
4321     {
4322       fprintf (dump_file, "  cost for size:\n");
4323       fprintf (dump_file, "  ivs\tcost\n");
4324       for (j = 0; j <= 2 * target_avail_regs; j++)
4325         fprintf (dump_file, "  %d\t%d\n", j,
4326                  ivopts_global_cost_for_size (data, j));
4327       fprintf (dump_file, "\n");
4328     }
4329 }
4330
4331 /* Returns true if A is a cheaper cost pair than B.  */
4332
4333 static bool
4334 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4335 {
4336   if (!a)
4337     return false;
4338
4339   if (!b)
4340     return true;
4341
4342   if (a->cost < b->cost)
4343     return true;
4344
4345   if (a->cost > b->cost)
4346     return false;
4347
4348   /* In case the costs are the same, prefer the cheaper candidate.  */
4349   if (a->cand->cost < b->cand->cost)
4350     return true;
4351
4352   return false;
4353 }
4354
4355 /* Computes the cost field of IVS structure.  */
4356
4357 static void
4358 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4359 {
4360   unsigned cost = 0;
4361
4362   cost += ivs->cand_use_cost;
4363   cost += ivs->cand_cost;
4364   cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4365
4366   ivs->cost = cost;
4367 }
4368
4369 /* Remove invariants in set INVS to set IVS.  */
4370
4371 static void
4372 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4373 {
4374   bitmap_iterator bi;
4375   unsigned iid;
4376
4377   if (!invs)
4378     return;
4379
4380   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4381     {
4382       ivs->n_invariant_uses[iid]--;
4383       if (ivs->n_invariant_uses[iid] == 0)
4384         ivs->n_regs--;
4385     }
4386 }
4387
4388 /* Set USE not to be expressed by any candidate in IVS.  */
4389
4390 static void
4391 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4392                  struct iv_use *use)
4393 {
4394   unsigned uid = use->id, cid;
4395   struct cost_pair *cp;
4396
4397   cp = ivs->cand_for_use[uid];
4398   if (!cp)
4399     return;
4400   cid = cp->cand->id;
4401
4402   ivs->bad_uses++;
4403   ivs->cand_for_use[uid] = NULL;
4404   ivs->n_cand_uses[cid]--;
4405
4406   if (ivs->n_cand_uses[cid] == 0)
4407     {
4408       bitmap_clear_bit (ivs->cands, cid);
4409       /* Do not count the pseudocandidates.  */
4410       if (cp->cand->iv)
4411         ivs->n_regs--;
4412       ivs->n_cands--;
4413       ivs->cand_cost -= cp->cand->cost;
4414
4415       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4416     }
4417
4418   ivs->cand_use_cost -= cp->cost;
4419
4420   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4421   iv_ca_recount_cost (data, ivs);
4422 }
4423
4424 /* Add invariants in set INVS to set IVS.  */
4425
4426 static void
4427 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4428 {
4429   bitmap_iterator bi;
4430   unsigned iid;
4431
4432   if (!invs)
4433     return;
4434
4435   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4436     {
4437       ivs->n_invariant_uses[iid]++;
4438       if (ivs->n_invariant_uses[iid] == 1)
4439         ivs->n_regs++;
4440     }
4441 }
4442
4443 /* Set cost pair for USE in set IVS to CP.  */
4444
4445 static void
4446 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4447               struct iv_use *use, struct cost_pair *cp)
4448 {
4449   unsigned uid = use->id, cid;
4450
4451   if (ivs->cand_for_use[uid] == cp)
4452     return;
4453
4454   if (ivs->cand_for_use[uid])
4455     iv_ca_set_no_cp (data, ivs, use);
4456
4457   if (cp)
4458     {
4459       cid = cp->cand->id;
4460
4461       ivs->bad_uses--;
4462       ivs->cand_for_use[uid] = cp;
4463       ivs->n_cand_uses[cid]++;
4464       if (ivs->n_cand_uses[cid] == 1)
4465         {
4466           bitmap_set_bit (ivs->cands, cid);
4467           /* Do not count the pseudocandidates.  */
4468           if (cp->cand->iv)
4469             ivs->n_regs++;
4470           ivs->n_cands++;
4471           ivs->cand_cost += cp->cand->cost;
4472
4473           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4474         }
4475
4476       ivs->cand_use_cost += cp->cost;
4477       iv_ca_set_add_invariants (ivs, cp->depends_on);
4478       iv_ca_recount_cost (data, ivs);
4479     }
4480 }
4481
4482 /* Extend set IVS by expressing USE by some of the candidates in it
4483    if possible.  */
4484
4485 static void
4486 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4487                struct iv_use *use)
4488 {
4489   struct cost_pair *best_cp = NULL, *cp;
4490   bitmap_iterator bi;
4491   unsigned i;
4492
4493   gcc_assert (ivs->upto >= use->id);
4494
4495   if (ivs->upto == use->id)
4496     {
4497       ivs->upto++;
4498       ivs->bad_uses++;
4499     }
4500
4501   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4502     {
4503       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4504
4505       if (cheaper_cost_pair (cp, best_cp))
4506         best_cp = cp;
4507     }
4508
4509   iv_ca_set_cp (data, ivs, use, best_cp);
4510 }
4511
4512 /* Get cost for assignment IVS.  */
4513
4514 static unsigned
4515 iv_ca_cost (struct iv_ca *ivs)
4516 {
4517   return (ivs->bad_uses ? INFTY : ivs->cost);
4518 }
4519
4520 /* Returns true if all dependences of CP are among invariants in IVS.  */
4521
4522 static bool
4523 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4524 {
4525   unsigned i;
4526   bitmap_iterator bi;
4527
4528   if (!cp->depends_on)
4529     return true;
4530
4531   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4532     {
4533       if (ivs->n_invariant_uses[i] == 0)
4534         return false;
4535     }
4536
4537   return true;
4538 }
4539
4540 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4541    it before NEXT_CHANGE.  */
4542
4543 static struct iv_ca_delta *
4544 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4545                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4546 {
4547   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4548
4549   change->use = use;
4550   change->old_cp = old_cp;
4551   change->new_cp = new_cp;
4552   change->next_change = next_change;
4553
4554   return change;
4555 }
4556
4557 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4558    are rewritten.  */
4559
4560 static struct iv_ca_delta *
4561 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4562 {
4563   struct iv_ca_delta *last;
4564
4565   if (!l2)
4566     return l1;
4567
4568   if (!l1)
4569     return l2;
4570
4571   for (last = l1; last->next_change; last = last->next_change)
4572     continue;
4573   last->next_change = l2;
4574
4575   return l1;
4576 }
4577
4578 /* Returns candidate by that USE is expressed in IVS.  */
4579
4580 static struct cost_pair *
4581 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4582 {
4583   return ivs->cand_for_use[use->id];
4584 }
4585
4586 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4587
4588 static struct iv_ca_delta *
4589 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4590 {
4591   struct iv_ca_delta *act, *next, *prev = NULL;
4592   struct cost_pair *tmp;
4593
4594   for (act = delta; act; act = next)
4595     {
4596       next = act->next_change;
4597       act->next_change = prev;
4598       prev = act;
4599
4600       tmp = act->old_cp;
4601       act->old_cp = act->new_cp;
4602       act->new_cp = tmp;
4603     }
4604
4605   return prev;
4606 }
4607
4608 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4609    reverted instead.  */
4610
4611 static void
4612 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4613                     struct iv_ca_delta *delta, bool forward)
4614 {
4615   struct cost_pair *from, *to;
4616   struct iv_ca_delta *act;
4617
4618   if (!forward)
4619     delta = iv_ca_delta_reverse (delta);
4620
4621   for (act = delta; act; act = act->next_change)
4622     {
4623       from = act->old_cp;
4624       to = act->new_cp;
4625       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4626       iv_ca_set_cp (data, ivs, act->use, to);
4627     }
4628
4629   if (!forward)
4630     iv_ca_delta_reverse (delta);
4631 }
4632
4633 /* Returns true if CAND is used in IVS.  */
4634
4635 static bool
4636 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4637 {
4638   return ivs->n_cand_uses[cand->id] > 0;
4639 }
4640
4641 /* Returns number of induction variable candidates in the set IVS.  */
4642
4643 static unsigned
4644 iv_ca_n_cands (struct iv_ca *ivs)
4645 {
4646   return ivs->n_cands;
4647 }
4648
4649 /* Free the list of changes DELTA.  */
4650
4651 static void
4652 iv_ca_delta_free (struct iv_ca_delta **delta)
4653 {
4654   struct iv_ca_delta *act, *next;
4655
4656   for (act = *delta; act; act = next)
4657     {
4658       next = act->next_change;
4659       free (act);
4660     }
4661
4662   *delta = NULL;
4663 }
4664
4665 /* Allocates new iv candidates assignment.  */
4666
4667 static struct iv_ca *
4668 iv_ca_new (struct ivopts_data *data)
4669 {
4670   struct iv_ca *nw = XNEW (struct iv_ca);
4671
4672   nw->upto = 0;
4673   nw->bad_uses = 0;
4674   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4675   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4676   nw->cands = BITMAP_ALLOC (NULL);
4677   nw->n_cands = 0;
4678   nw->n_regs = 0;
4679   nw->cand_use_cost = 0;
4680   nw->cand_cost = 0;
4681   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4682   nw->cost = 0;
4683
4684   return nw;
4685 }
4686
4687 /* Free memory occupied by the set IVS.  */
4688
4689 static void
4690 iv_ca_free (struct iv_ca **ivs)
4691 {
4692   free ((*ivs)->cand_for_use);
4693   free ((*ivs)->n_cand_uses);
4694   BITMAP_FREE ((*ivs)->cands);
4695   free ((*ivs)->n_invariant_uses);
4696   free (*ivs);
4697   *ivs = NULL;
4698 }
4699
4700 /* Dumps IVS to FILE.  */
4701
4702 static void
4703 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4704 {
4705   const char *pref = "  invariants ";
4706   unsigned i;
4707
4708   fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
4709   bitmap_print (file, ivs->cands, "  candidates ","\n");
4710
4711   for (i = 1; i <= data->max_inv_id; i++)
4712     if (ivs->n_invariant_uses[i])
4713       {
4714         fprintf (file, "%s%d", pref, i);
4715         pref = ", ";
4716       }
4717   fprintf (file, "\n");
4718 }
4719
4720 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4721    new set, and store differences in DELTA.  Number of induction variables
4722    in the new set is stored to N_IVS.  */
4723
4724 static unsigned
4725 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4726               struct iv_cand *cand, struct iv_ca_delta **delta,
4727               unsigned *n_ivs)
4728 {
4729   unsigned i, cost;
4730   struct iv_use *use;
4731   struct cost_pair *old_cp, *new_cp;
4732
4733   *delta = NULL;
4734   for (i = 0; i < ivs->upto; i++)
4735     {
4736       use = iv_use (data, i);
4737       old_cp = iv_ca_cand_for_use (ivs, use);
4738
4739       if (old_cp
4740           && old_cp->cand == cand)
4741         continue;
4742
4743       new_cp = get_use_iv_cost (data, use, cand);
4744       if (!new_cp)
4745         continue;
4746
4747       if (!iv_ca_has_deps (ivs, new_cp))
4748         continue;
4749
4750       if (!cheaper_cost_pair (new_cp, old_cp))
4751         continue;
4752
4753       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4754     }
4755
4756   iv_ca_delta_commit (data, ivs, *delta, true);
4757   cost = iv_ca_cost (ivs);
4758   if (n_ivs)
4759     *n_ivs = iv_ca_n_cands (ivs);
4760   iv_ca_delta_commit (data, ivs, *delta, false);
4761
4762   return cost;
4763 }
4764
4765 /* Try narrowing set IVS by removing CAND.  Return the cost of
4766    the new set and store the differences in DELTA.  */
4767
4768 static unsigned
4769 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4770               struct iv_cand *cand, struct iv_ca_delta **delta)
4771 {
4772   unsigned i, ci;
4773   struct iv_use *use;
4774   struct cost_pair *old_cp, *new_cp, *cp;
4775   bitmap_iterator bi;
4776   struct iv_cand *cnd;
4777   unsigned cost;
4778
4779   *delta = NULL;
4780   for (i = 0; i < n_iv_uses (data); i++)
4781     {
4782       use = iv_use (data, i);
4783
4784       old_cp = iv_ca_cand_for_use (ivs, use);
4785       if (old_cp->cand != cand)
4786         continue;
4787
4788       new_cp = NULL;
4789
4790       if (data->consider_all_candidates)
4791         {
4792           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4793             {
4794               if (ci == cand->id)
4795                 continue;
4796
4797               cnd = iv_cand (data, ci);
4798
4799               cp = get_use_iv_cost (data, use, cnd);
4800               if (!cp)
4801                 continue;
4802               if (!iv_ca_has_deps (ivs, cp))
4803                 continue;
4804
4805               if (!cheaper_cost_pair (cp, new_cp))
4806                 continue;
4807
4808               new_cp = cp;
4809             }
4810         }
4811       else
4812         {
4813           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4814             {
4815               if (ci == cand->id)
4816                 continue;
4817
4818               cnd = iv_cand (data, ci);
4819
4820               cp = get_use_iv_cost (data, use, cnd);
4821               if (!cp)
4822                 continue;
4823               if (!iv_ca_has_deps (ivs, cp))
4824                 continue;
4825
4826               if (!cheaper_cost_pair (cp, new_cp))
4827                 continue;
4828
4829               new_cp = cp;
4830             }
4831         }
4832
4833       if (!new_cp)
4834         {
4835           iv_ca_delta_free (delta);
4836           return INFTY;
4837         }
4838
4839       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4840     }
4841
4842   iv_ca_delta_commit (data, ivs, *delta, true);
4843   cost = iv_ca_cost (ivs);
4844   iv_ca_delta_commit (data, ivs, *delta, false);
4845
4846   return cost;
4847 }
4848
4849 /* Try optimizing the set of candidates IVS by removing candidates different
4850    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4851    differences in DELTA.  */
4852
4853 static unsigned
4854 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4855              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4856 {
4857   bitmap_iterator bi;
4858   struct iv_ca_delta *act_delta, *best_delta;
4859   unsigned i, best_cost, acost;
4860   struct iv_cand *cand;
4861
4862   best_delta = NULL;
4863   best_cost = iv_ca_cost (ivs);
4864
4865   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4866     {
4867       cand = iv_cand (data, i);
4868
4869       if (cand == except_cand)
4870         continue;
4871
4872       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4873
4874       if (acost < best_cost)
4875         {
4876           best_cost = acost;
4877           iv_ca_delta_free (&best_delta);
4878           best_delta = act_delta;
4879         }
4880       else
4881         iv_ca_delta_free (&act_delta);
4882     }
4883
4884   if (!best_delta)
4885     {
4886       *delta = NULL;
4887       return best_cost;
4888     }
4889
4890   /* Recurse to possibly remove other unnecessary ivs.  */
4891   iv_ca_delta_commit (data, ivs, best_delta, true);
4892   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4893   iv_ca_delta_commit (data, ivs, best_delta, false);
4894   *delta = iv_ca_delta_join (best_delta, *delta);
4895   return best_cost;
4896 }
4897
4898 /* Tries to extend the sets IVS in the best possible way in order
4899    to express the USE.  */
4900
4901 static bool
4902 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4903                   struct iv_use *use)
4904 {
4905   unsigned best_cost, act_cost;
4906   unsigned i;
4907   bitmap_iterator bi;
4908   struct iv_cand *cand;
4909   struct iv_ca_delta *best_delta = NULL, *act_delta;
4910   struct cost_pair *cp;
4911
4912   iv_ca_add_use (data, ivs, use);
4913   best_cost = iv_ca_cost (ivs);
4914
4915   cp = iv_ca_cand_for_use (ivs, use);
4916   if (cp)
4917     {
4918       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4919       iv_ca_set_no_cp (data, ivs, use);
4920     }
4921
4922   /* First try important candidates.  Only if it fails, try the specific ones.
4923      Rationale -- in loops with many variables the best choice often is to use
4924      just one generic biv.  If we added here many ivs specific to the uses,
4925      the optimization algorithm later would be likely to get stuck in a local
4926      minimum, thus causing us to create too many ivs.  The approach from
4927      few ivs to more seems more likely to be successful -- starting from few
4928      ivs, replacing an expensive use by a specific iv should always be a
4929      win.  */
4930   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4931     {
4932       cand = iv_cand (data, i);
4933
4934       if (iv_ca_cand_used_p (ivs, cand))
4935         continue;
4936
4937       cp = get_use_iv_cost (data, use, cand);
4938       if (!cp)
4939         continue;
4940
4941       iv_ca_set_cp (data, ivs, use, cp);
4942       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4943       iv_ca_set_no_cp (data, ivs, use);
4944       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4945
4946       if (act_cost < best_cost)
4947         {
4948           best_cost = act_cost;
4949
4950           iv_ca_delta_free (&best_delta);
4951           best_delta = act_delta;
4952         }
4953       else
4954         iv_ca_delta_free (&act_delta);
4955     }
4956
4957   if (best_cost == INFTY)
4958     {
4959       for (i = 0; i < use->n_map_members; i++)
4960         {
4961           cp = use->cost_map + i;
4962           cand = cp->cand;
4963           if (!cand)
4964             continue;
4965
4966           /* Already tried this.  */
4967           if (cand->important)
4968             continue;
4969
4970           if (iv_ca_cand_used_p (ivs, cand))
4971             continue;
4972
4973           act_delta = NULL;
4974           iv_ca_set_cp (data, ivs, use, cp);
4975           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4976           iv_ca_set_no_cp (data, ivs, use);
4977           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4978                                        cp, act_delta);
4979
4980           if (act_cost < best_cost)
4981             {
4982               best_cost = act_cost;
4983
4984               if (best_delta)
4985                 iv_ca_delta_free (&best_delta);
4986               best_delta = act_delta;
4987             }
4988           else
4989             iv_ca_delta_free (&act_delta);
4990         }
4991     }
4992
4993   iv_ca_delta_commit (data, ivs, best_delta, true);
4994   iv_ca_delta_free (&best_delta);
4995
4996   return (best_cost != INFTY);
4997 }
4998
4999 /* Finds an initial assignment of candidates to uses.  */
5000
5001 static struct iv_ca *
5002 get_initial_solution (struct ivopts_data *data)
5003 {
5004   struct iv_ca *ivs = iv_ca_new (data);
5005   unsigned i;
5006
5007   for (i = 0; i < n_iv_uses (data); i++)
5008     if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5009       {
5010         iv_ca_free (&ivs);
5011         return NULL;
5012       }
5013
5014   return ivs;
5015 }
5016
5017 /* Tries to improve set of induction variables IVS.  */
5018
5019 static bool
5020 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5021 {
5022   unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5023   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5024   struct iv_cand *cand;
5025
5026   /* Try extending the set of induction variables by one.  */
5027   for (i = 0; i < n_iv_cands (data); i++)
5028     {
5029       cand = iv_cand (data, i);
5030
5031       if (iv_ca_cand_used_p (ivs, cand))
5032         continue;
5033
5034       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5035       if (!act_delta)
5036         continue;
5037
5038       /* If we successfully added the candidate and the set is small enough,
5039          try optimizing it by removing other candidates.  */
5040       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5041         {
5042           iv_ca_delta_commit (data, ivs, act_delta, true);
5043           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5044           iv_ca_delta_commit (data, ivs, act_delta, false);
5045           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5046         }
5047
5048       if (acost < best_cost)
5049         {
5050           best_cost = acost;
5051           iv_ca_delta_free (&best_delta);
5052           best_delta = act_delta;
5053         }
5054       else
5055         iv_ca_delta_free (&act_delta);
5056     }
5057
5058   if (!best_delta)
5059     {
5060       /* Try removing the candidates from the set instead.  */
5061       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5062
5063       /* Nothing more we can do.  */
5064       if (!best_delta)
5065         return false;
5066     }
5067
5068   iv_ca_delta_commit (data, ivs, best_delta, true);
5069   gcc_assert (best_cost == iv_ca_cost (ivs));
5070   iv_ca_delta_free (&best_delta);
5071   return true;
5072 }
5073
5074 /* Attempts to find the optimal set of induction variables.  We do simple
5075    greedy heuristic -- we try to replace at most one candidate in the selected
5076    solution and remove the unused ivs while this improves the cost.  */
5077
5078 static struct iv_ca *
5079 find_optimal_iv_set (struct ivopts_data *data)
5080 {
5081   unsigned i;
5082   struct iv_ca *set;
5083   struct iv_use *use;
5084
5085   /* Get the initial solution.  */
5086   set = get_initial_solution (data);
5087   if (!set)
5088     {
5089       if (dump_file && (dump_flags & TDF_DETAILS))
5090         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5091       return NULL;
5092     }
5093
5094   if (dump_file && (dump_flags & TDF_DETAILS))
5095     {
5096       fprintf (dump_file, "Initial set of candidates:\n");
5097       iv_ca_dump (data, dump_file, set);
5098     }
5099
5100   while (try_improve_iv_set (data, set))
5101     {
5102       if (dump_file && (dump_flags & TDF_DETAILS))
5103         {
5104           fprintf (dump_file, "Improved to:\n");
5105           iv_ca_dump (data, dump_file, set);
5106         }
5107     }
5108
5109   if (dump_file && (dump_flags & TDF_DETAILS))
5110     fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5111
5112   for (i = 0; i < n_iv_uses (data); i++)
5113     {
5114       use = iv_use (data, i);
5115       use->selected = iv_ca_cand_for_use (set, use)->cand;
5116     }
5117
5118   return set;
5119 }
5120
5121 /* Creates a new induction variable corresponding to CAND.  */
5122
5123 static void
5124 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5125 {
5126   block_stmt_iterator incr_pos;
5127   tree base;
5128   bool after = false;
5129
5130   if (!cand->iv)
5131     return;
5132
5133   switch (cand->pos)
5134     {
5135     case IP_NORMAL:
5136       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5137       break;
5138
5139     case IP_END:
5140       incr_pos = bsi_last (ip_end_pos (data->current_loop));
5141       after = true;
5142       break;
5143
5144     case IP_ORIGINAL:
5145       /* Mark that the iv is preserved.  */
5146       name_info (data, cand->var_before)->preserve_biv = true;
5147       name_info (data, cand->var_after)->preserve_biv = true;
5148
5149       /* Rewrite the increment so that it uses var_before directly.  */
5150       find_interesting_uses_op (data, cand->var_after)->selected = cand;
5151
5152       return;
5153     }
5154
5155   gimple_add_tmp_var (cand->var_before);
5156   add_referenced_var (cand->var_before);
5157
5158   base = unshare_expr (cand->iv->base);
5159
5160   create_iv (base, unshare_expr (cand->iv->step),
5161              cand->var_before, data->current_loop,
5162              &incr_pos, after, &cand->var_before, &cand->var_after);
5163 }
5164
5165 /* Creates new induction variables described in SET.  */
5166
5167 static void
5168 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5169 {
5170   unsigned i;
5171   struct iv_cand *cand;
5172   bitmap_iterator bi;
5173
5174   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5175     {
5176       cand = iv_cand (data, i);
5177       create_new_iv (data, cand);
5178     }
5179 }
5180
5181 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
5182    is true, remove also the ssa name defined by the statement.  */
5183
5184 static void
5185 remove_statement (tree stmt, bool including_defined_name)
5186 {
5187   if (TREE_CODE (stmt) == PHI_NODE)
5188     {
5189       if (!including_defined_name)
5190         {
5191           /* Prevent the ssa name defined by the statement from being removed.  */
5192           SET_PHI_RESULT (stmt, NULL);
5193         }
5194       remove_phi_node (stmt, NULL_TREE);
5195     }
5196   else
5197     {
5198       block_stmt_iterator bsi = bsi_for_stmt (stmt);
5199
5200       bsi_remove (&bsi, true);
5201     }
5202 }
5203
5204 /* Rewrites USE (definition of iv used in a nonlinear expression)
5205    using candidate CAND.  */
5206
5207 static void
5208 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5209                             struct iv_use *use, struct iv_cand *cand)
5210 {
5211   tree comp;
5212   tree op, stmts, tgt, ass;
5213   block_stmt_iterator bsi, pbsi;
5214
5215   /* An important special case -- if we are asked to express value of
5216      the original iv by itself, just exit; there is no need to
5217      introduce a new computation (that might also need casting the
5218      variable to unsigned and back).  */
5219   if (cand->pos == IP_ORIGINAL
5220       && cand->incremented_at == use->stmt)
5221     {
5222       tree step, ctype, utype;
5223       enum tree_code incr_code = PLUS_EXPR;
5224
5225       gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5226       gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5227
5228       step = cand->iv->step;
5229       ctype = TREE_TYPE (step);
5230       utype = TREE_TYPE (cand->var_after);
5231       if (TREE_CODE (step) == NEGATE_EXPR)
5232         {
5233           incr_code = MINUS_EXPR;
5234           step = TREE_OPERAND (step, 0);
5235         }
5236
5237       /* Check whether we may leave the computation unchanged.
5238          This is the case only if it does not rely on other
5239          computations in the loop -- otherwise, the computation
5240          we rely upon may be removed in remove_unused_ivs,
5241          thus leading to ICE.  */
5242       op = TREE_OPERAND (use->stmt, 1);
5243       if (TREE_CODE (op) == PLUS_EXPR
5244           || TREE_CODE (op) == MINUS_EXPR)
5245         {
5246           if (TREE_OPERAND (op, 0) == cand->var_before)
5247             op = TREE_OPERAND (op, 1);
5248           else if (TREE_CODE (op) == PLUS_EXPR
5249                    && TREE_OPERAND (op, 1) == cand->var_before)
5250             op = TREE_OPERAND (op, 0);
5251           else
5252             op = NULL_TREE;
5253         }
5254       else
5255         op = NULL_TREE;
5256
5257       if (op
5258           && (TREE_CODE (op) == INTEGER_CST
5259               || operand_equal_p (op, step, 0)))
5260         return;
5261
5262       /* Otherwise, add the necessary computations to express
5263          the iv.  */
5264       op = fold_convert (ctype, cand->var_before);
5265       comp = fold_convert (utype,
5266                            build2 (incr_code, ctype, op,
5267                                    unshare_expr (step)));
5268     }
5269   else
5270     comp = get_computation (data->current_loop, use, cand);
5271
5272   switch (TREE_CODE (use->stmt))
5273     {
5274     case PHI_NODE:
5275       tgt = PHI_RESULT (use->stmt);
5276
5277       /* If we should keep the biv, do not replace it.  */
5278       if (name_info (data, tgt)->preserve_biv)
5279         return;
5280
5281       pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5282       while (!bsi_end_p (pbsi)
5283              && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5284         {
5285           bsi = pbsi;
5286           bsi_next (&pbsi);
5287         }
5288       break;
5289
5290     case MODIFY_EXPR:
5291       tgt = TREE_OPERAND (use->stmt, 0);
5292       bsi = bsi_for_stmt (use->stmt);
5293       break;
5294
5295     default:
5296       gcc_unreachable ();
5297     }
5298
5299   op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5300
5301   if (TREE_CODE (use->stmt) == PHI_NODE)
5302     {
5303       if (stmts)
5304         bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5305       ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5306       bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5307       remove_statement (use->stmt, false);
5308       SSA_NAME_DEF_STMT (tgt) = ass;
5309     }
5310   else
5311     {
5312       if (stmts)
5313         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5314       TREE_OPERAND (use->stmt, 1) = op;
5315     }
5316 }
5317
5318 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5319    for_each_index.  */
5320
5321 static bool
5322 idx_remove_ssa_names (tree base, tree *idx,
5323                       void *data ATTRIBUTE_UNUSED)
5324 {
5325   tree *op;
5326
5327   if (TREE_CODE (*idx) == SSA_NAME)
5328     *idx = SSA_NAME_VAR (*idx);
5329
5330   if (TREE_CODE (base) == ARRAY_REF)
5331     {
5332       op = &TREE_OPERAND (base, 2);
5333       if (*op
5334           && TREE_CODE (*op) == SSA_NAME)
5335         *op = SSA_NAME_VAR (*op);
5336       op = &TREE_OPERAND (base, 3);
5337       if (*op
5338           && TREE_CODE (*op) == SSA_NAME)
5339         *op = SSA_NAME_VAR (*op);
5340     }
5341
5342   return true;
5343 }
5344
5345 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5346
5347 static tree
5348 unshare_and_remove_ssa_names (tree ref)
5349 {
5350   ref = unshare_expr (ref);
5351   for_each_index (&ref, idx_remove_ssa_names, NULL);
5352
5353   return ref;
5354 }
5355
5356 /* Extract the alias analysis info for the memory reference REF.  There are
5357    several ways how this information may be stored and what precisely is
5358    its semantics depending on the type of the reference, but there always is
5359    somewhere hidden one _DECL node that is used to determine the set of
5360    virtual operands for the reference.  The code below deciphers this jungle
5361    and extracts this single useful piece of information.  */
5362
5363 static tree
5364 get_ref_tag (tree ref, tree orig)
5365 {
5366   tree var = get_base_address (ref);
5367   tree aref = NULL_TREE, tag, sv;
5368   HOST_WIDE_INT offset, size, maxsize;
5369
5370   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5371     {
5372       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5373       if (ref)
5374         break;
5375     }
5376
5377   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5378     return unshare_expr (sv);
5379
5380   if (!var)
5381     return NULL_TREE;
5382
5383   if (TREE_CODE (var) == INDIRECT_REF)
5384     {
5385       /* If the base is a dereference of a pointer, first check its name memory
5386          tag.  If it does not have one, use its symbol memory tag.  */
5387       var = TREE_OPERAND (var, 0);
5388       if (TREE_CODE (var) != SSA_NAME)
5389         return NULL_TREE;
5390
5391       if (SSA_NAME_PTR_INFO (var))
5392         {
5393           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5394           if (tag)
5395             return tag;
5396         }
5397
5398       var = SSA_NAME_VAR (var);
5399       tag = var_ann (var)->symbol_mem_tag;
5400       gcc_assert (tag != NULL_TREE);
5401       return tag;
5402     }
5403   else
5404     {
5405       if (!DECL_P (var))
5406         return NULL_TREE;
5407
5408       tag = var_ann (var)->symbol_mem_tag;
5409       if (tag)
5410         return tag;
5411
5412       return var;
5413     }
5414 }
5415
5416 /* Copies the reference information from OLD_REF to NEW_REF.  */
5417
5418 static void
5419 copy_ref_info (tree new_ref, tree old_ref)
5420 {
5421   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5422     copy_mem_ref_info (new_ref, old_ref);
5423   else
5424     {
5425       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5426       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5427     }
5428 }
5429
5430 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5431
5432 static void
5433 rewrite_use_address (struct ivopts_data *data,
5434                      struct iv_use *use, struct iv_cand *cand)
5435 {
5436   struct affine_tree_combination aff;
5437   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5438   tree ref;
5439
5440   get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5441   unshare_aff_combination (&aff);
5442
5443   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5444   copy_ref_info (ref, *use->op_p);
5445   *use->op_p = ref;
5446 }
5447
5448 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5449    candidate CAND.  */
5450
5451 static void
5452 rewrite_use_compare (struct ivopts_data *data,
5453                      struct iv_use *use, struct iv_cand *cand)
5454 {
5455   tree comp;
5456   tree *op_p, cond, op, stmts, bound;
5457   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5458   enum tree_code compare;
5459   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5460
5461   bound = cp->value;
5462   if (bound)
5463     {
5464       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5465       tree var_type = TREE_TYPE (var);
5466
5467       compare = iv_elimination_compare (data, use);
5468       bound = fold_convert (var_type, bound);
5469       op = force_gimple_operand (unshare_expr (bound), &stmts,
5470                                  true, NULL_TREE);
5471
5472       if (stmts)
5473         bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5474
5475       *use->op_p = build2 (compare, boolean_type_node, var, op);
5476       update_stmt (use->stmt);
5477       return;
5478     }
5479
5480   /* The induction variable elimination failed; just express the original
5481      giv.  */
5482   comp = get_computation (data->current_loop, use, cand);
5483
5484   cond = *use->op_p;
5485   op_p = &TREE_OPERAND (cond, 0);
5486   if (TREE_CODE (*op_p) != SSA_NAME
5487       || zero_p (get_iv (data, *op_p)->step))
5488     op_p = &TREE_OPERAND (cond, 1);
5489
5490   op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5491   if (stmts)
5492     bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5493
5494   *op_p = op;
5495 }
5496
5497 /* Rewrites USE using candidate CAND.  */
5498
5499 static void
5500 rewrite_use (struct ivopts_data *data,
5501              struct iv_use *use, struct iv_cand *cand)
5502 {
5503   switch (use->type)
5504     {
5505       case USE_NONLINEAR_EXPR:
5506         rewrite_use_nonlinear_expr (data, use, cand);
5507         break;
5508
5509       case USE_ADDRESS:
5510         rewrite_use_address (data, use, cand);
5511         break;
5512
5513       case USE_COMPARE:
5514         rewrite_use_compare (data, use, cand);
5515         break;
5516
5517       default:
5518         gcc_unreachable ();
5519     }
5520   mark_new_vars_to_rename (use->stmt);
5521 }
5522
5523 /* Rewrite the uses using the selected induction variables.  */
5524
5525 static void
5526 rewrite_uses (struct ivopts_data *data)
5527 {
5528   unsigned i;
5529   struct iv_cand *cand;
5530   struct iv_use *use;
5531
5532   for (i = 0; i < n_iv_uses (data); i++)
5533     {
5534       use = iv_use (data, i);
5535       cand = use->selected;
5536       gcc_assert (cand);
5537
5538       rewrite_use (data, use, cand);
5539     }
5540 }
5541
5542 /* Removes the ivs that are not used after rewriting.  */
5543
5544 static void
5545 remove_unused_ivs (struct ivopts_data *data)
5546 {
5547   unsigned j;
5548   bitmap_iterator bi;
5549
5550   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5551     {
5552       struct version_info *info;
5553
5554       info = ver_info (data, j);
5555       if (info->iv
5556           && !zero_p (info->iv->step)
5557           && !info->inv_id
5558           && !info->iv->have_use_for
5559           && !info->preserve_biv)
5560         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5561     }
5562 }
5563
5564 /* Frees data allocated by the optimization of a single loop.  */
5565
5566 static void
5567 free_loop_data (struct ivopts_data *data)
5568 {
5569   unsigned i, j;
5570   bitmap_iterator bi;
5571   tree obj;
5572
5573   htab_empty (data->niters);
5574
5575   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5576     {
5577       struct version_info *info;
5578
5579       info = ver_info (data, i);
5580       if (info->iv)
5581         free (info->iv);
5582       info->iv = NULL;
5583       info->has_nonlin_use = false;
5584       info->preserve_biv = false;
5585       info->inv_id = 0;
5586     }
5587   bitmap_clear (data->relevant);
5588   bitmap_clear (data->important_candidates);
5589
5590   for (i = 0; i < n_iv_uses (data); i++)
5591     {
5592       struct iv_use *use = iv_use (data, i);
5593
5594       free (use->iv);
5595       BITMAP_FREE (use->related_cands);
5596       for (j = 0; j < use->n_map_members; j++)
5597         if (use->cost_map[j].depends_on)
5598           BITMAP_FREE (use->cost_map[j].depends_on);
5599       free (use->cost_map);
5600       free (use);
5601     }
5602   VEC_truncate (iv_use_p, data->iv_uses, 0);
5603
5604   for (i = 0; i < n_iv_cands (data); i++)
5605     {
5606       struct iv_cand *cand = iv_cand (data, i);
5607
5608       if (cand->iv)
5609         free (cand->iv);
5610       if (cand->depends_on)
5611         BITMAP_FREE (cand->depends_on);
5612       free (cand);
5613     }
5614   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5615
5616   if (data->version_info_size < num_ssa_names)
5617     {
5618       data->version_info_size = 2 * num_ssa_names;
5619       free (data->version_info);
5620       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5621     }
5622
5623   data->max_inv_id = 0;
5624
5625   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5626     SET_DECL_RTL (obj, NULL_RTX);
5627
5628   VEC_truncate (tree, decl_rtl_to_reset, 0);
5629 }
5630
5631 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5632    loop tree.  */
5633
5634 static void
5635 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5636 {
5637   free_loop_data (data);
5638   free (data->version_info);
5639   BITMAP_FREE (data->relevant);
5640   BITMAP_FREE (data->important_candidates);
5641   htab_delete (data->niters);
5642
5643   VEC_free (tree, heap, decl_rtl_to_reset);
5644   VEC_free (iv_use_p, heap, data->iv_uses);
5645   VEC_free (iv_cand_p, heap, data->iv_candidates);
5646 }
5647
5648 /* Optimizes the LOOP.  Returns true if anything changed.  */
5649
5650 static bool
5651 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5652 {
5653   bool changed = false;
5654   struct iv_ca *iv_ca;
5655   edge exit;
5656
5657   data->current_loop = loop;
5658
5659   if (dump_file && (dump_flags & TDF_DETAILS))
5660     {
5661       fprintf (dump_file, "Processing loop %d\n", loop->num);
5662
5663       exit = single_dom_exit (loop);
5664       if (exit)
5665         {
5666           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5667                    exit->src->index, exit->dest->index);
5668           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5669           fprintf (dump_file, "\n");
5670         }
5671
5672       fprintf (dump_file, "\n");
5673     }
5674
5675   /* For each ssa name determines whether it behaves as an induction variable
5676      in some loop.  */
5677   if (!find_induction_variables (data))
5678     goto finish;
5679
5680   /* Finds interesting uses (item 1).  */
5681   find_interesting_uses (data);
5682   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5683     goto finish;
5684
5685   /* Finds candidates for the induction variables (item 2).  */
5686   find_iv_candidates (data);
5687
5688   /* Calculates the costs (item 3, part 1).  */
5689   determine_use_iv_costs (data);
5690   determine_iv_costs (data);
5691   determine_set_costs (data);
5692
5693   /* Find the optimal set of induction variables (item 3, part 2).  */
5694   iv_ca = find_optimal_iv_set (data);
5695   if (!iv_ca)
5696     goto finish;
5697   changed = true;
5698
5699   /* Create the new induction variables (item 4, part 1).  */
5700   create_new_ivs (data, iv_ca);
5701   iv_ca_free (&iv_ca);
5702
5703   /* Rewrite the uses (item 4, part 2).  */
5704   rewrite_uses (data);
5705
5706   /* Remove the ivs that are unused after rewriting.  */
5707   remove_unused_ivs (data);
5708
5709   /* We have changed the structure of induction variables; it might happen
5710      that definitions in the scev database refer to some of them that were
5711      eliminated.  */
5712   scev_reset ();
5713
5714 finish:
5715   free_loop_data (data);
5716
5717   return changed;
5718 }
5719
5720 /* Main entry point.  Optimizes induction variables in LOOPS.  */
5721
5722 void
5723 tree_ssa_iv_optimize (struct loops *loops)
5724 {
5725   struct loop *loop;
5726   struct ivopts_data data;
5727
5728   tree_ssa_iv_optimize_init (&data);
5729
5730   /* Optimize the loops starting with the innermost ones.  */
5731   loop = loops->tree_root;
5732   while (loop->inner)
5733     loop = loop->inner;
5734
5735   /* Scan the loops, inner ones first.  */
5736   while (loop != loops->tree_root)
5737     {
5738       if (dump_file && (dump_flags & TDF_DETAILS))
5739         flow_loop_dump (loop, dump_file, NULL, 1);
5740
5741       tree_ssa_iv_optimize_loop (&data, loop);
5742
5743       if (loop->next)
5744         {
5745           loop = loop->next;
5746           while (loop->inner)
5747             loop = loop->inner;
5748         }
5749       else
5750         loop = loop->outer;
5751     }
5752
5753   tree_ssa_iv_optimize_finalize (&data);
5754 }