gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
   3
   4 This file is part of GCC.
   5
   6 GCC is free software; you can redistribute it and/or modify it
   7 under the terms of the GNU General Public License as published by the
   8 Free Software Foundation; either version 3, or (at your option) any
   9 later version.
  10
  11 GCC is distributed in the hope that it will be useful, but WITHOUT
  12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14 for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GCC; see the file COPYING3.  If not see
  18 <http://www.gnu.org/licenses/>.  */
  19
  20 /* This pass tries to find the optimal set of induction variables for the loop.
  21    It optimizes just the basic linear induction variables (although adding
  22    support for other types should not be too hard).  It includes the
  23    optimizations commonly known as strength reduction, induction variable
  24    coalescing and induction variable elimination.  It does it in the
  25    following steps:
  26
  27    1) The interesting uses of induction variables are found.  This includes
  28
  29       -- uses of induction variables in non-linear expressions
  30       -- addresses of arrays
  31       -- comparisons of induction variables
  32
  33    2) Candidates for the induction variables are found.  This includes
  34
  35       -- old induction variables
  36       -- the variables defined by expressions derived from the "interesting
  37          uses" above
  38
   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be
         usable for the given purpose (adding base and offset for arrays, etc.).
  47       -- The variable costs.  Each of the variables has a cost assigned that
  48          reflects the costs associated with incrementing the value of the
  49          variable.  The original variables are somewhat preferred.
  50       -- The set cost.  Depending on the size of the set, extra cost may be
  51          added to reflect register pressure.
  52
  53       All the costs are defined in a machine-specific way, using the target
  54       hooks and machine descriptions to determine them.
  55
  56    4) The trees are transformed to use the new variables, the dead code is
  57       removed.
  58
  59    All of this is done loop by loop.  Doing it globally is theoretically
  60    possible, it might give a better performance and it might enable us
  61    to decide costs more precisely, but getting all the interactions right
  62    would be complicated.  */
  63
  64 #include "config.h"
  65 #include "system.h"
  66 #include "coretypes.h"
  67 #include "tm.h"
  68 #include "tree.h"
  69 #include "rtl.h"
  70 #include "tm_p.h"
  71 #include "hard-reg-set.h"
  72 #include "basic-block.h"
  73 #include "output.h"
  74 #include "diagnostic.h"
  75 #include "tree-flow.h"
  76 #include "tree-dump.h"
  77 #include "timevar.h"
  78 #include "cfgloop.h"
  79 #include "varray.h"
  80 #include "expr.h"
  81 #include "tree-pass.h"
  82 #include "ggc.h"
  83 #include "insn-config.h"
  84 #include "recog.h"
  85 #include "pointer-set.h"
  86 #include "hashtab.h"
  87 #include "tree-chrec.h"
  88 #include "tree-scalar-evolution.h"
  89 #include "cfgloop.h"
  90 #include "params.h"
  91 #include "langhooks.h"
  92 #include "tree-affine.h"
  93 #include "target.h"
  94
  95 /* The infinite cost.  */
  96 #define INFTY 10000000
  97
  98 /* The expected number of loop iterations.  TODO -- use profiling instead of
  99    this.  */
 100 #define AVG_LOOP_NITER(LOOP) 5
 101
 102
 103 /* Representation of the induction variable.  */
 104 struct iv
 105 {
 106   tree base;            /* Initial value of the iv.  */
 107   tree base_object;     /* A memory object to that the induction variable points.  */
 108   tree step;            /* Step of the iv (constant only).  */
 109   tree ssa_name;        /* The ssa name with the value.  */
 110   bool biv_p;           /* Is it a biv?  */
 111   bool have_use_for;    /* Do we already have a use for it?  */
 112   unsigned use_id;      /* The identifier in the use if it is the case.  */
 113 };
 114
 115 /* Per-ssa version information (induction variable descriptions, etc.).  */
 116 struct version_info
 117 {
 118   tree name;            /* The ssa name.  */
 119   struct iv *iv;        /* Induction variable description.  */
 120   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 121                            an expression that is not an induction variable.  */
 122   unsigned inv_id;      /* Id of an invariant.  */
 123   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 124 };
 125
 126 /* Types of uses.  */
 127 enum use_type
 128 {
 129   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 130   USE_ADDRESS,          /* Use in an address.  */
 131   USE_COMPARE           /* Use is a compare.  */
 132 };
 133
 134 /* Cost of a computation.  */
 135 typedef struct
 136 {
 137   unsigned cost;        /* The runtime cost.  */
 138   unsigned complexity;  /* The estimate of the complexity of the code for
 139                            the computation (in no concrete units --
 140                            complexity field should be larger for more
 141                            complex expressions and addressing modes).  */
 142 } comp_cost;
 143
 144 static const comp_cost zero_cost = {0, 0};
 145 static const comp_cost infinite_cost = {INFTY, INFTY};
 146
 147 /* The candidate - cost pair.  */
 148 struct cost_pair
 149 {
 150   struct iv_cand *cand; /* The candidate.  */
 151   comp_cost cost;       /* The cost.  */
 152   bitmap depends_on;    /* The list of invariants that have to be
 153                            preserved.  */
 154   tree value;           /* For final value elimination, the expression for
 155                            the final value of the iv.  For iv elimination,
 156                            the new bound to compare with.  */
 157 };
 158
 159 /* Use.  */
 160 struct iv_use
 161 {
 162   unsigned id;          /* The id of the use.  */
 163   enum use_type type;   /* Type of the use.  */
 164   struct iv *iv;        /* The induction variable it is based on.  */
 165   tree stmt;            /* Statement in that it occurs.  */
 166   tree *op_p;           /* The place where it occurs.  */
 167   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 168                            important ones.  */
 169
 170   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 171   struct cost_pair *cost_map;
 172                         /* The costs wrto the iv candidates.  */
 173
 174   struct iv_cand *selected;
 175                         /* The selected candidate.  */
 176 };
 177
 178 /* The position where the iv is computed.  */
 179 enum iv_position
 180 {
 181   IP_NORMAL,            /* At the end, just before the exit condition.  */
 182   IP_END,               /* At the end of the latch block.  */
 183   IP_ORIGINAL           /* The original biv.  */
 184 };
 185
 186 /* The induction variable candidate.  */
 187 struct iv_cand
 188 {
 189   unsigned id;          /* The number of the candidate.  */
 190   bool important;       /* Whether this is an "important" candidate, i.e. such
 191                            that it should be considered by all uses.  */
 192   enum iv_position pos; /* Where it is computed.  */
 193   tree incremented_at;  /* For original biv, the statement where it is
 194                            incremented.  */
 195   tree var_before;      /* The variable used for it before increment.  */
 196   tree var_after;       /* The variable used for it after increment.  */
 197   struct iv *iv;        /* The value of the candidate.  NULL for
 198                            "pseudocandidate" used to indicate the possibility
 199                            to replace the final value of an iv by direct
 200                            computation of the value.  */
 201   unsigned cost;        /* Cost of the candidate.  */
 202   bitmap depends_on;    /* The list of invariants that are used in step of the
 203                            biv.  */
 204 };
 205
 206 /* The data used by the induction variable optimizations.  */
 207
 208 typedef struct iv_use *iv_use_p;
 209 DEF_VEC_P(iv_use_p);
 210 DEF_VEC_ALLOC_P(iv_use_p,heap);
 211
 212 typedef struct iv_cand *iv_cand_p;
 213 DEF_VEC_P(iv_cand_p);
 214 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 215
 216 struct ivopts_data
 217 {
 218   /* The currently optimized loop.  */
 219   struct loop *current_loop;
 220
 221   /* Number of registers used in it.  */
 222   unsigned regs_used;
 223
 224   /* Numbers of iterations for all exits of the current loop.  */
 225   struct pointer_map_t *niters;
 226
 227   /* The size of version_info array allocated.  */
 228   unsigned version_info_size;
 229
 230   /* The array of information for the ssa names.  */
 231   struct version_info *version_info;
 232
 233   /* The bitmap of indices in version_info whose value was changed.  */
 234   bitmap relevant;
 235
 236   /* The maximum invariant id.  */
 237   unsigned max_inv_id;
 238
 239   /* The uses of induction variables.  */
 240   VEC(iv_use_p,heap) *iv_uses;
 241
 242   /* The candidates.  */
 243   VEC(iv_cand_p,heap) *iv_candidates;
 244
 245   /* A bitmap of important candidates.  */
 246   bitmap important_candidates;
 247
 248   /* Whether to consider just related and important candidates when replacing a
 249      use.  */
 250   bool consider_all_candidates;
 251 };
 252
 253 /* An assignment of iv candidates to uses.  */
 254
 255 struct iv_ca
 256 {
 257   /* The number of uses covered by the assignment.  */
 258   unsigned upto;
 259
 260   /* Number of uses that cannot be expressed by the candidates in the set.  */
 261   unsigned bad_uses;
 262
 263   /* Candidate assigned to a use, together with the related costs.  */
 264   struct cost_pair **cand_for_use;
 265
 266   /* Number of times each candidate is used.  */
 267   unsigned *n_cand_uses;
 268
 269   /* The candidates used.  */
 270   bitmap cands;
 271
 272   /* The number of candidates in the set.  */
 273   unsigned n_cands;
 274
 275   /* Total number of registers needed.  */
 276   unsigned n_regs;
 277
 278   /* Total cost of expressing uses.  */
 279   comp_cost cand_use_cost;
 280
 281   /* Total cost of candidates.  */
 282   unsigned cand_cost;
 283
 284   /* Number of times each invariant is used.  */
 285   unsigned *n_invariant_uses;
 286
 287   /* Total cost of the assignment.  */
 288   comp_cost cost;
 289 };
 290
 291 /* Difference of two iv candidate assignments.  */
 292
 293 struct iv_ca_delta
 294 {
 295   /* Changed use.  */
 296   struct iv_use *use;
 297
 298   /* An old assignment (for rollback purposes).  */
 299   struct cost_pair *old_cp;
 300
 301   /* A new assignment.  */
 302   struct cost_pair *new_cp;
 303
 304   /* Next change in the list.  */
 305   struct iv_ca_delta *next_change;
 306 };
 307
 308 /* Bound on number of candidates below that all candidates are considered.  */
 309
 310 #define CONSIDER_ALL_CANDIDATES_BOUND \
 311   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 312
 313 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 314    optimizing such a loop would help, and it would take ages).  */
 315
 316 #define MAX_CONSIDERED_USES \
 317   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 318
 319 /* If there are at most this number of ivs in the set, try removing unnecessary
 320    ivs from the set always.  */
 321
 322 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 323   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 324
 325 /* The list of trees for that the decl_rtl field must be reset is stored
 326    here.  */
 327
 328 static VEC(tree,heap) *decl_rtl_to_reset;
 329
 330 /* Number of uses recorded in DATA.  */
 331
 332 static inline unsigned
 333 n_iv_uses (struct ivopts_data *data)
 334 {
 335   return VEC_length (iv_use_p, data->iv_uses);
 336 }
 337
 338 /* Ith use recorded in DATA.  */
 339
 340 static inline struct iv_use *
 341 iv_use (struct ivopts_data *data, unsigned i)
 342 {
 343   return VEC_index (iv_use_p, data->iv_uses, i);
 344 }
 345
 346 /* Number of candidates recorded in DATA.  */
 347
 348 static inline unsigned
 349 n_iv_cands (struct ivopts_data *data)
 350 {
 351   return VEC_length (iv_cand_p, data->iv_candidates);
 352 }
 353
 354 /* Ith candidate recorded in DATA.  */
 355
 356 static inline struct iv_cand *
 357 iv_cand (struct ivopts_data *data, unsigned i)
 358 {
 359   return VEC_index (iv_cand_p, data->iv_candidates, i);
 360 }
 361
 362 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 363
 364 edge
 365 single_dom_exit (struct loop *loop)
 366 {
 367   edge exit = single_exit (loop);
 368
 369   if (!exit)
 370     return NULL;
 371
 372   if (!just_once_each_iteration_p (loop, exit->src))
 373     return NULL;
 374
 375   return exit;
 376 }
 377
 378 /* Dumps information about the induction variable IV to FILE.  */
 379
 380 extern void dump_iv (FILE *, struct iv *);
 381 void
 382 dump_iv (FILE *file, struct iv *iv)
 383 {
 384   if (iv->ssa_name)
 385     {
 386       fprintf (file, "ssa name ");
 387       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 388       fprintf (file, "\n");
 389     }
 390
 391   fprintf (file, "  type ");
 392   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 393   fprintf (file, "\n");
 394
 395   if (iv->step)
 396     {
 397       fprintf (file, "  base ");
 398       print_generic_expr (file, iv->base, TDF_SLIM);
 399       fprintf (file, "\n");
 400
 401       fprintf (file, "  step ");
 402       print_generic_expr (file, iv->step, TDF_SLIM);
 403       fprintf (file, "\n");
 404     }
 405   else
 406     {
 407       fprintf (file, "  invariant ");
 408       print_generic_expr (file, iv->base, TDF_SLIM);
 409       fprintf (file, "\n");
 410     }
 411
 412   if (iv->base_object)
 413     {
 414       fprintf (file, "  base object ");
 415       print_generic_expr (file, iv->base_object, TDF_SLIM);
 416       fprintf (file, "\n");
 417     }
 418
 419   if (iv->biv_p)
 420     fprintf (file, "  is a biv\n");
 421 }
 422
 423 /* Dumps information about the USE to FILE.  */
 424
 425 extern void dump_use (FILE *, struct iv_use *);
 426 void
 427 dump_use (FILE *file, struct iv_use *use)
 428 {
 429   fprintf (file, "use %d\n", use->id);
 430
 431   switch (use->type)
 432     {
 433     case USE_NONLINEAR_EXPR:
 434       fprintf (file, "  generic\n");
 435       break;
 436
 437     case USE_ADDRESS:
 438       fprintf (file, "  address\n");
 439       break;
 440
 441     case USE_COMPARE:
 442       fprintf (file, "  compare\n");
 443       break;
 444
 445     default:
 446       gcc_unreachable ();
 447     }
 448
 449   fprintf (file, "  in statement ");
 450   print_generic_expr (file, use->stmt, TDF_SLIM);
 451   fprintf (file, "\n");
 452
 453   fprintf (file, "  at position ");
 454   if (use->op_p)
 455     print_generic_expr (file, *use->op_p, TDF_SLIM);
 456   fprintf (file, "\n");
 457
 458   dump_iv (file, use->iv);
 459
 460   if (use->related_cands)
 461     {
 462       fprintf (file, "  related candidates ");
 463       dump_bitmap (file, use->related_cands);
 464     }
 465 }
 466
 467 /* Dumps information about the uses to FILE.  */
 468
 469 extern void dump_uses (FILE *, struct ivopts_data *);
 470 void
 471 dump_uses (FILE *file, struct ivopts_data *data)
 472 {
 473   unsigned i;
 474   struct iv_use *use;
 475
 476   for (i = 0; i < n_iv_uses (data); i++)
 477     {
 478       use = iv_use (data, i);
 479
 480       dump_use (file, use);
 481       fprintf (file, "\n");
 482     }
 483 }
 484
 485 /* Dumps information about induction variable candidate CAND to FILE.  */
 486
 487 extern void dump_cand (FILE *, struct iv_cand *);
 488 void
 489 dump_cand (FILE *file, struct iv_cand *cand)
 490 {
 491   struct iv *iv = cand->iv;
 492
 493   fprintf (file, "candidate %d%s\n",
 494            cand->id, cand->important ? " (important)" : "");
 495
 496   if (cand->depends_on)
 497     {
 498       fprintf (file, "  depends on ");
 499       dump_bitmap (file, cand->depends_on);
 500     }
 501
 502   if (!iv)
 503     {
 504       fprintf (file, "  final value replacement\n");
 505       return;
 506     }
 507
 508   switch (cand->pos)
 509     {
 510     case IP_NORMAL:
 511       fprintf (file, "  incremented before exit test\n");
 512       break;
 513
 514     case IP_END:
 515       fprintf (file, "  incremented at end\n");
 516       break;
 517
 518     case IP_ORIGINAL:
 519       fprintf (file, "  original biv\n");
 520       break;
 521     }
 522
 523   dump_iv (file, iv);
 524 }
 525
 526 /* Returns the info for ssa version VER.  */
 527
 528 static inline struct version_info *
 529 ver_info (struct ivopts_data *data, unsigned ver)
 530 {
 531   return data->version_info + ver;
 532 }
 533
 534 /* Returns the info for ssa name NAME.  */
 535
 536 static inline struct version_info *
 537 name_info (struct ivopts_data *data, tree name)
 538 {
 539   return ver_info (data, SSA_NAME_VERSION (name));
 540 }
 541
 542 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 543    emitted in LOOP.  */
 544
 545 static bool
 546 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
 547 {
 548   basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
 549
 550   gcc_assert (bb);
 551
 552   if (sbb == loop->latch)
 553     return true;
 554
 555   if (sbb != bb)
 556     return false;
 557
 558   return stmt == last_stmt (bb);
 559 }
 560
 561 /* Returns true if STMT if after the place where the original induction
 562    variable CAND is incremented.  */
 563
 564 static bool
 565 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
 566 {
 567   basic_block cand_bb = bb_for_stmt (cand->incremented_at);
 568   basic_block stmt_bb = bb_for_stmt (stmt);
 569   block_stmt_iterator bsi;
 570
 571   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 572     return false;
 573
 574   if (stmt_bb != cand_bb)
 575     return true;
 576
 577   /* Scan the block from the end, since the original ivs are usually
 578      incremented at the end of the loop body.  */
 579   for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
 580     {
 581       if (bsi_stmt (bsi) == cand->incremented_at)
 582         return false;
 583       if (bsi_stmt (bsi) == stmt)
 584         return true;
 585     }
 586 }
 587
 588 /* Returns true if STMT if after the place where the induction variable
 589    CAND is incremented in LOOP.  */
 590
 591 static bool
 592 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
 593 {
 594   switch (cand->pos)
 595     {
 596     case IP_END:
 597       return false;
 598
 599     case IP_NORMAL:
 600       return stmt_after_ip_normal_pos (loop, stmt);
 601
 602     case IP_ORIGINAL:
 603       return stmt_after_ip_original_pos (cand, stmt);
 604
 605     default:
 606       gcc_unreachable ();
 607     }
 608 }
 609
 610 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 611
 612 static bool
 613 abnormal_ssa_name_p (tree exp)
 614 {
 615   if (!exp)
 616     return false;
 617
 618   if (TREE_CODE (exp) != SSA_NAME)
 619     return false;
 620
 621   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 622 }
 623
 624 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 625    abnormal phi node.  Callback for for_each_index.  */
 626
 627 static bool
 628 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 629                                   void *data ATTRIBUTE_UNUSED)
 630 {
 631   if (TREE_CODE (base) == ARRAY_REF)
 632     {
 633       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 634         return false;
 635       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 636         return false;
 637     }
 638
 639   return !abnormal_ssa_name_p (*index);
 640 }
 641
 642 /* Returns true if EXPR contains a ssa name that occurs in an
 643    abnormal phi node.  */
 644
 645 bool
 646 contains_abnormal_ssa_name_p (tree expr)
 647 {
 648   enum tree_code code;
 649   enum tree_code_class codeclass;
 650
 651   if (!expr)
 652     return false;
 653
 654   code = TREE_CODE (expr);
 655   codeclass = TREE_CODE_CLASS (code);
 656
 657   if (code == SSA_NAME)
 658     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 659
 660   if (code == INTEGER_CST
 661       || is_gimple_min_invariant (expr))
 662     return false;
 663
 664   if (code == ADDR_EXPR)
 665     return !for_each_index (&TREE_OPERAND (expr, 0),
 666                             idx_contains_abnormal_ssa_name_p,
 667                             NULL);
 668
 669   switch (codeclass)
 670     {
 671     case tcc_binary:
 672     case tcc_comparison:
 673       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 674         return true;
 675
 676       /* Fallthru.  */
 677     case tcc_unary:
 678       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 679         return true;
 680
 681       break;
 682
 683     default:
 684       gcc_unreachable ();
 685     }
 686
 687   return false;
 688 }
 689
 690 /*  Returns tree describing number of iterations determined from
 691     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 692
 693 static tree
 694 niter_for_exit (struct ivopts_data *data, edge exit)
 695 {
 696   struct tree_niter_desc desc;
 697   tree niter;
 698   void **slot;
 699
 700   if (!data->niters)
 701     {
 702       data->niters = pointer_map_create ();
 703       slot = NULL;
 704     }
 705   else
 706     slot = pointer_map_contains (data->niters, exit);
 707
 708   if (!slot)
 709     {
 710       /* Try to determine number of iterations.  We must know it
 711          unconditionally (i.e., without possibility of # of iterations
 712          being zero).  Also, we cannot safely work with ssa names that
 713          appear in phi nodes on abnormal edges, so that we do not create
 714          overlapping life ranges for them (PR 27283).  */
 715       if (number_of_iterations_exit (data->current_loop,
 716                                      exit, &desc, true)
 717           && integer_zerop (desc.may_be_zero)
 718           && !contains_abnormal_ssa_name_p (desc.niter))
 719         niter = desc.niter;
 720       else
 721         niter = NULL_TREE;
 722
 723       *pointer_map_insert (data->niters, exit) = niter;
 724     }
 725   else
 726     niter = (tree) *slot;
 727
 728   return niter;
 729 }
 730
 731 /* Returns tree describing number of iterations determined from
 732    single dominating exit of DATA->current_loop, or NULL if something
 733    goes wrong.  */
 734
 735 static tree
 736 niter_for_single_dom_exit (struct ivopts_data *data)
 737 {
 738   edge exit = single_dom_exit (data->current_loop);
 739
 740   if (!exit)
 741     return NULL;
 742
 743   return niter_for_exit (data, exit);
 744 }
 745
 746 /* Initializes data structures used by the iv optimization pass, stored
 747    in DATA.  */
 748
 749 static void
 750 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 751 {
 752   data->version_info_size = 2 * num_ssa_names;
 753   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 754   data->relevant = BITMAP_ALLOC (NULL);
 755   data->important_candidates = BITMAP_ALLOC (NULL);
 756   data->max_inv_id = 0;
 757   data->niters = NULL;
 758   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 759   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 760   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 761 }
 762
 763 /* Returns a memory object to that EXPR points.  In case we are able to
 764    determine that it does not point to any such object, NULL is returned.  */
 765
 766 static tree
 767 determine_base_object (tree expr)
 768 {
 769   enum tree_code code = TREE_CODE (expr);
 770   tree base, obj;
 771
 772   /* If this is a pointer casted to any type, we need to determine
 773      the base object for the pointer; so handle conversions before
 774      throwing away non-pointer expressions.  */
 775   if (TREE_CODE (expr) == NOP_EXPR
 776       || TREE_CODE (expr) == CONVERT_EXPR)
 777     return determine_base_object (TREE_OPERAND (expr, 0));
 778
 779   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 780     return NULL_TREE;
 781
 782   switch (code)
 783     {
 784     case INTEGER_CST:
 785       return NULL_TREE;
 786
 787     case ADDR_EXPR:
 788       obj = TREE_OPERAND (expr, 0);
 789       base = get_base_address (obj);
 790
 791       if (!base)
 792         return expr;
 793
 794       if (TREE_CODE (base) == INDIRECT_REF)
 795         return determine_base_object (TREE_OPERAND (base, 0));
 796
 797       return fold_convert (ptr_type_node,
 798                            build_fold_addr_expr (base));
 799
 800     case POINTER_PLUS_EXPR:
 801       return determine_base_object (TREE_OPERAND (expr, 0));
 802
 803     case PLUS_EXPR:
 804     case MINUS_EXPR:
 805       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 806       gcc_unreachable ();
 807
 808     default:
 809       return fold_convert (ptr_type_node, expr);
 810     }
 811 }
 812
 813 /* Allocates an induction variable with given initial value BASE and step STEP
 814    for loop LOOP.  */
 815
 816 static struct iv *
 817 alloc_iv (tree base, tree step)
 818 {
 819   struct iv *iv = XCNEW (struct iv);
 820   gcc_assert (step != NULL_TREE);
 821
 822   iv->base = base;
 823   iv->base_object = determine_base_object (base);
 824   iv->step = step;
 825   iv->biv_p = false;
 826   iv->have_use_for = false;
 827   iv->use_id = 0;
 828   iv->ssa_name = NULL_TREE;
 829
 830   return iv;
 831 }
 832
 833 /* Sets STEP and BASE for induction variable IV.  */
 834
 835 static void
 836 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 837 {
 838   struct version_info *info = name_info (data, iv);
 839
 840   gcc_assert (!info->iv);
 841
 842   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 843   info->iv = alloc_iv (base, step);
 844   info->iv->ssa_name = iv;
 845 }
 846
 847 /* Finds induction variable declaration for VAR.  */
 848
 849 static struct iv *
 850 get_iv (struct ivopts_data *data, tree var)
 851 {
 852   basic_block bb;
 853   tree type = TREE_TYPE (var);
 854
 855   if (!POINTER_TYPE_P (type)
 856       && !INTEGRAL_TYPE_P (type))
 857     return NULL;
 858
 859   if (!name_info (data, var)->iv)
 860     {
 861       bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 862
 863       if (!bb
 864           || !flow_bb_inside_loop_p (data->current_loop, bb))
 865         set_iv (data, var, var, build_int_cst (type, 0));
 866     }
 867
 868   return name_info (data, var)->iv;
 869 }
 870
 871 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 872    not define a simple affine biv with nonzero step.  */
 873
 874 static tree
 875 determine_biv_step (tree phi)
 876 {
 877   struct loop *loop = bb_for_stmt (phi)->loop_father;
 878   tree name = PHI_RESULT (phi);
 879   affine_iv iv;
 880
 881   if (!is_gimple_reg (name))
 882     return NULL_TREE;
 883
 884   if (!simple_iv (loop, phi, name, &iv, true))
 885     return NULL_TREE;
 886
 887   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 888 }
 889
 890 /* Finds basic ivs.  */
 891
 892 static bool
 893 find_bivs (struct ivopts_data *data)
 894 {
 895   tree phi, step, type, base;
 896   bool found = false;
 897   struct loop *loop = data->current_loop;
 898
 899   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 900     {
 901       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
 902         continue;
 903
 904       step = determine_biv_step (phi);
 905       if (!step)
 906         continue;
 907
 908       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
 909       base = expand_simple_operations (base);
 910       if (contains_abnormal_ssa_name_p (base)
 911           || contains_abnormal_ssa_name_p (step))
 912         continue;
 913
 914       type = TREE_TYPE (PHI_RESULT (phi));
 915       base = fold_convert (type, base);
 916       if (step)
 917         step = fold_convert (type, step);
 918
 919       set_iv (data, PHI_RESULT (phi), base, step);
 920       found = true;
 921     }
 922
 923   return found;
 924 }
 925
 926 /* Marks basic ivs.  */
 927
 928 static void
 929 mark_bivs (struct ivopts_data *data)
 930 {
 931   tree phi, var;
 932   struct iv *iv, *incr_iv;
 933   struct loop *loop = data->current_loop;
 934   basic_block incr_bb;
 935
 936   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 937     {
 938       iv = get_iv (data, PHI_RESULT (phi));
 939       if (!iv)
 940         continue;
 941
 942       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
 943       incr_iv = get_iv (data, var);
 944       if (!incr_iv)
 945         continue;
 946
 947       /* If the increment is in the subloop, ignore it.  */
 948       incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
 949       if (incr_bb->loop_father != data->current_loop
 950           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
 951         continue;
 952
 953       iv->biv_p = true;
 954       incr_iv->biv_p = true;
 955     }
 956 }
 957
 958 /* Checks whether STMT defines a linear induction variable and stores its
 959    parameters to IV.  */
 960
 961 static bool
 962 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
 963 {
 964   tree lhs;
 965   struct loop *loop = data->current_loop;
 966
 967   iv->base = NULL_TREE;
 968   iv->step = NULL_TREE;
 969
 970   if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
 971     return false;
 972
 973   lhs = GIMPLE_STMT_OPERAND (stmt, 0);
 974   if (TREE_CODE (lhs) != SSA_NAME)
 975     return false;
 976
 977   if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
 978     return false;
 979   iv->base = expand_simple_operations (iv->base);
 980
 981   if (contains_abnormal_ssa_name_p (iv->base)
 982       || contains_abnormal_ssa_name_p (iv->step))
 983     return false;
 984
 985   return true;
 986 }
 987
 988 /* Finds general ivs in statement STMT.  */
 989
 990 static void
 991 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
 992 {
 993   affine_iv iv;
 994
 995   if (!find_givs_in_stmt_scev (data, stmt, &iv))
 996     return;
 997
 998   set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
 999 }
1000
1001 /* Finds general ivs in basic block BB.  */
1002
1003 static void
1004 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1005 {
1006   block_stmt_iterator bsi;
1007
1008   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1009     find_givs_in_stmt (data, bsi_stmt (bsi));
1010 }
1011
1012 /* Finds general ivs.  */
1013
1014 static void
1015 find_givs (struct ivopts_data *data)
1016 {
1017   struct loop *loop = data->current_loop;
1018   basic_block *body = get_loop_body_in_dom_order (loop);
1019   unsigned i;
1020
1021   for (i = 0; i < loop->num_nodes; i++)
1022     find_givs_in_bb (data, body[i]);
1023   free (body);
1024 }
1025
1026 /* For each ssa name defined in LOOP determines whether it is an induction
1027    variable and if so, its initial value and step.  */
1028
1029 static bool
1030 find_induction_variables (struct ivopts_data *data)
1031 {
1032   unsigned i;
1033   bitmap_iterator bi;
1034
1035   if (!find_bivs (data))
1036     return false;
1037
1038   find_givs (data);
1039   mark_bivs (data);
1040
1041   if (dump_file && (dump_flags & TDF_DETAILS))
1042     {
1043       tree niter = niter_for_single_dom_exit (data);
1044
1045       if (niter)
1046         {
1047           fprintf (dump_file, "  number of iterations ");
1048           print_generic_expr (dump_file, niter, TDF_SLIM);
1049           fprintf (dump_file, "\n\n");
1050         };
1051
1052       fprintf (dump_file, "Induction variables:\n\n");
1053
1054       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1055         {
1056           if (ver_info (data, i)->iv)
1057             dump_iv (dump_file, ver_info (data, i)->iv);
1058         }
1059     }
1060
1061   return true;
1062 }
1063
1064 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1065
1066 static struct iv_use *
1067 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1068             tree stmt, enum use_type use_type)
1069 {
1070   struct iv_use *use = XCNEW (struct iv_use);
1071
1072   use->id = n_iv_uses (data);
1073   use->type = use_type;
1074   use->iv = iv;
1075   use->stmt = stmt;
1076   use->op_p = use_p;
1077   use->related_cands = BITMAP_ALLOC (NULL);
1078
1079   /* To avoid showing ssa name in the dumps, if it was not reset by the
1080      caller.  */
1081   iv->ssa_name = NULL_TREE;
1082
1083   if (dump_file && (dump_flags & TDF_DETAILS))
1084     dump_use (dump_file, use);
1085
1086   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1087
1088   return use;
1089 }
1090
1091 /* Checks whether OP is a loop-level invariant and if so, records it.
1092    NONLINEAR_USE is true if the invariant is used in a way we do not
1093    handle specially.  */
1094
1095 static void
1096 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1097 {
1098   basic_block bb;
1099   struct version_info *info;
1100
1101   if (TREE_CODE (op) != SSA_NAME
1102       || !is_gimple_reg (op))
1103     return;
1104
1105   bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1106   if (bb
1107       && flow_bb_inside_loop_p (data->current_loop, bb))
1108     return;
1109
1110   info = name_info (data, op);
1111   info->name = op;
1112   info->has_nonlin_use |= nonlinear_use;
1113   if (!info->inv_id)
1114     info->inv_id = ++data->max_inv_id;
1115   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1116 }
1117
1118 /* Checks whether the use OP is interesting and if so, records it.  */
1119
1120 static struct iv_use *
1121 find_interesting_uses_op (struct ivopts_data *data, tree op)
1122 {
1123   struct iv *iv;
1124   struct iv *civ;
1125   tree stmt;
1126   struct iv_use *use;
1127
1128   if (TREE_CODE (op) != SSA_NAME)
1129     return NULL;
1130
1131   iv = get_iv (data, op);
1132   if (!iv)
1133     return NULL;
1134
1135   if (iv->have_use_for)
1136     {
1137       use = iv_use (data, iv->use_id);
1138
1139       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1140       return use;
1141     }
1142
1143   if (integer_zerop (iv->step))
1144     {
1145       record_invariant (data, op, true);
1146       return NULL;
1147     }
1148   iv->have_use_for = true;
1149
1150   civ = XNEW (struct iv);
1151   *civ = *iv;
1152
1153   stmt = SSA_NAME_DEF_STMT (op);
1154   gcc_assert (TREE_CODE (stmt) == PHI_NODE
1155               || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
1156
1157   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1158   iv->use_id = use->id;
1159
1160   return use;
1161 }
1162
1163 /* Given a condition *COND_P, checks whether it is a compare of an induction
1164    variable and an invariant.  If this is the case, CONTROL_VAR is set
1165    to location of the iv, BOUND to the location of the invariant,
1166    IV_VAR and IV_BOUND are set to the corresponding induction variable
1167    descriptions, and true is returned.  If this is not the case,
1168    CONTROL_VAR and BOUND are set to the arguments of the condition and
1169    false is returned.  */
1170
1171 static bool
1172 extract_cond_operands (struct ivopts_data *data, tree *cond_p,
1173                        tree **control_var, tree **bound,
1174                        struct iv **iv_var, struct iv **iv_bound)
1175 {
1176   /* The nodes returned when COND has just one operand.  Note that you should
1177      not modify anything in BOUND or IV_BOUND because of this.  */
1178   static struct iv const_iv;
1179   static tree zero;
1180   tree cond = *cond_p;
1181   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1182   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1183   bool ret = false;
1184
1185   zero = integer_zero_node;
1186   const_iv.step = integer_zero_node;
1187
1188   if (TREE_CODE (cond) == SSA_NAME)
1189     {
1190       op0 = cond_p;
1191       iv0 = get_iv (data, cond);
1192       ret = (iv0 && !integer_zerop (iv0->step));
1193       goto end;
1194     }
1195
1196   if (!COMPARISON_CLASS_P (cond))
1197     {
1198       op0 = cond_p;
1199       goto end;
1200     }
1201
1202   op0 = &TREE_OPERAND (cond, 0);
1203   op1 = &TREE_OPERAND (cond, 1);
1204   if (TREE_CODE (*op0) == SSA_NAME)
1205     iv0 = get_iv (data, *op0);
1206   if (TREE_CODE (*op1) == SSA_NAME)
1207     iv1 = get_iv (data, *op1);
1208
1209   /* Exactly one of the compared values must be an iv, and the other one must
1210      be an invariant.  */
1211   if (!iv0 || !iv1)
1212     goto end;
1213
1214   if (integer_zerop (iv0->step))
1215     {
1216       /* Control variable may be on the other side.  */
1217       tmp_op = op0; op0 = op1; op1 = tmp_op;
1218       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1219     }
1220   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1221
1222 end:
1223   if (control_var)
1224     *control_var = op0;;
1225   if (iv_var)
1226     *iv_var = iv0;;
1227   if (bound)
1228     *bound = op1;
1229   if (iv_bound)
1230     *iv_bound = iv1;
1231
1232   return ret;
1233 }
1234
1235 /* Checks whether the condition *COND_P in STMT is interesting
1236    and if so, records it.  */
1237
1238 static void
1239 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1240 {
1241   tree *var_p, *bound_p;
1242   struct iv *var_iv, *civ;
1243
1244   if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
1245     {
1246       find_interesting_uses_op (data, *var_p);
1247       find_interesting_uses_op (data, *bound_p);
1248       return;
1249     }
1250
1251   civ = XNEW (struct iv);
1252   *civ = *var_iv;
1253   record_use (data, cond_p, civ, stmt, USE_COMPARE);
1254 }
1255
1256 /* Returns true if expression EXPR is obviously invariant in LOOP,
1257    i.e. if all its operands are defined outside of the LOOP.  */
1258
1259 bool
1260 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1261 {
1262   basic_block def_bb;
1263   unsigned i, len;
1264
1265   if (is_gimple_min_invariant (expr))
1266     return true;
1267
1268   if (TREE_CODE (expr) == SSA_NAME)
1269     {
1270       def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1271       if (def_bb
1272           && flow_bb_inside_loop_p (loop, def_bb))
1273         return false;
1274
1275       return true;
1276     }
1277
1278   if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
1279     return false;
1280
1281   len = TREE_OPERAND_LENGTH (expr);
1282   for (i = 0; i < len; i++)
1283     if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1284       return false;
1285
1286   return true;
1287 }
1288
1289 /* Cumulates the steps of indices into DATA and replaces their values with the
1290    initial ones.  Returns false when the value of the index cannot be determined.
1291    Callback for for_each_index.  */
1292
1293 struct ifs_ivopts_data
1294 {
1295   struct ivopts_data *ivopts_data;
1296   tree stmt;
1297   tree step;
1298 };
1299
1300 static bool
1301 idx_find_step (tree base, tree *idx, void *data)
1302 {
1303   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1304   struct iv *iv;
1305   tree step, iv_base, iv_step, lbound, off;
1306   struct loop *loop = dta->ivopts_data->current_loop;
1307
1308   if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1309       || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1310     return false;
1311
1312   /* If base is a component ref, require that the offset of the reference
1313      be invariant.  */
1314   if (TREE_CODE (base) == COMPONENT_REF)
1315     {
1316       off = component_ref_field_offset (base);
1317       return expr_invariant_in_loop_p (loop, off);
1318     }
1319
1320   /* If base is array, first check whether we will be able to move the
1321      reference out of the loop (in order to take its address in strength
1322      reduction).  In order for this to work we need both lower bound
1323      and step to be loop invariants.  */
1324   if (TREE_CODE (base) == ARRAY_REF)
1325     {
1326       step = array_ref_element_size (base);
1327       lbound = array_ref_low_bound (base);
1328
1329       if (!expr_invariant_in_loop_p (loop, step)
1330           || !expr_invariant_in_loop_p (loop, lbound))
1331         return false;
1332     }
1333
1334   if (TREE_CODE (*idx) != SSA_NAME)
1335     return true;
1336
1337   iv = get_iv (dta->ivopts_data, *idx);
1338   if (!iv)
1339     return false;
1340
1341   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1342           *&x[0], which is not folded and does not trigger the
1343           ARRAY_REF path below.  */
1344   *idx = iv->base;
1345
1346   if (integer_zerop (iv->step))
1347     return true;
1348
1349   if (TREE_CODE (base) == ARRAY_REF)
1350     {
1351       step = array_ref_element_size (base);
1352
1353       /* We only handle addresses whose step is an integer constant.  */
1354       if (TREE_CODE (step) != INTEGER_CST)
1355         return false;
1356     }
1357   else
1358     /* The step for pointer arithmetics already is 1 byte.  */
1359     step = build_int_cst (sizetype, 1);
1360
1361   iv_base = iv->base;
1362   iv_step = iv->step;
1363   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1364                             sizetype, &iv_base, &iv_step, dta->stmt,
1365                             false))
1366     {
1367       /* The index might wrap.  */
1368       return false;
1369     }
1370
1371   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1372   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1373
1374   return true;
1375 }
1376
1377 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1378    object is passed to it in DATA.  */
1379
1380 static bool
1381 idx_record_use (tree base, tree *idx,
1382                 void *vdata)
1383 {
1384   struct ivopts_data *data = (struct ivopts_data *) vdata;
1385   find_interesting_uses_op (data, *idx);
1386   if (TREE_CODE (base) == ARRAY_REF)
1387     {
1388       find_interesting_uses_op (data, array_ref_element_size (base));
1389       find_interesting_uses_op (data, array_ref_low_bound (base));
1390     }
1391   return true;
1392 }
1393
1394 /* Returns true if memory reference REF may be unaligned.  */
1395
1396 static bool
1397 may_be_unaligned_p (tree ref)
1398 {
1399   tree base;
1400   tree base_type;
1401   HOST_WIDE_INT bitsize;
1402   HOST_WIDE_INT bitpos;
1403   tree toffset;
1404   enum machine_mode mode;
1405   int unsignedp, volatilep;
1406   unsigned base_align;
1407
1408   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1409      thus they are not misaligned.  */
1410   if (TREE_CODE (ref) == TARGET_MEM_REF)
1411     return false;
1412
1413   /* The test below is basically copy of what expr.c:normal_inner_ref
1414      does to check whether the object must be loaded by parts when
1415      STRICT_ALIGNMENT is true.  */
1416   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1417                               &unsignedp, &volatilep, true);
1418   base_type = TREE_TYPE (base);
1419   base_align = TYPE_ALIGN (base_type);
1420
1421   if (mode != BLKmode)
1422     {
1423       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1424
1425       if (base_align < mode_align
1426           || (bitpos % mode_align) != 0
1427           || (bitpos % BITS_PER_UNIT) != 0)
1428         return true;
1429
1430       if (toffset
1431           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1432         return true;
1433     }
1434
1435   return false;
1436 }
1437
1438 /* Return true if EXPR may be non-addressable.   */
1439
1440 static bool
1441 may_be_nonaddressable_p (tree expr)
1442 {
1443   switch (TREE_CODE (expr))
1444     {
1445     case TARGET_MEM_REF:
1446       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1447          target, thus they are always addressable.  */
1448       return false;
1449
1450     case COMPONENT_REF:
1451       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1452              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1453
1454     case VIEW_CONVERT_EXPR:
1455       /* This kind of view-conversions may wrap non-addressable objects
1456          and make them look addressable.  After some processing the
1457          non-addressability may be uncovered again, causing ADDR_EXPRs
1458          of inappropriate objects to be built.  */
1459       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1460           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1461         return true;
1462
1463       /* ... fall through ... */
1464
1465     case ARRAY_REF:
1466     case ARRAY_RANGE_REF:
1467       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1468
1469     case CONVERT_EXPR:
1470     case NON_LVALUE_EXPR:
1471     case NOP_EXPR:
1472       return true;
1473
1474     default:
1475       break;
1476     }
1477
1478   return false;
1479 }
1480
/* Finds addresses in *OP_P inside STMT.  If the memory reference *OP_P can
   be expressed as an address that advances by a nonzero step, a USE_ADDRESS
   use with that base address and step is recorded for it.  Otherwise (see
   the FAIL label) the indices of the reference are recorded as separate
   uses through idx_record_use.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  /* The expression returned here is modified in place below (ssa names are
     replaced by the bases of their ivs); *OP_P itself is left alone.  */
  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      /* Replace the base of the TARGET_MEM_REF by the initial value of its
         iv; the step of that iv starts the total step.  */
      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      /* Likewise for the index; its step is scaled by TMR_STEP (if there is
         one) before it is added to the total step.  */
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      /* An address that does not change is not an interesting address
         use.  */
      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      /* Let idx_find_step replace each index by the initial value of its iv
         and accumulate the total step, starting from zero.  */
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* idx_find_step rejects these two codes when it sees them as the base
         of an index.  */
      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          /* Walk down to the innermost object of the reference and fold it
             if it is a dereference.  */
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* The reference is not handled as an address use; record the uses in the
     indices of the original (unmodified) reference instead.  */
  for_each_index (op_p, idx_record_use, data);
}
1584
1585 /* Finds and records invariants used in STMT.  */
1586
1587 static void
1588 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1589 {
1590   ssa_op_iter iter;
1591   use_operand_p use_p;
1592   tree op;
1593
1594   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1595     {
1596       op = USE_FROM_PTR (use_p);
1597       record_invariant (data, op, false);
1598     }
1599 }
1600
1601 /* Finds interesting uses of induction variables in the statement STMT.  */
1602
1603 static void
1604 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1605 {
1606   struct iv *iv;
1607   tree op, lhs, rhs;
1608   ssa_op_iter iter;
1609   use_operand_p use_p;
1610
1611   find_invariants_stmt (data, stmt);
1612
1613   if (TREE_CODE (stmt) == COND_EXPR)
1614     {
1615       find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1616       return;
1617     }
1618
1619   if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
1620     {
1621       lhs = GIMPLE_STMT_OPERAND (stmt, 0);
1622       rhs = GIMPLE_STMT_OPERAND (stmt, 1);
1623
1624       if (TREE_CODE (lhs) == SSA_NAME)
1625         {
1626           /* If the statement defines an induction variable, the uses are not
1627              interesting by themselves.  */
1628
1629           iv = get_iv (data, lhs);
1630
1631           if (iv && !integer_zerop (iv->step))
1632             return;
1633         }
1634
1635       switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1636         {
1637         case tcc_comparison:
1638           find_interesting_uses_cond (data, stmt,
1639                                       &GIMPLE_STMT_OPERAND (stmt, 1));
1640           return;
1641
1642         case tcc_reference:
1643           find_interesting_uses_address (data, stmt,
1644                                          &GIMPLE_STMT_OPERAND (stmt, 1));
1645           if (REFERENCE_CLASS_P (lhs))
1646             find_interesting_uses_address (data, stmt,
1647                                            &GIMPLE_STMT_OPERAND (stmt, 0));
1648           return;
1649
1650         default: ;
1651         }
1652
1653       if (REFERENCE_CLASS_P (lhs)
1654           && is_gimple_val (rhs))
1655         {
1656           find_interesting_uses_address (data, stmt,
1657                                          &GIMPLE_STMT_OPERAND (stmt, 0));
1658           find_interesting_uses_op (data, rhs);
1659           return;
1660         }
1661
1662       /* TODO -- we should also handle address uses of type
1663
1664          memory = call (whatever);
1665
1666          and
1667
1668          call (memory).  */
1669     }
1670
1671   if (TREE_CODE (stmt) == PHI_NODE
1672       && bb_for_stmt (stmt) == data->current_loop->header)
1673     {
1674       lhs = PHI_RESULT (stmt);
1675       iv = get_iv (data, lhs);
1676
1677       if (iv && !integer_zerop (iv->step))
1678         return;
1679     }
1680
1681   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1682     {
1683       op = USE_FROM_PTR (use_p);
1684
1685       if (TREE_CODE (op) != SSA_NAME)
1686         continue;
1687
1688       iv = get_iv (data, op);
1689       if (!iv)
1690         continue;
1691
1692       find_interesting_uses_op (data, op);
1693     }
1694 }
1695
1696 /* Finds interesting uses of induction variables outside of loops
1697    on loop exit edge EXIT.  */
1698
1699 static void
1700 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1701 {
1702   tree phi, def;
1703
1704   for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1705     {
1706       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1707       if (is_gimple_reg (def))
1708         find_interesting_uses_op (data, def);
1709     }
1710 }
1711
1712 /* Finds uses of the induction variables that are interesting.  */
1713
1714 static void
1715 find_interesting_uses (struct ivopts_data *data)
1716 {
1717   basic_block bb;
1718   block_stmt_iterator bsi;
1719   tree phi;
1720   basic_block *body = get_loop_body (data->current_loop);
1721   unsigned i;
1722   struct version_info *info;
1723   edge e;
1724
1725   if (dump_file && (dump_flags & TDF_DETAILS))
1726     fprintf (dump_file, "Uses:\n\n");
1727
1728   for (i = 0; i < data->current_loop->num_nodes; i++)
1729     {
1730       edge_iterator ei;
1731       bb = body[i];
1732
1733       FOR_EACH_EDGE (e, ei, bb->succs)
1734         if (e->dest != EXIT_BLOCK_PTR
1735             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1736           find_interesting_uses_outside (data, e);
1737
1738       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1739         find_interesting_uses_stmt (data, phi);
1740       for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1741         find_interesting_uses_stmt (data, bsi_stmt (bsi));
1742     }
1743
1744   if (dump_file && (dump_flags & TDF_DETAILS))
1745     {
1746       bitmap_iterator bi;
1747
1748       fprintf (dump_file, "\n");
1749
1750       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1751         {
1752           info = ver_info (data, i);
1753           if (info->inv_id)
1754             {
1755               fprintf (dump_file, "  ");
1756               print_generic_expr (dump_file, info->name, TDF_SLIM);
1757               fprintf (dump_file, " is invariant (%d)%s\n",
1758                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1759             }
1760         }
1761
1762       fprintf (dump_file, "\n");
1763     }
1764
1765   free (body);
1766 }
1767
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.

   *OFFSET is always written; it is zero when nothing is stripped.  The
   result is EXPR itself (pointer-identical, including any conversions
   removed by STRIP_NOPS) when nothing changed -- the recursive calls
   rely on this identity to detect whether an operand was modified.
   Otherwise it is a new expression converted back to the type of EXPR,
   except where a component or array reference is stripped completely, in
   which case the stripped inner object is returned as is.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant becomes the offset and is replaced by zero, unless it
         is zero already or does not fit.  */
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Strip both operands and combine their offsets according to the
         operation.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      /* Rebuild the expression, dropping operands that became zero.  */
      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      /* The constant part of the index, scaled by the element size, is the
         offset contributed by this reference.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      /* Otherwise the reference is rebuilt by the common code after the
         switch, with OP1 as its new index.  */
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      /* The operand of an address is processed as the top-level reference
         inside an address.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      /* The operand of a dereference is not part of the address being
         processed.  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  /* OP1 is non-NULL only when we come here from the ARRAY_REF case.  */
  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  /* Modify a copy, so that the original expression stays intact.  */
  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
1907
1908 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
1909
1910 static tree
1911 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1912 {
1913   return strip_offset_1 (expr, false, false, offset);
1914 }
1915
1916 /* Returns variant of TYPE that can be used as base for different uses.
1917    We return unsigned type with the same precision, which avoids problems
1918    with overflows.  */
1919
1920 static tree
1921 generic_type_for (tree type)
1922 {
1923   if (GENERIC_ADDR_SPACE_POINTER_TYPE_P (type))
1924     return unsigned_type_for (type);
1925
1926   if (OTHER_ADDR_SPACE_POINTER_TYPE_P (type))
1927     {
1928       int qual = ENCODE_QUAL_ADDR_SPACE (TYPE_ADDR_SPACE (TREE_TYPE (type)));
1929       return build_pointer_type
1930         (build_qualified_type (void_type_node, qual));
1931     }
1932
1933   if (TYPE_UNSIGNED (type))
1934     return type;
1935
1936   return unsigned_type_for (type);
1937 }
1938
1939 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
1940    the bitmap to that we should store it.  */
1941
1942 static struct ivopts_data *fd_ivopts_data;
1943 static tree
1944 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1945 {
1946   bitmap *depends_on = (bitmap *) data;
1947   struct version_info *info;
1948
1949   if (TREE_CODE (*expr_p) != SSA_NAME)
1950     return NULL_TREE;
1951   info = name_info (fd_ivopts_data, *expr_p);
1952
1953   if (!info->inv_id || info->has_nonlin_use)
1954     return NULL_TREE;
1955
1956   if (!*depends_on)
1957     *depends_on = BITMAP_ALLOC (NULL);
1958   bitmap_set_bit (*depends_on, info->inv_id);
1959
1960   return NULL_TREE;
1961 }
1962
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.

   INCREMENTED_AT is stored in the candidate.  If a candidate with the same
   position, the same INCREMENTED_AT and equal base and step exists
   already, no new one is created; the existing one is returned (and made
   important if IMPORTANT is true).  BASE and STEP are converted to the type
   given by generic_type_for before they are compared and stored.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  /* Look for an existing candidate that matches.  The loop is left by
     BREAK exactly when one is found, so afterwards I is below the number of
     candidates iff CAND is a match.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      /* A candidate without an iv is a pseudocandidate; it matches only a
         request for a pseudocandidate.  */
      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0))
        break;
    }

  /* Nothing matched; create a new candidate, whose id I is also the index
     at which it is pushed.  */
  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      /* Real candidates at positions other than IP_ORIGINAL get a fresh
         temporary, used for the value both before and after the
         increment.  */
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      /* For a step that is not an integer constant, record the invariants
         it depends on.  find_depends gets the ivopts data through
         fd_ivopts_data.  */
      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  /* An existing candidate may be upgraded to important, but it is never
     downgraded.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      /* I is the index of the matched or the newly created candidate.  */
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
2062
2063 /* Returns true if incrementing the induction variable at the end of the LOOP
2064    is allowed.
2065
2066    The purpose is to avoid splitting latch edge with a biv increment, thus
2067    creating a jump, possibly confusing other optimization passes and leaving
2068    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2069    is not available (so we do not have a better alternative), or if the latch
2070    edge is already nonempty.  */
2071
2072 static bool
2073 allow_ip_end_pos_p (struct loop *loop)
2074 {
2075   if (!ip_normal_pos (loop))
2076     return true;
2077
2078   if (!empty_block_p (ip_end_pos (loop)))
2079     return true;
2080
2081   return false;
2082 }
2083
2084 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2085    position to POS.  If USE is not NULL, the candidate is set as related to
2086    it.  The candidate computation is scheduled on all available positions.  */
2087
2088 static void
2089 add_candidate (struct ivopts_data *data,
2090                tree base, tree step, bool important, struct iv_use *use)
2091 {
2092   if (ip_normal_pos (data->current_loop))
2093     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2094   if (ip_end_pos (data->current_loop)
2095       && allow_ip_end_pos_p (data->current_loop))
2096     add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2097 }
2098
2099 /* Add a standard "0 + 1 * iteration" iv candidate for a
2100    type with SIZE bits.  */
2101
2102 static void
2103 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2104                                      unsigned int size)
2105 {
2106   tree type = lang_hooks.types.type_for_size (size, true);
2107   add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2108                  true, NULL);
2109 }
2110
2111 /* Adds standard iv candidates.  */
2112
2113 static void
2114 add_standard_iv_candidates (struct ivopts_data *data)
2115 {
2116   add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2117
2118   /* The same for a double-integer type if it is still fast enough.  */
2119   if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2120     add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2121 }
2122
2123
2124 /* Adds candidates bases on the old induction variable IV.  */
2125
2126 static void
2127 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2128 {
2129   tree phi, def;
2130   struct iv_cand *cand;
2131
2132   add_candidate (data, iv->base, iv->step, true, NULL);
2133
2134   /* The same, but with initial value zero.  */
2135   add_candidate (data,
2136                  build_int_cst (TREE_TYPE (iv->base), 0),
2137                  iv->step, true, NULL);
2138
2139   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2140   if (TREE_CODE (phi) == PHI_NODE)
2141     {
2142       /* Additionally record the possibility of leaving the original iv
2143          untouched.  */
2144       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2145       cand = add_candidate_1 (data,
2146                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2147                               SSA_NAME_DEF_STMT (def));
2148       cand->var_before = iv->ssa_name;
2149       cand->var_after = def;
2150     }
2151 }
2152
2153 /* Adds candidates based on the old induction variables.  */
2154
2155 static void
2156 add_old_ivs_candidates (struct ivopts_data *data)
2157 {
2158   unsigned i;
2159   struct iv *iv;
2160   bitmap_iterator bi;
2161
2162   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2163     {
2164       iv = ver_info (data, i)->iv;
2165       if (iv && iv->biv_p && !integer_zerop (iv->step))
2166         add_old_iv_candidates (data, iv);
2167     }
2168 }
2169
2170 /* Adds candidates based on the value of the induction variable IV and USE.  */
2171
2172 static void
2173 add_iv_value_candidates (struct ivopts_data *data,
2174                          struct iv *iv, struct iv_use *use)
2175 {
2176   unsigned HOST_WIDE_INT offset;
2177   tree base;
2178
2179   add_candidate (data, iv->base, iv->step, false, use);
2180
2181   /* The same, but with initial value zero.  Make such variable important,
2182      since it is generic enough so that possibly many uses may be based
2183      on it.  */
2184   add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2185                  iv->step, true, use);
2186
2187   /* Third, try removing the constant offset.  */
2188   base = strip_offset (iv->base, &offset);
2189   if (offset)
2190     add_candidate (data, base, iv->step, false, use);
2191 }
2192
2193 /* Adds candidates based on the uses.  */
2194
2195 static void
2196 add_derived_ivs_candidates (struct ivopts_data *data)
2197 {
2198   unsigned i;
2199
2200   for (i = 0; i < n_iv_uses (data); i++)
2201     {
2202       struct iv_use *use = iv_use (data, i);
2203
2204       if (!use)
2205         continue;
2206
2207       switch (use->type)
2208         {
2209         case USE_NONLINEAR_EXPR:
2210         case USE_COMPARE:
2211         case USE_ADDRESS:
2212           /* Just add the ivs based on the value of the iv used here.  */
2213           add_iv_value_candidates (data, use->iv, use);
2214           break;
2215
2216         default:
2217           gcc_unreachable ();
2218         }
2219     }
2220 }
2221
2222 /* Record important candidates and add them to related_cands bitmaps
2223    if needed.  */
2224
2225 static void
2226 record_important_candidates (struct ivopts_data *data)
2227 {
2228   unsigned i;
2229   struct iv_use *use;
2230
2231   for (i = 0; i < n_iv_cands (data); i++)
2232     {
2233       struct iv_cand *cand = iv_cand (data, i);
2234
2235       if (cand->important)
2236         bitmap_set_bit (data->important_candidates, i);
2237     }
2238
2239   data->consider_all_candidates = (n_iv_cands (data)
2240                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2241
2242   if (data->consider_all_candidates)
2243     {
2244       /* We will not need "related_cands" bitmaps in this case,
2245          so release them to decrease peak memory consumption.  */
2246       for (i = 0; i < n_iv_uses (data); i++)
2247         {
2248           use = iv_use (data, i);
2249           BITMAP_FREE (use->related_cands);
2250         }
2251     }
2252   else
2253     {
2254       /* Add important candidates to the related_cands bitmaps.  */
2255       for (i = 0; i < n_iv_uses (data); i++)
2256         bitmap_ior_into (iv_use (data, i)->related_cands,
2257                          data->important_candidates);
2258     }
2259 }
2260
/* Finds the candidates for the induction variables.  The sources are
   consulted in a fixed order -- commonly used standard ivs, the old
   induction variables, then ivs derived from the uses -- and afterwards
   the important candidates are recorded.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates (data);
  add_old_ivs_candidates (data);
  add_derived_ivs_candidates (data);

  /* Must come last: it inspects the complete candidate list.  */
  record_important_candidates (data);
}
2278
2279 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2280    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2281    we allocate a simple list to every use.  */
2282
2283 static void
2284 alloc_use_cost_map (struct ivopts_data *data)
2285 {
2286   unsigned i, size, s, j;
2287
2288   for (i = 0; i < n_iv_uses (data); i++)
2289     {
2290       struct iv_use *use = iv_use (data, i);
2291       bitmap_iterator bi;
2292
2293       if (data->consider_all_candidates)
2294         size = n_iv_cands (data);
2295       else
2296         {
2297           s = 0;
2298           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2299             {
2300               s++;
2301             }
2302
2303           /* Round up to the power of two, so that moduling by it is fast.  */
2304           for (size = 1; size < s; size <<= 1)
2305             continue;
2306         }
2307
2308       use->n_map_members = size;
2309       use->cost_map = XCNEWVEC (struct cost_pair, size);
2310     }
2311 }
2312
2313 /* Returns description of computation cost of expression whose runtime
2314    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2315
2316 static comp_cost
2317 new_cost (unsigned runtime, unsigned complexity)
2318 {
2319   comp_cost cost;
2320
2321   cost.cost = runtime;
2322   cost.complexity = complexity;
2323
2324   return cost;
2325 }
2326
2327 /* Adds costs COST1 and COST2.  */
2328
2329 static comp_cost
2330 add_costs (comp_cost cost1, comp_cost cost2)
2331 {
2332   cost1.cost += cost2.cost;
2333   cost1.complexity += cost2.complexity;
2334
2335   return cost1;
2336 }
2337 /* Subtracts costs COST1 and COST2.  */
2338
2339 static comp_cost
2340 sub_costs (comp_cost cost1, comp_cost cost2)
2341 {
2342   cost1.cost -= cost2.cost;
2343   cost1.complexity -= cost2.complexity;
2344
2345   return cost1;
2346 }
2347
2348 /* Returns a negative number if COST1 < COST2, a positive number if
2349    COST1 > COST2, and 0 if COST1 = COST2.  */
2350
2351 static int
2352 compare_costs (comp_cost cost1, comp_cost cost2)
2353 {
2354   if (cost1.cost == cost2.cost)
2355     return cost1.complexity - cost2.complexity;
2356
2357   return cost1.cost - cost2.cost;
2358 }
2359
2360 /* Returns true if COST is infinite.  */
2361
2362 static bool
2363 infinite_cost_p (comp_cost cost)
2364 {
2365   return cost.cost == INFTY;
2366 }
2367
2368 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2369    on invariants DEPENDS_ON and that the value used in expressing it
2370    is VALUE.*/
2371
2372 static void
2373 set_use_iv_cost (struct ivopts_data *data,
2374                  struct iv_use *use, struct iv_cand *cand,
2375                  comp_cost cost, bitmap depends_on, tree value)
2376 {
2377   unsigned i, s;
2378
2379   if (infinite_cost_p (cost))
2380     {
2381       BITMAP_FREE (depends_on);
2382       return;
2383     }
2384
2385   if (data->consider_all_candidates)
2386     {
2387       use->cost_map[cand->id].cand = cand;
2388       use->cost_map[cand->id].cost = cost;
2389       use->cost_map[cand->id].depends_on = depends_on;
2390       use->cost_map[cand->id].value = value;
2391       return;
2392     }
2393
2394   /* n_map_members is a power of two, so this computes modulo.  */
2395   s = cand->id & (use->n_map_members - 1);
2396   for (i = s; i < use->n_map_members; i++)
2397     if (!use->cost_map[i].cand)
2398       goto found;
2399   for (i = 0; i < s; i++)
2400     if (!use->cost_map[i].cand)
2401       goto found;
2402
2403   gcc_unreachable ();
2404
2405 found:
2406   use->cost_map[i].cand = cand;
2407   use->cost_map[i].cost = cost;
2408   use->cost_map[i].depends_on = depends_on;
2409   use->cost_map[i].value = value;
2410 }
2411
2412 /* Gets cost of (USE, CANDIDATE) pair.  */
2413
2414 static struct cost_pair *
2415 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2416                  struct iv_cand *cand)
2417 {
2418   unsigned i, s;
2419   struct cost_pair *ret;
2420
2421   if (!cand)
2422     return NULL;
2423
2424   if (data->consider_all_candidates)
2425     {
2426       ret = use->cost_map + cand->id;
2427       if (!ret->cand)
2428         return NULL;
2429
2430       return ret;
2431     }
2432
2433   /* n_map_members is a power of two, so this computes modulo.  */
2434   s = cand->id & (use->n_map_members - 1);
2435   for (i = s; i < use->n_map_members; i++)
2436     if (use->cost_map[i].cand == cand)
2437       return use->cost_map + i;
2438
2439   for (i = 0; i < s; i++)
2440     if (use->cost_map[i].cand == cand)
2441       return use->cost_map + i;
2442
2443   return NULL;
2444 }
2445
2446 /* Returns estimate on cost of computing SEQ.  */
2447
2448 static unsigned
2449 seq_cost (rtx seq)
2450 {
2451   unsigned cost = 0;
2452   rtx set;
2453
2454   for (; seq; seq = NEXT_INSN (seq))
2455     {
2456       set = single_set (seq);
2457       if (set)
2458         cost += rtx_cost (set, SET);
2459       else
2460         cost++;
2461     }
2462
2463   return cost;
2464 }
2465
2466 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2467 static rtx
2468 produce_memory_decl_rtl (tree obj, int *regno)
2469 {
2470   rtx x;
2471
2472   gcc_assert (obj);
2473   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2474     {
2475       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2476       x = gen_rtx_SYMBOL_REF (Pmode, name);
2477       SET_SYMBOL_REF_DECL (x, obj);
2478       x = gen_rtx_MEM (DECL_MODE (obj), x);
2479       targetm.encode_section_info (obj, x, true);
2480     }
2481   else
2482     {
2483       x = gen_raw_REG (Pmode, (*regno)++);
2484       x = gen_rtx_MEM (DECL_MODE (obj), x);
2485     }
2486
2487   return x;
2488 }
2489
2490 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2491    walk_tree.  DATA contains the actual fake register number.  */
2492
2493 static tree
2494 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2495 {
2496   tree obj = NULL_TREE;
2497   rtx x = NULL_RTX;
2498   int *regno = (int *) data;
2499
2500   switch (TREE_CODE (*expr_p))
2501     {
2502     case ADDR_EXPR:
2503       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2504            handled_component_p (*expr_p);
2505            expr_p = &TREE_OPERAND (*expr_p, 0))
2506         continue;
2507       obj = *expr_p;
2508       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2509         x = produce_memory_decl_rtl (obj, regno);
2510       break;
2511
2512     case SSA_NAME:
2513       *ws = 0;
2514       obj = SSA_NAME_VAR (*expr_p);
2515       if (!DECL_RTL_SET_P (obj))
2516         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2517       break;
2518
2519     case VAR_DECL:
2520     case PARM_DECL:
2521     case RESULT_DECL:
2522       *ws = 0;
2523       obj = *expr_p;
2524
2525       if (DECL_RTL_SET_P (obj))
2526         break;
2527
2528       if (DECL_MODE (obj) == BLKmode)
2529         x = produce_memory_decl_rtl (obj, regno);
2530       else
2531         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2532
2533       break;
2534
2535     default:
2536       break;
2537     }
2538
2539   if (x)
2540     {
2541       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2542       SET_DECL_RTL (obj, x);
2543     }
2544
2545   return NULL_TREE;
2546 }
2547
2548 /* Determines cost of the computation of EXPR.  */
2549
2550 static unsigned
2551 computation_cost (tree expr)
2552 {
2553   rtx seq, rslt;
2554   tree type = TREE_TYPE (expr);
2555   unsigned cost;
2556   /* Avoid using hard regs in ways which may be unsupported.  */
2557   int regno = LAST_VIRTUAL_REGISTER + 1;
2558
2559   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2560   start_sequence ();
2561   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2562   seq = get_insns ();
2563   end_sequence ();
2564
2565   cost = seq_cost (seq);
2566   if (MEM_P (rslt))
2567     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2568
2569   return cost;
2570 }
2571
2572 /* Returns variable containing the value of candidate CAND at statement AT.  */
2573
2574 static tree
2575 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2576 {
2577   if (stmt_after_increment (loop, cand, stmt))
2578     return cand->var_after;
2579   else
2580     return cand->var_before;
2581 }
2582
2583 /* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
2584    but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */
2585
2586 int
2587 tree_int_cst_sign_bit (const_tree t)
2588 {
2589   unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2590   unsigned HOST_WIDE_INT w;
2591
2592   if (bitno < HOST_BITS_PER_WIDE_INT)
2593     w = TREE_INT_CST_LOW (t);
2594   else
2595     {
2596       w = TREE_INT_CST_HIGH (t);
2597       bitno -= HOST_BITS_PER_WIDE_INT;
2598     }
2599
2600   return (w >> bitno) & 1;
2601 }
2602
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.

   The proof is structural: TOP is decomposed recursively through
   MULT_EXPR (by an integer constant), PLUS_EXPR and MINUS_EXPR down to
   subexpressions equal to BOT or, when BOT is an integer constant, to
   integer constants.  When false is returned, *MUL may or may not have
   been written.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  /* The precision results are extended to; taken from the type of TOP
     before any conversions are stripped from it.  */
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  /* TOP = 1 * BOT.  */
  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* TOP = X * MBY: if X = res * BOT and MBY is a constant, then
         TOP = (res * MBY) * BOT.  Only the second operand is checked for
         being the constant.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Both operands must be constant multiples of BOT; the resulting
         multiple is the sum (or difference) of theirs.  */
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      /* Two constants: divide them as sign-extended values and succeed
         only if the division is exact.  A zero BOT never divides.  */
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      /* RES receives the remainder of the division here.  */
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
2667
2668 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2669    same precision that is at least as wide as the precision of TYPE, stores
2670    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2671    type of A and B.  */
2672
2673 static tree
2674 determine_common_wider_type (tree *a, tree *b)
2675 {
2676   tree wider_type = NULL;
2677   tree suba, subb;
2678   tree atype = TREE_TYPE (*a);
2679
2680   if ((TREE_CODE (*a) == NOP_EXPR
2681        || TREE_CODE (*a) == CONVERT_EXPR))
2682     {
2683       suba = TREE_OPERAND (*a, 0);
2684       wider_type = TREE_TYPE (suba);
2685       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2686         return atype;
2687     }
2688   else
2689     return atype;
2690
2691   if ((TREE_CODE (*b) == NOP_EXPR
2692        || TREE_CODE (*b) == CONVERT_EXPR))
2693     {
2694       subb = TREE_OPERAND (*b, 0);
2695       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2696         return atype;
2697     }
2698   else
2699     return atype;
2700
2701   *a = suba;
2702   *b = subb;
2703   return wider_type;
2704 }
2705
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND,
   which happens when the type of USE is wider than the type of CAND or
   when the step of USE is not provably a constant multiple of the step
   of CAND.  AFF may be left unset when false is returned.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, tree at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  /* VAR is the variable holding the value of CAND at AT; the final
     arithmetic is carried out in UUTYPE, the unsigned variant of the
     type of USE.  */
  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  /* RAT is the ratio between the step of USE and the step of CAND.  */
  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  In that
     case one step of CAND is added to CBASE before it is scaled by the
     negated ratio below.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  /* AFF = ubase - ratio * cbase, computed in COMMON_TYPE and then brought
     to UUTYPE if the two differ.  */
  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  /* AFF += ratio * var.  */
  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
2781
2782 /* Determines the expression by that USE is expressed from induction variable
2783    CAND at statement AT in LOOP.  The computation is unshared.  */
2784
2785 static tree
2786 get_computation_at (struct loop *loop,
2787                     struct iv_use *use, struct iv_cand *cand, tree at)
2788 {
2789   aff_tree aff;
2790   tree type = TREE_TYPE (use->iv->base);
2791
2792   if (!get_computation_aff (loop, use, cand, at, &aff))
2793     return NULL_TREE;
2794   unshare_aff_combination (&aff);
2795   return fold_convert (type, aff_combination_to_tree (&aff));
2796 }
2797
2798 /* Determines the expression by that USE is expressed from induction variable
2799    CAND in LOOP.  The computation is unshared.  */
2800
2801 static tree
2802 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2803 {
2804   return get_computation_at (loop, use, cand, use->stmt);
2805 }
2806
2807 /* Returns cost of addition in MODE.  */
2808
2809 static unsigned
2810 add_cost (enum machine_mode mode)
2811 {
2812   static unsigned costs[NUM_MACHINE_MODES];
2813   rtx seq;
2814   unsigned cost;
2815
2816   if (costs[mode])
2817     return costs[mode];
2818
2819   start_sequence ();
2820   force_operand (gen_rtx_fmt_ee (PLUS, mode,
2821                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2822                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2823                  NULL_RTX);
2824   seq = get_insns ();
2825   end_sequence ();
2826
2827   cost = seq_cost (seq);
2828   if (!cost)
2829     cost = 1;
2830
2831   costs[mode] = cost;
2832
2833   if (dump_file && (dump_flags & TDF_DETAILS))
2834     fprintf (dump_file, "Addition in %s costs %d\n",
2835              GET_MODE_NAME (mode), cost);
2836   return cost;
2837 }
2838
/* Entry in a hashtable of already known costs for multiplication.
   An entry is identified by the (CST, MODE) pair: mbc_entry_hash and
   mbc_entry_eq look only at these two fields, while COST is the cached
   payload filled in by multiply_by_cost.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* The mode the multiplication is done in.  */
  unsigned cost;                /* The cost, as measured by seq_cost.  */
};
2846
2847 /* Counts hash value for the ENTRY.  */
2848
2849 static hashval_t
2850 mbc_entry_hash (const void *entry)
2851 {
2852   const struct mbc_entry *e = (const struct mbc_entry *) entry;
2853
2854   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2855 }
2856
2857 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
2858
2859 static int
2860 mbc_entry_eq (const void *entry1, const void *entry2)
2861 {
2862   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2863   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2864
2865   return (e1->mode == e2->mode
2866           && e1->cst == e2->cst);
2867 }
2868
2869 /* Returns cost of multiplication by constant CST in MODE.  */
2870
2871 unsigned
2872 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2873 {
2874   static htab_t costs;
2875   struct mbc_entry **cached, act;
2876   rtx seq;
2877   unsigned cost;
2878
2879   if (!costs)
2880     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2881
2882   act.mode = mode;
2883   act.cst = cst;
2884   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2885   if (*cached)
2886     return (*cached)->cost;
2887
2888   *cached = XNEW (struct mbc_entry);
2889   (*cached)->mode = mode;
2890   (*cached)->cst = cst;
2891
2892   start_sequence ();
2893   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2894                gen_int_mode (cst, mode), NULL_RTX, 0);
2895   seq = get_insns ();
2896   end_sequence ();
2897
2898   cost = seq_cost (seq);
2899
2900   if (dump_file && (dump_flags & TDF_DETAILS))
2901     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2902              (int) cst, GET_MODE_NAME (mode), cost);
2903
2904   (*cached)->cost = cost;
2905
2906   return cost;
2907 }
2908
2909 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
2910    validity for a memory reference accessing memory of mode MODE.  */
2911
2912 bool
2913 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2914 {
2915 #define MAX_RATIO 128
2916   static sbitmap valid_mult[MAX_MACHINE_MODE];
2917
2918   if (!valid_mult[mode])
2919     {
2920       rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2921       rtx addr;
2922       HOST_WIDE_INT i;
2923
2924       valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2925       sbitmap_zero (valid_mult[mode]);
2926       addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2927       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2928         {
2929           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2930           if (memory_address_p (mode, addr))
2931             SET_BIT (valid_mult[mode], i + MAX_RATIO);
2932         }
2933
2934       if (dump_file && (dump_flags & TDF_DETAILS))
2935         {
2936           fprintf (dump_file, "  allowed multipliers:");
2937           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2938             if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2939               fprintf (dump_file, " %d", (int) i);
2940           fprintf (dump_file, "\n");
2941           fprintf (dump_file, "\n");
2942         }
2943     }
2944
2945   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2946     return false;
2947
2948   return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2949 }
2950
2951 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2952    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
2953    variable is omitted.  Compute the cost for a memory reference that accesses
2954    a memory location of mode MEM_MODE.
2955
2956    TODO -- there must be some better way.  This all is quite crude.  */
2957
2958 static comp_cost
2959 get_address_cost (bool symbol_present, bool var_present,
2960                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2961                   enum machine_mode mem_mode)
2962 {
2963   static bool initialized[MAX_MACHINE_MODE];
2964   static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2965   static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2966   static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2967   unsigned cost, acost, complexity;
2968   bool offset_p, ratio_p;
2969   HOST_WIDE_INT s_offset;
2970   unsigned HOST_WIDE_INT mask;
2971   unsigned bits;
2972
2973   if (!initialized[mem_mode])
2974     {
2975       HOST_WIDE_INT i;
2976       HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2977       int old_cse_not_expected;
2978       unsigned sym_p, var_p, off_p, rat_p, add_c;
2979       rtx seq, addr, base;
2980       rtx reg0, reg1;
2981
2982       initialized[mem_mode] = true;
2983
2984       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2985
2986       addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2987       for (i = start; i <= 1 << 20; i <<= 1)
2988         {
2989           XEXP (addr, 1) = gen_int_mode (i, Pmode);
2990           if (!memory_address_p (mem_mode, addr))
2991             break;
2992         }
2993       max_offset[mem_mode] = i == start ? 0 : i >> 1;
2994       off[mem_mode] = max_offset[mem_mode];
2995
2996       for (i = start; i <= 1 << 20; i <<= 1)
2997         {
2998           XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2999           if (!memory_address_p (mem_mode, addr))
3000             break;
3001         }
3002       min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
3003
3004       if (dump_file && (dump_flags & TDF_DETAILS))
3005         {
3006           fprintf (dump_file, "get_address_cost:\n");
3007           fprintf (dump_file, "  min offset %s %d\n",
3008                    GET_MODE_NAME (mem_mode),
3009                    (int) min_offset[mem_mode]);
3010           fprintf (dump_file, "  max offset %s %d\n",
3011                    GET_MODE_NAME (mem_mode),
3012                    (int) max_offset[mem_mode]);
3013         }
3014
3015       rat[mem_mode] = 1;
3016       for (i = 2; i <= MAX_RATIO; i++)
3017         if (multiplier_allowed_in_address_p (i, mem_mode))
3018           {
3019             rat[mem_mode] = i;
3020             break;
3021           }
3022
3023       /* Compute the cost of various addressing modes.  */
3024       acost = 0;
3025       reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3026       reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3027
3028       for (i = 0; i < 16; i++)
3029         {
3030           sym_p = i & 1;
3031           var_p = (i >> 1) & 1;
3032           off_p = (i >> 2) & 1;
3033           rat_p = (i >> 3) & 1;
3034
3035           addr = reg0;
3036           if (rat_p)
3037             addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3038                                    gen_int_mode (rat[mem_mode], Pmode));
3039
3040           if (var_p)
3041             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3042
3043           if (sym_p)
3044             {
3045               base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3046               /* ??? We can run into trouble with some backends by presenting
3047                  it with symbols which havn't been properly passed through
3048                  targetm.encode_section_info.  By setting the local bit, we
3049                  enhance the probability of things working.  */
3050               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3051
3052               if (off_p)
3053                 base = gen_rtx_fmt_e (CONST, Pmode,
3054                                       gen_rtx_fmt_ee (PLUS, Pmode,
3055                                                       base,
3056                                                       gen_int_mode (off[mem_mode],
3057                                                                     Pmode)));
3058             }
3059           else if (off_p)
3060             base = gen_int_mode (off[mem_mode], Pmode);
3061           else
3062             base = NULL_RTX;
3063
3064           if (base)
3065             addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3066
3067           start_sequence ();
3068           /* To avoid splitting addressing modes, pretend that no cse will
3069              follow.  */
3070           old_cse_not_expected = cse_not_expected;
3071           cse_not_expected = true;
3072           addr = memory_address (mem_mode, addr);
3073           cse_not_expected = old_cse_not_expected;
3074           seq = get_insns ();
3075           end_sequence ();
3076
3077           acost = seq_cost (seq);
3078           acost += address_cost (addr, mem_mode);
3079
3080           if (!acost)
3081             acost = 1;
3082           costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
3083         }
3084
3085       /* On some targets, it is quite expensive to load symbol to a register,
3086          which makes addresses that contain symbols look much more expensive.
3087          However, the symbol will have to be loaded in any case before the
3088          loop (and quite likely we have it in register already), so it does not
3089          make much sense to penalize them too heavily.  So make some final
3090          tweaks for the SYMBOL_PRESENT modes:
3091
3092          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3093          var is cheaper, use this mode with small penalty.
3094          If VAR_PRESENT is true, try whether the mode with
3095          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3096          if this is the case, use it.  */
3097       add_c = add_cost (Pmode);
3098       for (i = 0; i < 8; i++)
3099         {
3100           var_p = i & 1;
3101           off_p = (i >> 1) & 1;
3102           rat_p = (i >> 2) & 1;
3103
3104           acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3105           if (var_p)
3106             acost += add_c;
3107
3108           if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3109             costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3110         }
3111
3112       if (dump_file && (dump_flags & TDF_DETAILS))
3113         {
3114           fprintf (dump_file, "Address costs:\n");
3115
3116           for (i = 0; i < 16; i++)
3117             {
3118               sym_p = i & 1;
3119               var_p = (i >> 1) & 1;
3120               off_p = (i >> 2) & 1;
3121               rat_p = (i >> 3) & 1;
3122
3123               fprintf (dump_file, "  ");
3124               if (sym_p)
3125                 fprintf (dump_file, "sym + ");
3126               if (var_p)
3127                 fprintf (dump_file, "var + ");
3128               if (off_p)
3129                 fprintf (dump_file, "cst + ");
3130               if (rat_p)
3131                 fprintf (dump_file, "rat * ");
3132
3133               acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3134               fprintf (dump_file, "index costs %d\n", acost);
3135             }
3136           fprintf (dump_file, "\n");
3137         }
3138     }
3139
3140   bits = GET_MODE_BITSIZE (Pmode);
3141   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3142   offset &= mask;
3143   if ((offset >> (bits - 1) & 1))
3144     offset |= ~mask;
3145   s_offset = offset;
3146
3147   cost = 0;
3148   offset_p = (s_offset != 0
3149               && min_offset[mem_mode] <= s_offset
3150               && s_offset <= max_offset[mem_mode]);
3151   ratio_p = (ratio != 1
3152              && multiplier_allowed_in_address_p (ratio, mem_mode));
3153
3154   if (ratio != 1 && !ratio_p)
3155     cost += multiply_by_cost (ratio, Pmode);
3156
3157   if (s_offset && !offset_p && !symbol_present)
3158     cost += add_cost (Pmode);
3159
3160   acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3161   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3162   return new_cost (cost + acost, complexity);
3163 }
3164
3165 /* Estimates cost of forcing expression EXPR into a variable.  */
3166
3167 static comp_cost
3168 force_expr_to_var_cost (tree expr)
3169 {
3170   static bool costs_initialized = false;
3171   static unsigned integer_cost;
3172   static unsigned symbol_cost;
3173   static unsigned address_cost;
3174   tree op0, op1;
3175   comp_cost cost0, cost1, cost;
3176   enum machine_mode mode;
3177
3178   if (!costs_initialized)
3179     {
3180       tree type = build_pointer_type (integer_type_node);
3181       tree var, addr;
3182       rtx x;
3183
3184       var = create_tmp_var_raw (integer_type_node, "test_var");
3185       TREE_STATIC (var) = 1;
3186       x = produce_memory_decl_rtl (var, NULL);
3187       SET_DECL_RTL (var, x);
3188
3189       integer_cost = computation_cost (build_int_cst (integer_type_node,
3190                                                       2000));
3191
3192       addr = build1 (ADDR_EXPR, type, var);
3193       symbol_cost = computation_cost (addr) + 1;
3194
3195       address_cost
3196         = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3197                                     addr,
3198                                     build_int_cst (sizetype, 2000))) + 1;
3199       if (dump_file && (dump_flags & TDF_DETAILS))
3200         {
3201           fprintf (dump_file, "force_expr_to_var_cost:\n");
3202           fprintf (dump_file, "  integer %d\n", (int) integer_cost);
3203           fprintf (dump_file, "  symbol %d\n", (int) symbol_cost);
3204           fprintf (dump_file, "  address %d\n", (int) address_cost);
3205           fprintf (dump_file, "  other %d\n", (int) target_spill_cost);
3206           fprintf (dump_file, "\n");
3207         }
3208
3209       costs_initialized = true;
3210     }
3211
3212   STRIP_NOPS (expr);
3213
3214   if (SSA_VAR_P (expr))
3215     return zero_cost;
3216
3217   if (TREE_INVARIANT (expr))
3218     {
3219       if (TREE_CODE (expr) == INTEGER_CST)
3220         return new_cost (integer_cost, 0);
3221
3222       if (TREE_CODE (expr) == ADDR_EXPR)
3223         {
3224           tree obj = TREE_OPERAND (expr, 0);
3225
3226           if (TREE_CODE (obj) == VAR_DECL
3227               || TREE_CODE (obj) == PARM_DECL
3228               || TREE_CODE (obj) == RESULT_DECL)
3229             return new_cost (symbol_cost, 0);
3230         }
3231
3232       return new_cost (address_cost, 0);
3233     }
3234
3235   switch (TREE_CODE (expr))
3236     {
3237     case POINTER_PLUS_EXPR:
3238     case PLUS_EXPR:
3239     case MINUS_EXPR:
3240     case MULT_EXPR:
3241       op0 = TREE_OPERAND (expr, 0);
3242       op1 = TREE_OPERAND (expr, 1);
3243       STRIP_NOPS (op0);
3244       STRIP_NOPS (op1);
3245
3246       if (is_gimple_val (op0))
3247         cost0 = zero_cost;
3248       else
3249         cost0 = force_expr_to_var_cost (op0);
3250
3251       if (is_gimple_val (op1))
3252         cost1 = zero_cost;
3253       else
3254         cost1 = force_expr_to_var_cost (op1);
3255
3256       break;
3257
3258     default:
3259       /* Just an arbitrary value, FIXME.  */
3260       return new_cost (target_spill_cost, 0);
3261     }
3262
3263   mode = TYPE_MODE (TREE_TYPE (expr));
3264   switch (TREE_CODE (expr))
3265     {
3266     case POINTER_PLUS_EXPR:
3267     case PLUS_EXPR:
3268     case MINUS_EXPR:
3269       cost = new_cost (add_cost (mode), 0);
3270       break;
3271
3272     case MULT_EXPR:
3273       if (cst_and_fits_in_hwi (op0))
3274         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode), 0);
3275       else if (cst_and_fits_in_hwi (op1))
3276         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode), 0);
3277       else
3278         return new_cost (target_spill_cost, 0);
3279       break;
3280
3281     default:
3282       gcc_unreachable ();
3283     }
3284
3285   cost = add_costs (cost, cost0);
3286   cost = add_costs (cost, cost1);
3287
3288   /* Bound the cost by target_spill_cost.  The parts of complicated
3289      computations often are either loop invariant or at least can
3290      be shared between several iv uses, so letting this grow without
3291      limits would not give reasonable results.  */
3292   if (cost.cost > target_spill_cost)
3293     cost.cost = target_spill_cost;
3294
3295   return cost;
3296 }
3297
3298 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3299    invariants the computation depends on.  */
3300
3301 static comp_cost
3302 force_var_cost (struct ivopts_data *data,
3303                 tree expr, bitmap *depends_on)
3304 {
3305   if (depends_on)
3306     {
3307       fd_ivopts_data = data;
3308       walk_tree (&expr, find_depends, depends_on, NULL);
3309     }
3310
3311   return force_expr_to_var_cost (expr);
3312 }
3313
3314 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3315    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3316    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3317    invariants the computation depends on.  */
3318
3319 static comp_cost
3320 split_address_cost (struct ivopts_data *data,
3321                     tree addr, bool *symbol_present, bool *var_present,
3322                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3323 {
3324   tree core;
3325   HOST_WIDE_INT bitsize;
3326   HOST_WIDE_INT bitpos;
3327   tree toffset;
3328   enum machine_mode mode;
3329   int unsignedp, volatilep;
3330
3331   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3332                               &unsignedp, &volatilep, false);
3333
3334   if (toffset != 0
3335       || bitpos % BITS_PER_UNIT != 0
3336       || TREE_CODE (core) != VAR_DECL)
3337     {
3338       *symbol_present = false;
3339       *var_present = true;
3340       fd_ivopts_data = data;
3341       walk_tree (&addr, find_depends, depends_on, NULL);
3342       return new_cost (target_spill_cost, 0);
3343     }
3344
3345   *offset += bitpos / BITS_PER_UNIT;
3346   if (TREE_STATIC (core)
3347       || DECL_EXTERNAL (core))
3348     {
3349       *symbol_present = true;
3350       *var_present = false;
3351       return zero_cost;
3352     }
3353
3354   *symbol_present = false;
3355   *var_present = true;
3356   return zero_cost;
3357 }
3358
3359 /* Estimates cost of expressing difference of addresses E1 - E2 as
3360    var + symbol + offset.  The value of offset is added to OFFSET,
3361    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3362    part is missing.  DEPENDS_ON is a set of the invariants the computation
3363    depends on.  */
3364
3365 static comp_cost
3366 ptr_difference_cost (struct ivopts_data *data,
3367                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3368                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3369 {
3370   HOST_WIDE_INT diff = 0;
3371   comp_cost cost;
3372
3373   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3374
3375   if (ptr_difference_const (e1, e2, &diff))
3376     {
3377       *offset += diff;
3378       *symbol_present = false;
3379       *var_present = false;
3380       return zero_cost;
3381     }
3382
3383   if (integer_zerop (e2))
3384     return split_address_cost (data, TREE_OPERAND (e1, 0),
3385                                symbol_present, var_present, offset, depends_on);
3386
3387   *symbol_present = false;
3388   *var_present = true;
3389
3390   cost = force_var_cost (data, e1, depends_on);
3391   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3392   cost.cost += add_cost (Pmode);
3393
3394   return cost;
3395 }
3396
3397 /* Estimates cost of expressing difference E1 - E2 as
3398    var + symbol + offset.  The value of offset is added to OFFSET,
3399    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3400    part is missing.  DEPENDS_ON is a set of the invariants the computation
3401    depends on.  */
3402
3403 static comp_cost
3404 difference_cost (struct ivopts_data *data,
3405                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3406                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3407 {
3408   comp_cost cost;
3409   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3410   unsigned HOST_WIDE_INT off1, off2;
3411
3412   e1 = strip_offset (e1, &off1);
3413   e2 = strip_offset (e2, &off2);
3414   *offset += off1 - off2;
3415
3416   STRIP_NOPS (e1);
3417   STRIP_NOPS (e2);
3418
3419   if (TREE_CODE (e1) == ADDR_EXPR)
3420     return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3421                                 depends_on);
3422   *symbol_present = false;
3423
3424   if (operand_equal_p (e1, e2, 0))
3425     {
3426       *var_present = false;
3427       return zero_cost;
3428     }
3429   *var_present = true;
3430   if (integer_zerop (e2))
3431     return force_var_cost (data, e1, depends_on);
3432
3433   if (integer_zerop (e1))
3434     {
3435       cost = force_var_cost (data, e2, depends_on);
3436       cost.cost += multiply_by_cost (-1, mode);
3437
3438       return cost;
3439     }
3440
3441   cost = force_var_cost (data, e1, depends_on);
3442   cost = add_costs (cost, force_var_cost (data, e2, depends_on));
3443   cost.cost += add_cost (mode);
3444
3445   return cost;
3446 }
3447
3448 /* Determines the cost of the computation by that USE is expressed
3449    from induction variable CAND.  If ADDRESS_P is true, we just need
3450    to create an address from it, otherwise we want to get it into
3451    register.  A set of invariants we depend on is stored in
3452    DEPENDS_ON.  AT is the statement at that the value is computed.  */
3453
3454 static comp_cost
3455 get_computation_cost_at (struct ivopts_data *data,
3456                          struct iv_use *use, struct iv_cand *cand,
3457                          bool address_p, bitmap *depends_on, tree at)
3458 {
3459   tree ubase = use->iv->base, ustep = use->iv->step;
3460   tree cbase, cstep;
3461   tree utype = TREE_TYPE (ubase), ctype;
3462   unsigned HOST_WIDE_INT cstepi, offset = 0;
3463   HOST_WIDE_INT ratio, aratio;
3464   bool var_present, symbol_present;
3465   comp_cost cost;
3466   unsigned n_sums;
3467   double_int rat;
3468
3469   *depends_on = NULL;
3470
3471   /* Only consider real candidates.  */
3472   if (!cand->iv)
3473     return infinite_cost;
3474
3475   cbase = cand->iv->base;
3476   cstep = cand->iv->step;
3477   ctype = TREE_TYPE (cbase);
3478
3479   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3480     {
3481       /* We do not have a precision to express the values of use.  */
3482       return infinite_cost;
3483     }
3484
3485   if (address_p)
3486     {
3487       /* Do not try to express address of an object with computation based
3488          on address of a different object.  This may cause problems in rtl
3489          level alias analysis (that does not expect this to be happening,
3490          as this is illegal in C), and would be unlikely to be useful
3491          anyway.  */
3492       if (use->iv->base_object
3493           && cand->iv->base_object
3494           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3495         return infinite_cost;
3496     }
3497
3498   if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3499     {
3500       /* TODO -- add direct handling of this case.  */
3501       goto fallback;
3502     }
3503
3504   /* CSTEPI is removed from the offset in case statement is after the
3505      increment.  If the step is not constant, we use zero instead.
3506      This is a bit imprecise (there is the extra addition), but
3507      redundancy elimination is likely to transform the code so that
3508      it uses value of the variable before increment anyway,
3509      so it is not that much unrealistic.  */
3510   if (cst_and_fits_in_hwi (cstep))
3511     cstepi = int_cst_value (cstep);
3512   else
3513     cstepi = 0;
3514
3515   if (!constant_multiple_of (ustep, cstep, &rat))
3516     return infinite_cost;
3517
3518   if (double_int_fits_in_shwi_p (rat))
3519     ratio = double_int_to_shwi (rat);
3520   else
3521     return infinite_cost;
3522
3523   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
3524      or ratio == 1, it is better to handle this like
3525
3526      ubase - ratio * cbase + ratio * var
3527
3528      (also holds in the case ratio == -1, TODO.  */
3529
3530   if (cst_and_fits_in_hwi (cbase))
3531     {
3532       offset = - ratio * int_cst_value (cbase);
3533       cost = difference_cost (data,
3534                               ubase, build_int_cst (utype, 0),
3535                               &symbol_present, &var_present, &offset,
3536                               depends_on);
3537     }
3538   else if (ratio == 1)
3539     {
3540       cost = difference_cost (data,
3541                               ubase, cbase,
3542                               &symbol_present, &var_present, &offset,
3543                               depends_on);
3544     }
3545   else
3546     {
3547       cost = force_var_cost (data, cbase, depends_on);
3548       cost.cost += add_cost (TYPE_MODE (ctype));
3549       cost = add_costs (cost,
3550                         difference_cost (data,
3551                                          ubase, build_int_cst (utype, 0),
3552                                          &symbol_present, &var_present,
3553                                          &offset, depends_on));
3554     }
3555
3556   /* If we are after the increment, the value of the candidate is higher by
3557      one iteration.  */
3558   if (stmt_after_increment (data->current_loop, cand, at))
3559     offset -= ratio * cstepi;
3560
3561   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3562      (symbol/var/const parts may be omitted).  If we are looking for an address,
3563      find the cost of addressing this.  */
3564   if (address_p)
3565     return add_costs (cost, get_address_cost (symbol_present, var_present,
3566                                 offset, ratio,
3567                                 TYPE_MODE (TREE_TYPE (*use->op_p))));
3568
3569   /* Otherwise estimate the costs for computing the expression.  */
3570   aratio = ratio > 0 ? ratio : -ratio;
3571   if (!symbol_present && !var_present && !offset)
3572     {
3573       if (ratio != 1)
3574         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3575
3576       return cost;
3577     }
3578
3579   if (aratio != 1)
3580     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3581
3582   n_sums = 1;
3583   if (var_present
3584       /* Symbol + offset should be compile-time computable.  */
3585       && (symbol_present || offset))
3586     n_sums++;
3587
3588   /* Having offset does not affect runtime cost in case it is added to
3589      symbol, but it increases complexity.  */
3590   if (offset)
3591     cost.complexity++;
3592
3593   cost.cost += n_sums * add_cost (TYPE_MODE (ctype));
3594   return cost;
3595
3596 fallback:
3597   {
3598     /* Just get the expression, expand it and measure the cost.  */
3599     tree comp = get_computation_at (data->current_loop, use, cand, at);
3600
3601     if (!comp)
3602       return infinite_cost;
3603
3604     if (address_p)
3605       comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3606
3607     return new_cost (computation_cost (comp), 0);
3608   }
3609 }
3610
3611 /* Determines the cost of the computation by that USE is expressed
3612    from induction variable CAND.  If ADDRESS_P is true, we just need
3613    to create an address from it, otherwise we want to get it into
3614    register.  A set of invariants we depend on is stored in
3615    DEPENDS_ON.  */
3616
3617 static comp_cost
3618 get_computation_cost (struct ivopts_data *data,
3619                       struct iv_use *use, struct iv_cand *cand,
3620                       bool address_p, bitmap *depends_on)
3621 {
3622   return get_computation_cost_at (data,
3623                                   use, cand, address_p, depends_on, use->stmt);
3624 }
3625
3626 /* Determines cost of basing replacement of USE on CAND in a generic
3627    expression.  */
3628
3629 static bool
3630 determine_use_iv_cost_generic (struct ivopts_data *data,
3631                                struct iv_use *use, struct iv_cand *cand)
3632 {
3633   bitmap depends_on;
3634   comp_cost cost;
3635
3636   /* The simple case first -- if we need to express value of the preserved
3637      original biv, the cost is 0.  This also prevents us from counting the
3638      cost of increment twice -- once at this use and once in the cost of
3639      the candidate.  */
3640   if (cand->pos == IP_ORIGINAL
3641       && cand->incremented_at == use->stmt)
3642     {
3643       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3644       return true;
3645     }
3646
3647   cost = get_computation_cost (data, use, cand, false, &depends_on);
3648   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3649
3650   return !infinite_cost_p (cost);
3651 }
3652
3653 /* Determines cost of basing replacement of USE on CAND in an address.  */
3654
3655 static bool
3656 determine_use_iv_cost_address (struct ivopts_data *data,
3657                                struct iv_use *use, struct iv_cand *cand)
3658 {
3659   bitmap depends_on;
3660   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
3661
3662   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3663
3664   return !infinite_cost_p (cost);
3665 }
3666
3667 /* Computes value of candidate CAND at position AT in iteration NITER, and
3668    stores it to VAL.  */
3669
3670 static void
3671 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3672                aff_tree *val)
3673 {
3674   aff_tree step, delta, nit;
3675   struct iv *iv = cand->iv;
3676   tree type = TREE_TYPE (iv->base);
3677
3678   tree_to_aff_combination (iv->step, type, &step);
3679   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3680   aff_combination_convert (&nit, type);
3681   aff_combination_mult (&nit, &step, &delta);
3682   if (stmt_after_increment (loop, cand, at))
3683     aff_combination_add (&delta, &step);
3684
3685   tree_to_aff_combination (iv->base, type, val);
3686   aff_combination_add (val, &delta);
3687 }
3688
3689 /* Returns period of induction variable iv.  */
3690
3691 static tree
3692 iv_period (struct iv *iv)
3693 {
3694   tree step = iv->step, period, type;
3695   tree pow2div;
3696
3697   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3698
3699   /* Period of the iv is gcd (step, type range).  Since type range is power
3700      of two, it suffices to determine the maximum power of two that divides
3701      step.  */
3702   pow2div = num_ending_zeros (step);
3703   type = unsigned_type_for (TREE_TYPE (step));
3704
3705   period = build_low_bits_mask (type,
3706                                 (TYPE_PRECISION (type)
3707                                  - tree_low_cst (pow2div, 1)));
3708
3709   return period;
3710 }
3711
3712 /* Returns the comparison operator used when eliminating the iv USE.  */
3713
3714 static enum tree_code
3715 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3716 {
3717   struct loop *loop = data->current_loop;
3718   basic_block ex_bb;
3719   edge exit;
3720
3721   ex_bb = bb_for_stmt (use->stmt);
3722   exit = EDGE_SUCC (ex_bb, 0);
3723   if (flow_bb_inside_loop_p (loop, exit->dest))
3724     exit = EDGE_SUCC (ex_bb, 1);
3725
3726   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3727 }
3728
3729 /* Check whether it is possible to express the condition in USE by comparison
3730    of candidate CAND.  If so, store the value compared with to BOUND.  */
3731
3732 static bool
3733 may_eliminate_iv (struct ivopts_data *data,
3734                   struct iv_use *use, struct iv_cand *cand, tree *bound)
3735 {
3736   basic_block ex_bb;
3737   edge exit;
3738   tree nit, period;
3739   struct loop *loop = data->current_loop;
3740   aff_tree bnd;
3741
3742   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3743     return false;
3744
3745   /* For now works only for exits that dominate the loop latch.
3746      TODO: extend to other conditions inside loop body.  */
3747   ex_bb = bb_for_stmt (use->stmt);
3748   if (use->stmt != last_stmt (ex_bb)
3749       || TREE_CODE (use->stmt) != COND_EXPR)
3750     return false;
3751   if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3752     return false;
3753
3754   exit = EDGE_SUCC (ex_bb, 0);
3755   if (flow_bb_inside_loop_p (loop, exit->dest))
3756     exit = EDGE_SUCC (ex_bb, 1);
3757   if (flow_bb_inside_loop_p (loop, exit->dest))
3758     return false;
3759
3760   nit = niter_for_exit (data, exit);
3761   if (!nit)
3762     return false;
3763
3764   /* Determine whether we can use the variable to test the exit condition.
3765      This is the case iff the period of the induction variable is greater
3766      than the number of iterations for which the exit condition is true.  */
3767   period = iv_period (cand->iv);
3768
3769   /* If the number of iterations is constant, compare against it directly.  */
3770   if (TREE_CODE (nit) == INTEGER_CST)
3771     {
3772       if (!tree_int_cst_lt (nit, period))
3773         return false;
3774     }
3775
3776   /* If not, and if this is the only possible exit of the loop, see whether
3777      we can get a conservative estimate on the number of iterations of the
3778      entire loop and compare against that instead.  */
3779   else if (loop_only_exit_p (loop, exit))
3780     {
3781       double_int period_value, max_niter;
3782       if (!estimated_loop_iterations (loop, true, &max_niter))
3783         return false;
3784       period_value = tree_to_double_int (period);
3785       if (double_int_ucmp (max_niter, period_value) >= 0)
3786         return false;
3787     }
3788
3789   /* Otherwise, punt.  */
3790   else
3791     return false;
3792
3793   cand_value_at (loop, cand, use->stmt, nit, &bnd);
3794
3795   *bound = aff_combination_to_tree (&bnd);
3796   /* It is unlikely that computing the number of iterations using division
3797      would be more profitable than keeping the original induction variable.  */
3798   if (expression_expensive_p (*bound))
3799     return false;
3800   return true;
3801 }
3802
3803 /* Determines cost of basing replacement of USE on CAND in a condition.  */
3804
3805 static bool
3806 determine_use_iv_cost_condition (struct ivopts_data *data,
3807                                  struct iv_use *use, struct iv_cand *cand)
3808 {
3809   tree bound = NULL_TREE;
3810   struct iv *cmp_iv;
3811   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3812   comp_cost elim_cost, express_cost, cost;
3813   bool ok;
3814
3815   /* Only consider real candidates.  */
3816   if (!cand->iv)
3817     {
3818       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
3819       return false;
3820     }
3821
3822   /* Try iv elimination.  */
3823   if (may_eliminate_iv (data, use, cand, &bound))
3824     {
3825       elim_cost = force_var_cost (data, bound, &depends_on_elim);
3826       /* The bound is a loop invariant, so it will be only computed
3827          once.  */
3828       elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
3829     }
3830   else
3831     elim_cost = infinite_cost;
3832
3833   /* Try expressing the original giv.  If it is compared with an invariant,
3834      note that we cannot get rid of it.  */
3835   ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3836   gcc_assert (ok);
3837
3838   express_cost = get_computation_cost (data, use, cand, false,
3839                                        &depends_on_express);
3840   fd_ivopts_data = data;
3841   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3842
3843   /* Choose the better approach.  */
3844   if (compare_costs (elim_cost, express_cost) < 0)
3845     {
3846       cost = elim_cost;
3847       depends_on = depends_on_elim;
3848       depends_on_elim = NULL;
3849     }
3850   else
3851     {
3852       cost = express_cost;
3853       depends_on = depends_on_express;
3854       depends_on_express = NULL;
3855       bound = NULL_TREE;
3856     }
3857
3858   set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3859
3860   if (depends_on_elim)
3861     BITMAP_FREE (depends_on_elim);
3862   if (depends_on_express)
3863     BITMAP_FREE (depends_on_express);
3864
3865   return !infinite_cost_p (cost);
3866 }
3867
3868 /* Determines cost of basing replacement of USE on CAND.  Returns false
3869    if USE cannot be based on CAND.  */
3870
3871 static bool
3872 determine_use_iv_cost (struct ivopts_data *data,
3873                        struct iv_use *use, struct iv_cand *cand)
3874 {
3875   switch (use->type)
3876     {
3877     case USE_NONLINEAR_EXPR:
3878       return determine_use_iv_cost_generic (data, use, cand);
3879
3880     case USE_ADDRESS:
3881       return determine_use_iv_cost_address (data, use, cand);
3882
3883     case USE_COMPARE:
3884       return determine_use_iv_cost_condition (data, use, cand);
3885
3886     default:
3887       gcc_unreachable ();
3888     }
3889 }
3890
3891 /* Determines costs of basing the use of the iv on an iv candidate.  */
3892
3893 static void
3894 determine_use_iv_costs (struct ivopts_data *data)
3895 {
3896   unsigned i, j;
3897   struct iv_use *use;
3898   struct iv_cand *cand;
3899   bitmap to_clear = BITMAP_ALLOC (NULL);
3900
3901   alloc_use_cost_map (data);
3902
3903   for (i = 0; i < n_iv_uses (data); i++)
3904     {
3905       use = iv_use (data, i);
3906
3907       if (data->consider_all_candidates)
3908         {
3909           for (j = 0; j < n_iv_cands (data); j++)
3910             {
3911               cand = iv_cand (data, j);
3912               determine_use_iv_cost (data, use, cand);
3913             }
3914         }
3915       else
3916         {
3917           bitmap_iterator bi;
3918
3919           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3920             {
3921               cand = iv_cand (data, j);
3922               if (!determine_use_iv_cost (data, use, cand))
3923                 bitmap_set_bit (to_clear, j);
3924             }
3925
3926           /* Remove the candidates for that the cost is infinite from
3927              the list of related candidates.  */
3928           bitmap_and_compl_into (use->related_cands, to_clear);
3929           bitmap_clear (to_clear);
3930         }
3931     }
3932
3933   BITMAP_FREE (to_clear);
3934
3935   if (dump_file && (dump_flags & TDF_DETAILS))
3936     {
3937       fprintf (dump_file, "Use-candidate costs:\n");
3938
3939       for (i = 0; i < n_iv_uses (data); i++)
3940         {
3941           use = iv_use (data, i);
3942
3943           fprintf (dump_file, "Use %d:\n", i);
3944           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
3945           for (j = 0; j < use->n_map_members; j++)
3946             {
3947               if (!use->cost_map[j].cand
3948                   || infinite_cost_p (use->cost_map[j].cost))
3949                 continue;
3950
3951               fprintf (dump_file, "  %d\t%d\t%d\t",
3952                        use->cost_map[j].cand->id,
3953                        use->cost_map[j].cost.cost,
3954                        use->cost_map[j].cost.complexity);
3955               if (use->cost_map[j].depends_on)
3956                 bitmap_print (dump_file,
3957                               use->cost_map[j].depends_on, "","");
3958               fprintf (dump_file, "\n");
3959             }
3960
3961           fprintf (dump_file, "\n");
3962         }
3963       fprintf (dump_file, "\n");
3964     }
3965 }
3966
3967 /* Determines cost of the candidate CAND.  */
3968
3969 static void
3970 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3971 {
3972   comp_cost cost_base;
3973   unsigned cost, cost_step;
3974   tree base;
3975
3976   if (!cand->iv)
3977     {
3978       cand->cost = 0;
3979       return;
3980     }
3981
3982   /* There are two costs associated with the candidate -- its increment
3983      and its initialization.  The second is almost negligible for any loop
3984      that rolls enough, so we take it just very little into account.  */
3985
3986   base = cand->iv->base;
3987   cost_base = force_var_cost (data, base, NULL);
3988   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3989
3990   cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
3991
3992   /* Prefer the original ivs unless we may gain something by replacing it.
3993      The reason is to makee debugging simpler; so this is not relevant for
3994      artificial ivs created by other optimization passes.  */
3995   if (cand->pos != IP_ORIGINAL
3996       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3997     cost++;
3998
3999   /* Prefer not to insert statements into latch unless there are some
4000      already (so that we do not create unnecessary jumps).  */
4001   if (cand->pos == IP_END
4002       && empty_block_p (ip_end_pos (data->current_loop)))
4003     cost++;
4004
4005   cand->cost = cost;
4006 }
4007
4008 /* Determines costs of computation of the candidates.  */
4009
4010 static void
4011 determine_iv_costs (struct ivopts_data *data)
4012 {
4013   unsigned i;
4014
4015   if (dump_file && (dump_flags & TDF_DETAILS))
4016     {
4017       fprintf (dump_file, "Candidate costs:\n");
4018       fprintf (dump_file, "  cand\tcost\n");
4019     }
4020
4021   for (i = 0; i < n_iv_cands (data); i++)
4022     {
4023       struct iv_cand *cand = iv_cand (data, i);
4024
4025       determine_iv_cost (data, cand);
4026
4027       if (dump_file && (dump_flags & TDF_DETAILS))
4028         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
4029     }
4030
4031   if (dump_file && (dump_flags & TDF_DETAILS))
4032     fprintf (dump_file, "\n");
4033 }
4034
4035 /* Calculates cost for having SIZE induction variables.  */
4036
4037 static unsigned
4038 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4039 {
4040   /* We add size to the cost, so that we prefer eliminating ivs
4041      if possible.  */
4042   return size + estimate_reg_pressure_cost (size, data->regs_used);
4043 }
4044
4045 /* For each size of the induction variable set determine the penalty.  */
4046
4047 static void
4048 determine_set_costs (struct ivopts_data *data)
4049 {
4050   unsigned j, n;
4051   tree phi, op;
4052   struct loop *loop = data->current_loop;
4053   bitmap_iterator bi;
4054
4055   /* We use the following model (definitely improvable, especially the
4056      cost function -- TODO):
4057
4058      We estimate the number of registers available (using MD data), name it A.
4059
4060      We estimate the number of registers used by the loop, name it U.  This
4061      number is obtained as the number of loop phi nodes (not counting virtual
4062      registers and bivs) + the number of variables from outside of the loop.
4063
4064      We set a reserve R (free regs that are used for temporary computations,
4065      etc.).  For now the reserve is a constant 3.
4066
4067      Let I be the number of induction variables.
4068
4069      -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4070         make a lot of ivs without a reason).
4071      -- if A - R < U + I <= A, the cost is I * PRES_COST
4072      -- if U + I > A, the cost is I * PRES_COST and
4073         number of uses * SPILL_COST * (U + I - A) / (U + I) is added.  */
4074
4075   if (dump_file && (dump_flags & TDF_DETAILS))
4076     {
4077       fprintf (dump_file, "Global costs:\n");
4078       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
4079       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost);
4080       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost);
4081     }
4082
4083   n = 0;
4084   for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4085     {
4086       op = PHI_RESULT (phi);
4087
4088       if (!is_gimple_reg (op))
4089         continue;
4090
4091       if (get_iv (data, op))
4092         continue;
4093
4094       n++;
4095     }
4096
4097   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4098     {
4099       struct version_info *info = ver_info (data, j);
4100
4101       if (info->inv_id && info->has_nonlin_use)
4102         n++;
4103     }
4104
4105   data->regs_used = n;
4106   if (dump_file && (dump_flags & TDF_DETAILS))
4107     fprintf (dump_file, "  regs_used %d\n", n);
4108
4109   if (dump_file && (dump_flags & TDF_DETAILS))
4110     {
4111       fprintf (dump_file, "  cost for size:\n");
4112       fprintf (dump_file, "  ivs\tcost\n");
4113       for (j = 0; j <= 2 * target_avail_regs; j++)
4114         fprintf (dump_file, "  %d\t%d\n", j,
4115                  ivopts_global_cost_for_size (data, j));
4116       fprintf (dump_file, "\n");
4117     }
4118 }
4119
4120 /* Returns true if A is a cheaper cost pair than B.  */
4121
4122 static bool
4123 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4124 {
4125   int cmp;
4126
4127   if (!a)
4128     return false;
4129
4130   if (!b)
4131     return true;
4132
4133   cmp = compare_costs (a->cost, b->cost);
4134   if (cmp < 0)
4135     return true;
4136
4137   if (cmp > 0)
4138     return false;
4139
4140   /* In case the costs are the same, prefer the cheaper candidate.  */
4141   if (a->cand->cost < b->cand->cost)
4142     return true;
4143
4144   return false;
4145 }
4146
4147 /* Computes the cost field of IVS structure.  */
4148
4149 static void
4150 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4151 {
4152   comp_cost cost = ivs->cand_use_cost;
4153   cost.cost += ivs->cand_cost;
4154   cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4155
4156   ivs->cost = cost;
4157 }
4158
4159 /* Remove invariants in set INVS to set IVS.  */
4160
4161 static void
4162 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4163 {
4164   bitmap_iterator bi;
4165   unsigned iid;
4166
4167   if (!invs)
4168     return;
4169
4170   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4171     {
4172       ivs->n_invariant_uses[iid]--;
4173       if (ivs->n_invariant_uses[iid] == 0)
4174         ivs->n_regs--;
4175     }
4176 }
4177
4178 /* Set USE not to be expressed by any candidate in IVS.  */
4179
4180 static void
4181 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4182                  struct iv_use *use)
4183 {
4184   unsigned uid = use->id, cid;
4185   struct cost_pair *cp;
4186
4187   cp = ivs->cand_for_use[uid];
4188   if (!cp)
4189     return;
4190   cid = cp->cand->id;
4191
4192   ivs->bad_uses++;
4193   ivs->cand_for_use[uid] = NULL;
4194   ivs->n_cand_uses[cid]--;
4195
4196   if (ivs->n_cand_uses[cid] == 0)
4197     {
4198       bitmap_clear_bit (ivs->cands, cid);
4199       /* Do not count the pseudocandidates.  */
4200       if (cp->cand->iv)
4201         ivs->n_regs--;
4202       ivs->n_cands--;
4203       ivs->cand_cost -= cp->cand->cost;
4204
4205       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4206     }
4207
4208   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4209
4210   iv_ca_set_remove_invariants (ivs, cp->depends_on);
4211   iv_ca_recount_cost (data, ivs);
4212 }
4213
4214 /* Add invariants in set INVS to set IVS.  */
4215
4216 static void
4217 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4218 {
4219   bitmap_iterator bi;
4220   unsigned iid;
4221
4222   if (!invs)
4223     return;
4224
4225   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4226     {
4227       ivs->n_invariant_uses[iid]++;
4228       if (ivs->n_invariant_uses[iid] == 1)
4229         ivs->n_regs++;
4230     }
4231 }
4232
4233 /* Set cost pair for USE in set IVS to CP.  */
4234
4235 static void
4236 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4237               struct iv_use *use, struct cost_pair *cp)
4238 {
4239   unsigned uid = use->id, cid;
4240
4241   if (ivs->cand_for_use[uid] == cp)
4242     return;
4243
4244   if (ivs->cand_for_use[uid])
4245     iv_ca_set_no_cp (data, ivs, use);
4246
4247   if (cp)
4248     {
4249       cid = cp->cand->id;
4250
4251       ivs->bad_uses--;
4252       ivs->cand_for_use[uid] = cp;
4253       ivs->n_cand_uses[cid]++;
4254       if (ivs->n_cand_uses[cid] == 1)
4255         {
4256           bitmap_set_bit (ivs->cands, cid);
4257           /* Do not count the pseudocandidates.  */
4258           if (cp->cand->iv)
4259             ivs->n_regs++;
4260           ivs->n_cands++;
4261           ivs->cand_cost += cp->cand->cost;
4262
4263           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4264         }
4265
4266       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4267       iv_ca_set_add_invariants (ivs, cp->depends_on);
4268       iv_ca_recount_cost (data, ivs);
4269     }
4270 }
4271
4272 /* Extend set IVS by expressing USE by some of the candidates in it
4273    if possible.  */
4274
4275 static void
4276 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4277                struct iv_use *use)
4278 {
4279   struct cost_pair *best_cp = NULL, *cp;
4280   bitmap_iterator bi;
4281   unsigned i;
4282
4283   gcc_assert (ivs->upto >= use->id);
4284
4285   if (ivs->upto == use->id)
4286     {
4287       ivs->upto++;
4288       ivs->bad_uses++;
4289     }
4290
4291   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4292     {
4293       cp = get_use_iv_cost (data, use, iv_cand (data, i));
4294
4295       if (cheaper_cost_pair (cp, best_cp))
4296         best_cp = cp;
4297     }
4298
4299   iv_ca_set_cp (data, ivs, use, best_cp);
4300 }
4301
4302 /* Get cost for assignment IVS.  */
4303
4304 static comp_cost
4305 iv_ca_cost (struct iv_ca *ivs)
4306 {
4307   return (ivs->bad_uses ? infinite_cost : ivs->cost);
4308 }
4309
4310 /* Returns true if all dependences of CP are among invariants in IVS.  */
4311
4312 static bool
4313 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4314 {
4315   unsigned i;
4316   bitmap_iterator bi;
4317
4318   if (!cp->depends_on)
4319     return true;
4320
4321   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4322     {
4323       if (ivs->n_invariant_uses[i] == 0)
4324         return false;
4325     }
4326
4327   return true;
4328 }
4329
4330 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4331    it before NEXT_CHANGE.  */
4332
4333 static struct iv_ca_delta *
4334 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4335                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4336 {
4337   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4338
4339   change->use = use;
4340   change->old_cp = old_cp;
4341   change->new_cp = new_cp;
4342   change->next_change = next_change;
4343
4344   return change;
4345 }
4346
4347 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
4348    are rewritten.  */
4349
4350 static struct iv_ca_delta *
4351 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4352 {
4353   struct iv_ca_delta *last;
4354
4355   if (!l2)
4356     return l1;
4357
4358   if (!l1)
4359     return l2;
4360
4361   for (last = l1; last->next_change; last = last->next_change)
4362     continue;
4363   last->next_change = l2;
4364
4365   return l1;
4366 }
4367
4368 /* Returns candidate by that USE is expressed in IVS.  */
4369
4370 static struct cost_pair *
4371 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4372 {
4373   return ivs->cand_for_use[use->id];
4374 }
4375
4376 /* Reverse the list of changes DELTA, forming the inverse to it.  */
4377
4378 static struct iv_ca_delta *
4379 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4380 {
4381   struct iv_ca_delta *act, *next, *prev = NULL;
4382   struct cost_pair *tmp;
4383
4384   for (act = delta; act; act = next)
4385     {
4386       next = act->next_change;
4387       act->next_change = prev;
4388       prev = act;
4389
4390       tmp = act->old_cp;
4391       act->old_cp = act->new_cp;
4392       act->new_cp = tmp;
4393     }
4394
4395   return prev;
4396 }
4397
4398 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
4399    reverted instead.  */
4400
4401 static void
4402 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4403                     struct iv_ca_delta *delta, bool forward)
4404 {
4405   struct cost_pair *from, *to;
4406   struct iv_ca_delta *act;
4407
4408   if (!forward)
4409     delta = iv_ca_delta_reverse (delta);
4410
4411   for (act = delta; act; act = act->next_change)
4412     {
4413       from = act->old_cp;
4414       to = act->new_cp;
4415       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4416       iv_ca_set_cp (data, ivs, act->use, to);
4417     }
4418
4419   if (!forward)
4420     iv_ca_delta_reverse (delta);
4421 }
4422
4423 /* Returns true if CAND is used in IVS.  */
4424
4425 static bool
4426 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4427 {
4428   return ivs->n_cand_uses[cand->id] > 0;
4429 }
4430
4431 /* Returns number of induction variable candidates in the set IVS.  */
4432
4433 static unsigned
4434 iv_ca_n_cands (struct iv_ca *ivs)
4435 {
4436   return ivs->n_cands;
4437 }
4438
4439 /* Free the list of changes DELTA.  */
4440
4441 static void
4442 iv_ca_delta_free (struct iv_ca_delta **delta)
4443 {
4444   struct iv_ca_delta *act, *next;
4445
4446   for (act = *delta; act; act = next)
4447     {
4448       next = act->next_change;
4449       free (act);
4450     }
4451
4452   *delta = NULL;
4453 }
4454
4455 /* Allocates new iv candidates assignment.  */
4456
4457 static struct iv_ca *
4458 iv_ca_new (struct ivopts_data *data)
4459 {
4460   struct iv_ca *nw = XNEW (struct iv_ca);
4461
4462   nw->upto = 0;
4463   nw->bad_uses = 0;
4464   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4465   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4466   nw->cands = BITMAP_ALLOC (NULL);
4467   nw->n_cands = 0;
4468   nw->n_regs = 0;
4469   nw->cand_use_cost = zero_cost;
4470   nw->cand_cost = 0;
4471   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4472   nw->cost = zero_cost;
4473
4474   return nw;
4475 }
4476
4477 /* Free memory occupied by the set IVS.  */
4478
4479 static void
4480 iv_ca_free (struct iv_ca **ivs)
4481 {
4482   free ((*ivs)->cand_for_use);
4483   free ((*ivs)->n_cand_uses);
4484   BITMAP_FREE ((*ivs)->cands);
4485   free ((*ivs)->n_invariant_uses);
4486   free (*ivs);
4487   *ivs = NULL;
4488 }
4489
4490 /* Dumps IVS to FILE.  */
4491
4492 static void
4493 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4494 {
4495   const char *pref = "  invariants ";
4496   unsigned i;
4497   comp_cost cost = iv_ca_cost (ivs);
4498
4499   fprintf (file, "  cost %d (complexity %d)\n", cost.cost, cost.complexity);
4500   bitmap_print (file, ivs->cands, "  candidates ","\n");
4501
4502   for (i = 1; i <= data->max_inv_id; i++)
4503     if (ivs->n_invariant_uses[i])
4504       {
4505         fprintf (file, "%s%d", pref, i);
4506         pref = ", ";
4507       }
4508   fprintf (file, "\n");
4509 }
4510
4511 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
4512    new set, and store differences in DELTA.  Number of induction variables
4513    in the new set is stored to N_IVS.  */
4514
4515 static comp_cost
4516 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4517               struct iv_cand *cand, struct iv_ca_delta **delta,
4518               unsigned *n_ivs)
4519 {
4520   unsigned i;
4521   comp_cost cost;
4522   struct iv_use *use;
4523   struct cost_pair *old_cp, *new_cp;
4524
4525   *delta = NULL;
4526   for (i = 0; i < ivs->upto; i++)
4527     {
4528       use = iv_use (data, i);
4529       old_cp = iv_ca_cand_for_use (ivs, use);
4530
4531       if (old_cp
4532           && old_cp->cand == cand)
4533         continue;
4534
4535       new_cp = get_use_iv_cost (data, use, cand);
4536       if (!new_cp)
4537         continue;
4538
4539       if (!iv_ca_has_deps (ivs, new_cp))
4540         continue;
4541
4542       if (!cheaper_cost_pair (new_cp, old_cp))
4543         continue;
4544
4545       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4546     }
4547
4548   iv_ca_delta_commit (data, ivs, *delta, true);
4549   cost = iv_ca_cost (ivs);
4550   if (n_ivs)
4551     *n_ivs = iv_ca_n_cands (ivs);
4552   iv_ca_delta_commit (data, ivs, *delta, false);
4553
4554   return cost;
4555 }
4556
4557 /* Try narrowing set IVS by removing CAND.  Return the cost of
4558    the new set and store the differences in DELTA.  */
4559
4560 static comp_cost
4561 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4562               struct iv_cand *cand, struct iv_ca_delta **delta)
4563 {
4564   unsigned i, ci;
4565   struct iv_use *use;
4566   struct cost_pair *old_cp, *new_cp, *cp;
4567   bitmap_iterator bi;
4568   struct iv_cand *cnd;
4569   comp_cost cost;
4570
4571   *delta = NULL;
4572   for (i = 0; i < n_iv_uses (data); i++)
4573     {
4574       use = iv_use (data, i);
4575
4576       old_cp = iv_ca_cand_for_use (ivs, use);
4577       if (old_cp->cand != cand)
4578         continue;
4579
4580       new_cp = NULL;
4581
4582       if (data->consider_all_candidates)
4583         {
4584           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4585             {
4586               if (ci == cand->id)
4587                 continue;
4588
4589               cnd = iv_cand (data, ci);
4590
4591               cp = get_use_iv_cost (data, use, cnd);
4592               if (!cp)
4593                 continue;
4594               if (!iv_ca_has_deps (ivs, cp))
4595                 continue;
4596
4597               if (!cheaper_cost_pair (cp, new_cp))
4598                 continue;
4599
4600               new_cp = cp;
4601             }
4602         }
4603       else
4604         {
4605           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4606             {
4607               if (ci == cand->id)
4608                 continue;
4609
4610               cnd = iv_cand (data, ci);
4611
4612               cp = get_use_iv_cost (data, use, cnd);
4613               if (!cp)
4614                 continue;
4615               if (!iv_ca_has_deps (ivs, cp))
4616                 continue;
4617
4618               if (!cheaper_cost_pair (cp, new_cp))
4619                 continue;
4620
4621               new_cp = cp;
4622             }
4623         }
4624
4625       if (!new_cp)
4626         {
4627           iv_ca_delta_free (delta);
4628           return infinite_cost;
4629         }
4630
4631       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4632     }
4633
4634   iv_ca_delta_commit (data, ivs, *delta, true);
4635   cost = iv_ca_cost (ivs);
4636   iv_ca_delta_commit (data, ivs, *delta, false);
4637
4638   return cost;
4639 }
4640
4641 /* Try optimizing the set of candidates IVS by removing candidates different
4642    from to EXCEPT_CAND from it.  Return cost of the new set, and store
4643    differences in DELTA.  */
4644
4645 static comp_cost
4646 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4647              struct iv_cand *except_cand, struct iv_ca_delta **delta)
4648 {
4649   bitmap_iterator bi;
4650   struct iv_ca_delta *act_delta, *best_delta;
4651   unsigned i;
4652   comp_cost best_cost, acost;
4653   struct iv_cand *cand;
4654
4655   best_delta = NULL;
4656   best_cost = iv_ca_cost (ivs);
4657
4658   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4659     {
4660       cand = iv_cand (data, i);
4661
4662       if (cand == except_cand)
4663         continue;
4664
4665       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4666
4667       if (compare_costs (acost, best_cost) < 0)
4668         {
4669           best_cost = acost;
4670           iv_ca_delta_free (&best_delta);
4671           best_delta = act_delta;
4672         }
4673       else
4674         iv_ca_delta_free (&act_delta);
4675     }
4676
4677   if (!best_delta)
4678     {
4679       *delta = NULL;
4680       return best_cost;
4681     }
4682
4683   /* Recurse to possibly remove other unnecessary ivs.  */
4684   iv_ca_delta_commit (data, ivs, best_delta, true);
4685   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4686   iv_ca_delta_commit (data, ivs, best_delta, false);
4687   *delta = iv_ca_delta_join (best_delta, *delta);
4688   return best_cost;
4689 }
4690
4691 /* Tries to extend the sets IVS in the best possible way in order
4692    to express the USE.  */
4693
4694 static bool
4695 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4696                   struct iv_use *use)
4697 {
4698   comp_cost best_cost, act_cost;
4699   unsigned i;
4700   bitmap_iterator bi;
4701   struct iv_cand *cand;
4702   struct iv_ca_delta *best_delta = NULL, *act_delta;
4703   struct cost_pair *cp;
4704
4705   iv_ca_add_use (data, ivs, use);
4706   best_cost = iv_ca_cost (ivs);
4707
4708   cp = iv_ca_cand_for_use (ivs, use);
4709   if (cp)
4710     {
4711       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4712       iv_ca_set_no_cp (data, ivs, use);
4713     }
4714
4715   /* First try important candidates not based on any memory object.  Only if
4716      this fails, try the specific ones.  Rationale -- in loops with many
4717      variables the best choice often is to use just one generic biv.  If we
4718      added here many ivs specific to the uses, the optimization algorithm later
4719      would be likely to get stuck in a local minimum, thus causing us to create
4720      too many ivs.  The approach from few ivs to more seems more likely to be
4721      successful -- starting from few ivs, replacing an expensive use by a
4722      specific iv should always be a win.  */
4723   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4724     {
4725       cand = iv_cand (data, i);
4726
4727       if (cand->iv->base_object != NULL_TREE)
4728         continue;
4729
4730       if (iv_ca_cand_used_p (ivs, cand))
4731         continue;
4732
4733       cp = get_use_iv_cost (data, use, cand);
4734       if (!cp)
4735         continue;
4736
4737       iv_ca_set_cp (data, ivs, use, cp);
4738       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4739       iv_ca_set_no_cp (data, ivs, use);
4740       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4741
4742       if (compare_costs (act_cost, best_cost) < 0)
4743         {
4744           best_cost = act_cost;
4745
4746           iv_ca_delta_free (&best_delta);
4747           best_delta = act_delta;
4748         }
4749       else
4750         iv_ca_delta_free (&act_delta);
4751     }
4752
4753   if (infinite_cost_p (best_cost))
4754     {
4755       for (i = 0; i < use->n_map_members; i++)
4756         {
4757           cp = use->cost_map + i;
4758           cand = cp->cand;
4759           if (!cand)
4760             continue;
4761
4762           /* Already tried this.  */
4763           if (cand->important && cand->iv->base_object == NULL_TREE)
4764             continue;
4765
4766           if (iv_ca_cand_used_p (ivs, cand))
4767             continue;
4768
4769           act_delta = NULL;
4770           iv_ca_set_cp (data, ivs, use, cp);
4771           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4772           iv_ca_set_no_cp (data, ivs, use);
4773           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4774                                        cp, act_delta);
4775
4776           if (compare_costs (act_cost, best_cost) < 0)
4777             {
4778               best_cost = act_cost;
4779
4780               if (best_delta)
4781                 iv_ca_delta_free (&best_delta);
4782               best_delta = act_delta;
4783             }
4784           else
4785             iv_ca_delta_free (&act_delta);
4786         }
4787     }
4788
4789   iv_ca_delta_commit (data, ivs, best_delta, true);
4790   iv_ca_delta_free (&best_delta);
4791
4792   return !infinite_cost_p (best_cost);
4793 }
4794
/* Finds an initial assignment of candidates to uses.  Returns NULL (after
   releasing the partial assignment) if some use cannot be expressed.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned uid;

  for (uid = 0; uid < n_iv_uses (data); uid++)
    {
      if (try_add_cand_for (data, ivs, iv_use (data, uid)))
        continue;

      iv_ca_free (&ivs);
      return NULL;
    }

  return ivs;
}
4812
4813 /* Tries to improve set of induction variables IVS.  */
4814
4815 static bool
4816 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4817 {
4818   unsigned i, n_ivs;
4819   comp_cost acost, best_cost = iv_ca_cost (ivs);
4820   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4821   struct iv_cand *cand;
4822
4823   /* Try extending the set of induction variables by one.  */
4824   for (i = 0; i < n_iv_cands (data); i++)
4825     {
4826       cand = iv_cand (data, i);
4827
4828       if (iv_ca_cand_used_p (ivs, cand))
4829         continue;
4830
4831       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4832       if (!act_delta)
4833         continue;
4834
4835       /* If we successfully added the candidate and the set is small enough,
4836          try optimizing it by removing other candidates.  */
4837       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4838         {
4839           iv_ca_delta_commit (data, ivs, act_delta, true);
4840           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4841           iv_ca_delta_commit (data, ivs, act_delta, false);
4842           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4843         }
4844
4845       if (compare_costs (acost, best_cost) < 0)
4846         {
4847           best_cost = acost;
4848           iv_ca_delta_free (&best_delta);
4849           best_delta = act_delta;
4850         }
4851       else
4852         iv_ca_delta_free (&act_delta);
4853     }
4854
4855   if (!best_delta)
4856     {
4857       /* Try removing the candidates from the set instead.  */
4858       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4859
4860       /* Nothing more we can do.  */
4861       if (!best_delta)
4862         return false;
4863     }
4864
4865   iv_ca_delta_commit (data, ivs, best_delta, true);
4866   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
4867   iv_ca_delta_free (&best_delta);
4868   return true;
4869 }
4870
4871 /* Attempts to find the optimal set of induction variables.  We do simple
4872    greedy heuristic -- we try to replace at most one candidate in the selected
4873    solution and remove the unused ivs while this improves the cost.  */
4874
4875 static struct iv_ca *
4876 find_optimal_iv_set (struct ivopts_data *data)
4877 {
4878   unsigned i;
4879   struct iv_ca *set;
4880   struct iv_use *use;
4881
4882   /* Get the initial solution.  */
4883   set = get_initial_solution (data);
4884   if (!set)
4885     {
4886       if (dump_file && (dump_flags & TDF_DETAILS))
4887         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
4888       return NULL;
4889     }
4890
4891   if (dump_file && (dump_flags & TDF_DETAILS))
4892     {
4893       fprintf (dump_file, "Initial set of candidates:\n");
4894       iv_ca_dump (data, dump_file, set);
4895     }
4896
4897   while (try_improve_iv_set (data, set))
4898     {
4899       if (dump_file && (dump_flags & TDF_DETAILS))
4900         {
4901           fprintf (dump_file, "Improved to:\n");
4902           iv_ca_dump (data, dump_file, set);
4903         }
4904     }
4905
4906   if (dump_file && (dump_flags & TDF_DETAILS))
4907     {
4908       comp_cost cost = iv_ca_cost (set);
4909       fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4910     }
4911
4912   for (i = 0; i < n_iv_uses (data); i++)
4913     {
4914       use = iv_use (data, i);
4915       use->selected = iv_ca_cand_for_use (set, use)->cand;
4916     }
4917
4918   return set;
4919 }
4920
4921 /* Creates a new induction variable corresponding to CAND.  */
4922
4923 static void
4924 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4925 {
4926   block_stmt_iterator incr_pos;
4927   tree base;
4928   bool after = false;
4929
4930   if (!cand->iv)
4931     return;
4932
4933   switch (cand->pos)
4934     {
4935     case IP_NORMAL:
4936       incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4937       break;
4938
4939     case IP_END:
4940       incr_pos = bsi_last (ip_end_pos (data->current_loop));
4941       after = true;
4942       break;
4943
4944     case IP_ORIGINAL:
4945       /* Mark that the iv is preserved.  */
4946       name_info (data, cand->var_before)->preserve_biv = true;
4947       name_info (data, cand->var_after)->preserve_biv = true;
4948
4949       /* Rewrite the increment so that it uses var_before directly.  */
4950       find_interesting_uses_op (data, cand->var_after)->selected = cand;
4951
4952       return;
4953     }
4954
4955   gimple_add_tmp_var (cand->var_before);
4956   add_referenced_var (cand->var_before);
4957
4958   base = unshare_expr (cand->iv->base);
4959
4960   create_iv (base, unshare_expr (cand->iv->step),
4961              cand->var_before, data->current_loop,
4962              &incr_pos, after, &cand->var_before, &cand->var_after);
4963 }
4964
4965 /* Creates new induction variables described in SET.  */
4966
4967 static void
4968 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
4969 {
4970   unsigned i;
4971   struct iv_cand *cand;
4972   bitmap_iterator bi;
4973
4974   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
4975     {
4976       cand = iv_cand (data, i);
4977       create_new_iv (data, cand);
4978     }
4979 }
4980
4981 /* Removes statement STMT (real or a phi node).  If INCLUDING_DEFINED_NAME
4982    is true, remove also the ssa name defined by the statement.  */
4983
4984 static void
4985 remove_statement (tree stmt, bool including_defined_name)
4986 {
4987   if (TREE_CODE (stmt) == PHI_NODE)
4988     {
4989       remove_phi_node (stmt, NULL_TREE, including_defined_name);
4990     }
4991   else
4992     {
4993       block_stmt_iterator bsi = bsi_for_stmt (stmt);
4994
4995       bsi_remove (&bsi, true);
4996       release_defs (stmt);
4997     }
4998 }
4999
5000 /* Rewrites USE (definition of iv used in a nonlinear expression)
5001    using candidate CAND.  */
5002
5003 static void
5004 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5005                             struct iv_use *use, struct iv_cand *cand)
5006 {
5007   tree comp;
5008   tree op, tgt, ass;
5009   block_stmt_iterator bsi;
5010
5011   /* An important special case -- if we are asked to express value of
5012      the original iv by itself, just exit; there is no need to
5013      introduce a new computation (that might also need casting the
5014      variable to unsigned and back).  */
5015   if (cand->pos == IP_ORIGINAL
5016       && cand->incremented_at == use->stmt)
5017     {
5018       tree step, ctype, utype;
5019       enum tree_code incr_code = PLUS_EXPR;
5020
5021       gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
5022       gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);
5023
5024       step = cand->iv->step;
5025       ctype = TREE_TYPE (step);
5026       utype = TREE_TYPE (cand->var_after);
5027       if (TREE_CODE (step) == NEGATE_EXPR)
5028         {
5029           incr_code = MINUS_EXPR;
5030           step = TREE_OPERAND (step, 0);
5031         }
5032
5033       /* Check whether we may leave the computation unchanged.
5034          This is the case only if it does not rely on other
5035          computations in the loop -- otherwise, the computation
5036          we rely upon may be removed in remove_unused_ivs,
5037          thus leading to ICE.  */
5038       op = GIMPLE_STMT_OPERAND (use->stmt, 1);
5039       if (TREE_CODE (op) == PLUS_EXPR
5040           || TREE_CODE (op) == MINUS_EXPR
5041           || TREE_CODE (op) == POINTER_PLUS_EXPR)
5042         {
5043           if (TREE_OPERAND (op, 0) == cand->var_before)
5044             op = TREE_OPERAND (op, 1);
5045           else if (TREE_CODE (op) != MINUS_EXPR
5046                    && TREE_OPERAND (op, 1) == cand->var_before)
5047             op = TREE_OPERAND (op, 0);
5048           else
5049             op = NULL_TREE;
5050         }
5051       else
5052         op = NULL_TREE;
5053
5054       if (op
5055           && (TREE_CODE (op) == INTEGER_CST
5056               || operand_equal_p (op, step, 0)))
5057         return;
5058
5059       /* Otherwise, add the necessary computations to express
5060          the iv.  */
5061       op = fold_convert (ctype, cand->var_before);
5062       comp = fold_convert (utype,
5063                            build2 (incr_code, ctype, op,
5064                                    unshare_expr (step)));
5065     }
5066   else
5067     {
5068       comp = get_computation (data->current_loop, use, cand);
5069       gcc_assert (comp != NULL_TREE);
5070     }
5071
5072   switch (TREE_CODE (use->stmt))
5073     {
5074     case PHI_NODE:
5075       tgt = PHI_RESULT (use->stmt);
5076
5077       /* If we should keep the biv, do not replace it.  */
5078       if (name_info (data, tgt)->preserve_biv)
5079         return;
5080
5081       bsi = bsi_after_labels (bb_for_stmt (use->stmt));
5082       break;
5083
5084     case GIMPLE_MODIFY_STMT:
5085       tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
5086       bsi = bsi_for_stmt (use->stmt);
5087       break;
5088
5089     default:
5090       gcc_unreachable ();
5091     }
5092
5093   op = force_gimple_operand_bsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5094                                  true, BSI_SAME_STMT);
5095
5096   if (TREE_CODE (use->stmt) == PHI_NODE)
5097     {
5098       ass = build_gimple_modify_stmt (tgt, op);
5099       bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
5100       remove_statement (use->stmt, false);
5101       SSA_NAME_DEF_STMT (tgt) = ass;
5102     }
5103   else
5104     GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
5105 }
5106
5107 /* Replaces ssa name in index IDX by its basic variable.  Callback for
5108    for_each_index.  */
5109
5110 static bool
5111 idx_remove_ssa_names (tree base, tree *idx,
5112                       void *data ATTRIBUTE_UNUSED)
5113 {
5114   tree *op;
5115
5116   if (TREE_CODE (*idx) == SSA_NAME)
5117     *idx = SSA_NAME_VAR (*idx);
5118
5119   if (TREE_CODE (base) == ARRAY_REF)
5120     {
5121       op = &TREE_OPERAND (base, 2);
5122       if (*op
5123           && TREE_CODE (*op) == SSA_NAME)
5124         *op = SSA_NAME_VAR (*op);
5125       op = &TREE_OPERAND (base, 3);
5126       if (*op
5127           && TREE_CODE (*op) == SSA_NAME)
5128         *op = SSA_NAME_VAR (*op);
5129     }
5130
5131   return true;
5132 }
5133
5134 /* Unshares REF and replaces ssa names inside it by their basic variables.  */
5135
5136 static tree
5137 unshare_and_remove_ssa_names (tree ref)
5138 {
5139   ref = unshare_expr (ref);
5140   for_each_index (&ref, idx_remove_ssa_names, NULL);
5141
5142   return ref;
5143 }
5144
5145 /* Extract the alias analysis info for the memory reference REF.  There are
5146    several ways how this information may be stored and what precisely is
5147    its semantics depending on the type of the reference, but there always is
5148    somewhere hidden one _DECL node that is used to determine the set of
5149    virtual operands for the reference.  The code below deciphers this jungle
5150    and extracts this single useful piece of information.  */
5151
5152 static tree
5153 get_ref_tag (tree ref, tree orig)
5154 {
5155   tree var = get_base_address (ref);
5156   tree aref = NULL_TREE, tag, sv;
5157   HOST_WIDE_INT offset, size, maxsize;
5158
5159   for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5160     {
5161       aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5162       if (ref)
5163         break;
5164     }
5165
5166   if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5167     return aref;
5168
5169   if (!var)
5170     return NULL_TREE;
5171
5172   if (TREE_CODE (var) == INDIRECT_REF)
5173     {
5174       /* If the base is a dereference of a pointer, first check its name memory
5175          tag.  If it does not have one, use its symbol memory tag.  */
5176       var = TREE_OPERAND (var, 0);
5177       if (TREE_CODE (var) != SSA_NAME)
5178         return NULL_TREE;
5179
5180       if (SSA_NAME_PTR_INFO (var))
5181         {
5182           tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5183           if (tag)
5184             return tag;
5185         }
5186
5187       var = SSA_NAME_VAR (var);
5188       tag = symbol_mem_tag (var);
5189       gcc_assert (tag != NULL_TREE);
5190       return tag;
5191     }
5192   else
5193     {
5194       if (!DECL_P (var))
5195         return NULL_TREE;
5196
5197       tag = symbol_mem_tag (var);
5198       if (tag)
5199         return tag;
5200
5201       return var;
5202     }
5203 }
5204
5205 /* Copies the reference information from OLD_REF to NEW_REF.  */
5206
5207 static void
5208 copy_ref_info (tree new_ref, tree old_ref)
5209 {
5210   if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5211     copy_mem_ref_info (new_ref, old_ref);
5212   else
5213     {
5214       TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5215       TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5216     }
5217 }
5218
5219 /* Rewrites USE (address that is an iv) using candidate CAND.  */
5220
5221 static void
5222 rewrite_use_address (struct ivopts_data *data,
5223                      struct iv_use *use, struct iv_cand *cand)
5224 {
5225   aff_tree aff;
5226   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5227   tree ref;
5228   bool ok;
5229
5230   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5231   gcc_assert (ok);
5232   unshare_aff_combination (&aff);
5233
5234   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5235   copy_ref_info (ref, *use->op_p);
5236   *use->op_p = ref;
5237 }
5238
5239 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5240    candidate CAND.  */
5241
5242 static void
5243 rewrite_use_compare (struct ivopts_data *data,
5244                      struct iv_use *use, struct iv_cand *cand)
5245 {
5246   tree comp, *var_p, op, bound;
5247   block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5248   enum tree_code compare;
5249   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5250   bool ok;
5251
5252   bound = cp->value;
5253   if (bound)
5254     {
5255       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5256       tree var_type = TREE_TYPE (var);
5257
5258       compare = iv_elimination_compare (data, use);
5259       bound = unshare_expr (fold_convert (var_type, bound));
5260       op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE,
5261                                      true, BSI_SAME_STMT);
5262
5263       *use->op_p = build2 (compare, boolean_type_node, var, op);
5264       return;
5265     }
5266
5267   /* The induction variable elimination failed; just express the original
5268      giv.  */
5269   comp = get_computation (data->current_loop, use, cand);
5270   gcc_assert (comp != NULL_TREE);
5271
5272   ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
5273   gcc_assert (ok);
5274
5275   *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5276                                      true, BSI_SAME_STMT);
5277 }
5278
5279 /* Rewrites USE using candidate CAND.  */
5280
5281 static void
5282 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5283 {
5284   push_stmt_changes (&use->stmt);
5285
5286   switch (use->type)
5287     {
5288       case USE_NONLINEAR_EXPR:
5289         rewrite_use_nonlinear_expr (data, use, cand);
5290         break;
5291
5292       case USE_ADDRESS:
5293         rewrite_use_address (data, use, cand);
5294         break;
5295
5296       case USE_COMPARE:
5297         rewrite_use_compare (data, use, cand);
5298         break;
5299
5300       default:
5301         gcc_unreachable ();
5302     }
5303
5304   pop_stmt_changes (&use->stmt);
5305 }
5306
5307 /* Rewrite the uses using the selected induction variables.  */
5308
5309 static void
5310 rewrite_uses (struct ivopts_data *data)
5311 {
5312   unsigned i;
5313   struct iv_cand *cand;
5314   struct iv_use *use;
5315
5316   for (i = 0; i < n_iv_uses (data); i++)
5317     {
5318       use = iv_use (data, i);
5319       cand = use->selected;
5320       gcc_assert (cand);
5321
5322       rewrite_use (data, use, cand);
5323     }
5324 }
5325
5326 /* Removes the ivs that are not used after rewriting.  */
5327
5328 static void
5329 remove_unused_ivs (struct ivopts_data *data)
5330 {
5331   unsigned j;
5332   bitmap_iterator bi;
5333
5334   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5335     {
5336       struct version_info *info;
5337
5338       info = ver_info (data, j);
5339       if (info->iv
5340           && !integer_zerop (info->iv->step)
5341           && !info->inv_id
5342           && !info->iv->have_use_for
5343           && !info->preserve_biv)
5344         remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5345     }
5346 }
5347
5348 /* Frees data allocated by the optimization of a single loop.  */
5349
5350 static void
5351 free_loop_data (struct ivopts_data *data)
5352 {
5353   unsigned i, j;
5354   bitmap_iterator bi;
5355   tree obj;
5356
5357   if (data->niters)
5358     {
5359       pointer_map_destroy (data->niters);
5360       data->niters = NULL;
5361     }
5362
5363   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5364     {
5365       struct version_info *info;
5366
5367       info = ver_info (data, i);
5368       if (info->iv)
5369         free (info->iv);
5370       info->iv = NULL;
5371       info->has_nonlin_use = false;
5372       info->preserve_biv = false;
5373       info->inv_id = 0;
5374     }
5375   bitmap_clear (data->relevant);
5376   bitmap_clear (data->important_candidates);
5377
5378   for (i = 0; i < n_iv_uses (data); i++)
5379     {
5380       struct iv_use *use = iv_use (data, i);
5381
5382       free (use->iv);
5383       BITMAP_FREE (use->related_cands);
5384       for (j = 0; j < use->n_map_members; j++)
5385         if (use->cost_map[j].depends_on)
5386           BITMAP_FREE (use->cost_map[j].depends_on);
5387       free (use->cost_map);
5388       free (use);
5389     }
5390   VEC_truncate (iv_use_p, data->iv_uses, 0);
5391
5392   for (i = 0; i < n_iv_cands (data); i++)
5393     {
5394       struct iv_cand *cand = iv_cand (data, i);
5395
5396       if (cand->iv)
5397         free (cand->iv);
5398       if (cand->depends_on)
5399         BITMAP_FREE (cand->depends_on);
5400       free (cand);
5401     }
5402   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5403
5404   if (data->version_info_size < num_ssa_names)
5405     {
5406       data->version_info_size = 2 * num_ssa_names;
5407       free (data->version_info);
5408       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5409     }
5410
5411   data->max_inv_id = 0;
5412
5413   for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5414     SET_DECL_RTL (obj, NULL_RTX);
5415
5416   VEC_truncate (tree, decl_rtl_to_reset, 0);
5417 }
5418
5419 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
5420    loop tree.  */
5421
5422 static void
5423 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5424 {
5425   free_loop_data (data);
5426   free (data->version_info);
5427   BITMAP_FREE (data->relevant);
5428   BITMAP_FREE (data->important_candidates);
5429
5430   VEC_free (tree, heap, decl_rtl_to_reset);
5431   VEC_free (iv_use_p, heap, data->iv_uses);
5432   VEC_free (iv_cand_p, heap, data->iv_candidates);
5433 }
5434
5435 /* Optimizes the LOOP.  Returns true if anything changed.  */
5436
5437 static bool
5438 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5439 {
5440   bool changed = false;
5441   struct iv_ca *iv_ca;
5442   edge exit;
5443
5444   gcc_assert (!data->niters);
5445   data->current_loop = loop;
5446
5447   if (dump_file && (dump_flags & TDF_DETAILS))
5448     {
5449       fprintf (dump_file, "Processing loop %d\n", loop->num);
5450
5451       exit = single_dom_exit (loop);
5452       if (exit)
5453         {
5454           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
5455                    exit->src->index, exit->dest->index);
5456           print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5457           fprintf (dump_file, "\n");
5458         }
5459
5460       fprintf (dump_file, "\n");
5461     }
5462
5463   /* For each ssa name determines whether it behaves as an induction variable
5464      in some loop.  */
5465   if (!find_induction_variables (data))
5466     goto finish;
5467
5468   /* Finds interesting uses (item 1).  */
5469   find_interesting_uses (data);
5470   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5471     goto finish;
5472
5473   /* Finds candidates for the induction variables (item 2).  */
5474   find_iv_candidates (data);
5475
5476   /* Calculates the costs (item 3, part 1).  */
5477   determine_use_iv_costs (data);
5478   determine_iv_costs (data);
5479   determine_set_costs (data);
5480
5481   /* Find the optimal set of induction variables (item 3, part 2).  */
5482   iv_ca = find_optimal_iv_set (data);
5483   if (!iv_ca)
5484     goto finish;
5485   changed = true;
5486
5487   /* Create the new induction variables (item 4, part 1).  */
5488   create_new_ivs (data, iv_ca);
5489   iv_ca_free (&iv_ca);
5490
5491   /* Rewrite the uses (item 4, part 2).  */
5492   rewrite_uses (data);
5493
5494   /* Remove the ivs that are unused after rewriting.  */
5495   remove_unused_ivs (data);
5496
5497   /* We have changed the structure of induction variables; it might happen
5498      that definitions in the scev database refer to some of them that were
5499      eliminated.  */
5500   scev_reset ();
5501
5502 finish:
5503   free_loop_data (data);
5504
5505   return changed;
5506 }
5507
5508 /* Main entry point.  Optimizes induction variables in loops.  */
5509
5510 void
5511 tree_ssa_iv_optimize (void)
5512 {
5513   struct loop *loop;
5514   struct ivopts_data data;
5515   loop_iterator li;
5516
5517   tree_ssa_iv_optimize_init (&data);
5518
5519   /* Optimize the loops starting with the innermost ones.  */
5520   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5521     {
5522       if (dump_file && (dump_flags & TDF_DETAILS))
5523         flow_loop_dump (loop, dump_file, NULL, 1);
5524
5525       tree_ssa_iv_optimize_loop (&data, loop);
5526     }
5527
5528   tree_ssa_iv_optimize_finalize (&data);
5529 }