gcc/tree-ssa-loop-ivopts.c

   1 /* Induction variable optimizations.
   2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   3    Free Software Foundation, Inc.
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it
   8 under the terms of the GNU General Public License as published by the
   9 Free Software Foundation; either version 3, or (at your option) any
  10 later version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT
  13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING3.  If not see
  19 <http://www.gnu.org/licenses/>.  */
  20
  21 /* This pass tries to find the optimal set of induction variables for the loop.
  22    It optimizes just the basic linear induction variables (although adding
  23    support for other types should not be too hard).  It includes the
  24    optimizations commonly known as strength reduction, induction variable
  25    coalescing and induction variable elimination.  It does it in the
  26    following steps:
  27
  28    1) The interesting uses of induction variables are found.  This includes
  29
  30       -- uses of induction variables in non-linear expressions
  31       -- addresses of arrays
  32       -- comparisons of induction variables
  33
  34    2) Candidates for the induction variables are found.  This includes
  35
  36       -- old induction variables
  37       -- the variables defined by expressions derived from the "interesting
  38          uses" above
  39
  40    3) The optimal (w.r. to a cost function) set of variables is chosen.  The
  41       cost function assigns a cost to sets of induction variables and consists
  42       of three parts:
  43
  44       -- The use costs.  Each of the interesting uses chooses the best induction
  45          variable in the set and adds its cost to the sum.  The cost reflects
  46          the time spent on modifying the induction variables value to be usable
  47          for the given purpose (adding base and offset for arrays, etc.).
  48       -- The variable costs.  Each of the variables has a cost assigned that
  49          reflects the costs associated with incrementing the value of the
  50          variable.  The original variables are somewhat preferred.
  51       -- The set cost.  Depending on the size of the set, extra cost may be
  52          added to reflect register pressure.
  53
  54       All the costs are defined in a machine-specific way, using the target
  55       hooks and machine descriptions to determine them.
  56
  57    4) The trees are transformed to use the new variables, the dead code is
  58       removed.
  59
  60    All of this is done loop by loop.  Doing it globally is theoretically
  61    possible, it might give a better performance and it might enable us
  62    to decide costs more precisely, but getting all the interactions right
  63    would be complicated.  */
  64
  65 #include "config.h"
  66 #include "system.h"
  67 #include "coretypes.h"
  68 #include "tm.h"
  69 #include "tree.h"
  70 #include "tm_p.h"
  71 #include "basic-block.h"
  72 #include "tree-pretty-print.h"
  73 #include "gimple-pretty-print.h"
  74 #include "tree-flow.h"
  75 #include "tree-dump.h"
  76 #include "timevar.h"
  77 #include "cfgloop.h"
  78 #include "tree-pass.h"
  79 #include "ggc.h"
  80 #include "insn-config.h"
  81 #include "recog.h"
  82 #include "pointer-set.h"
  83 #include "hashtab.h"
  84 #include "tree-chrec.h"
  85 #include "tree-scalar-evolution.h"
  86 #include "cfgloop.h"
  87 #include "params.h"
  88 #include "langhooks.h"
  89 #include "tree-affine.h"
  90 #include "target.h"
  91 #include "tree-inline.h"
  92 #include "tree-ssa-propagate.h"
  93
  94 /* FIXME: add_cost and zero_cost defined in exprmed.h conflict with local uses.
  95  */
  96 #include "expmed.h"
  97 #undef add_cost
  98 #undef zero_cost
  99
 100 /* FIXME: Expressions are expanded to RTL in this pass to determine the
 101    cost of different addressing modes.  This should be moved to a TBD
 102    interface between the GIMPLE and RTL worlds.  */
 103 #include "expr.h"
 104
 105 /* The infinite cost.  */
 106 #define INFTY 10000000
 107
 108 #define AVG_LOOP_NITER(LOOP) 5
 109
 110 /* Returns the expected number of loop iterations for LOOP.
 111    The average trip count is computed from profile data if it
 112    exists. */
 113
 114 static inline HOST_WIDE_INT
 115 avg_loop_niter (struct loop *loop)
 116 {
 117   HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
 118   if (niter == -1)
 119     return AVG_LOOP_NITER (loop);
 120
 121   return niter;
 122 }
 123
 124 /* Representation of the induction variable.  */
 125 struct iv
 126 {
 127   tree base;            /* Initial value of the iv.  */
 128   tree base_object;     /* A memory object to that the induction variable points.  */
 129   tree step;            /* Step of the iv (constant only).  */
 130   tree ssa_name;        /* The ssa name with the value.  */
 131   bool biv_p;           /* Is it a biv?  */
 132   bool have_use_for;    /* Do we already have a use for it?  */
 133   unsigned use_id;      /* The identifier in the use if it is the case.  */
 134 };
 135
 136 /* Per-ssa version information (induction variable descriptions, etc.).  */
 137 struct version_info
 138 {
 139   tree name;            /* The ssa name.  */
 140   struct iv *iv;        /* Induction variable description.  */
 141   bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
 142                            an expression that is not an induction variable.  */
 143   bool preserve_biv;    /* For the original biv, whether to preserve it.  */
 144   unsigned inv_id;      /* Id of an invariant.  */
 145 };
 146
 147 /* Types of uses.  */
 148 enum use_type
 149 {
 150   USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
 151   USE_ADDRESS,          /* Use in an address.  */
 152   USE_COMPARE           /* Use is a compare.  */
 153 };
 154
 155 /* Cost of a computation.  */
 156 typedef struct
 157 {
 158   int cost;             /* The runtime cost.  */
 159   unsigned complexity;  /* The estimate of the complexity of the code for
 160                            the computation (in no concrete units --
 161                            complexity field should be larger for more
 162                            complex expressions and addressing modes).  */
 163 } comp_cost;
 164
 165 static const comp_cost zero_cost = {0, 0};
 166 static const comp_cost infinite_cost = {INFTY, INFTY};
 167
 168 /* The candidate - cost pair.  */
 169 struct cost_pair
 170 {
 171   struct iv_cand *cand; /* The candidate.  */
 172   comp_cost cost;       /* The cost.  */
 173   bitmap depends_on;    /* The list of invariants that have to be
 174                            preserved.  */
 175   tree value;           /* For final value elimination, the expression for
 176                            the final value of the iv.  For iv elimination,
 177                            the new bound to compare with.  */
 178   enum tree_code comp;  /* For iv elimination, the comparison.  */
 179   int inv_expr_id;      /* Loop invariant expression id.  */
 180 };
 181
 182 /* Use.  */
 183 struct iv_use
 184 {
 185   unsigned id;          /* The id of the use.  */
 186   enum use_type type;   /* Type of the use.  */
 187   struct iv *iv;        /* The induction variable it is based on.  */
 188   gimple stmt;          /* Statement in that it occurs.  */
 189   tree *op_p;           /* The place where it occurs.  */
 190   bitmap related_cands; /* The set of "related" iv candidates, plus the common
 191                            important ones.  */
 192
 193   unsigned n_map_members; /* Number of candidates in the cost_map list.  */
 194   struct cost_pair *cost_map;
 195                         /* The costs wrto the iv candidates.  */
 196
 197   struct iv_cand *selected;
 198                         /* The selected candidate.  */
 199 };
 200
 201 /* The position where the iv is computed.  */
 202 enum iv_position
 203 {
 204   IP_NORMAL,            /* At the end, just before the exit condition.  */
 205   IP_END,               /* At the end of the latch block.  */
 206   IP_BEFORE_USE,        /* Immediately before a specific use.  */
 207   IP_AFTER_USE,         /* Immediately after a specific use.  */
 208   IP_ORIGINAL           /* The original biv.  */
 209 };
 210
 211 /* The induction variable candidate.  */
 212 struct iv_cand
 213 {
 214   unsigned id;          /* The number of the candidate.  */
 215   bool important;       /* Whether this is an "important" candidate, i.e. such
 216                            that it should be considered by all uses.  */
 217   ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
 218   gimple incremented_at;/* For original biv, the statement where it is
 219                            incremented.  */
 220   tree var_before;      /* The variable used for it before increment.  */
 221   tree var_after;       /* The variable used for it after increment.  */
 222   struct iv *iv;        /* The value of the candidate.  NULL for
 223                            "pseudocandidate" used to indicate the possibility
 224                            to replace the final value of an iv by direct
 225                            computation of the value.  */
 226   unsigned cost;        /* Cost of the candidate.  */
 227   unsigned cost_step;   /* Cost of the candidate's increment operation.  */
 228   struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
 229                               where it is incremented.  */
 230   bitmap depends_on;    /* The list of invariants that are used in step of the
 231                            biv.  */
 232 };
 233
 234 /* Loop invariant expression hashtable entry.  */
 235 struct iv_inv_expr_ent
 236 {
 237   tree expr;
 238   int id;
 239   hashval_t hash;
 240 };
 241
 242 /* The data used by the induction variable optimizations.  */
 243
 244 typedef struct iv_use *iv_use_p;
 245 DEF_VEC_P(iv_use_p);
 246 DEF_VEC_ALLOC_P(iv_use_p,heap);
 247
 248 typedef struct iv_cand *iv_cand_p;
 249 DEF_VEC_P(iv_cand_p);
 250 DEF_VEC_ALLOC_P(iv_cand_p,heap);
 251
 252 struct ivopts_data
 253 {
 254   /* The currently optimized loop.  */
 255   struct loop *current_loop;
 256
 257   /* Numbers of iterations for all exits of the current loop.  */
 258   struct pointer_map_t *niters;
 259
 260   /* Number of registers used in it.  */
 261   unsigned regs_used;
 262
 263   /* The size of version_info array allocated.  */
 264   unsigned version_info_size;
 265
 266   /* The array of information for the ssa names.  */
 267   struct version_info *version_info;
 268
 269   /* The hashtable of loop invariant expressions created
 270      by ivopt.  */
 271   htab_t inv_expr_tab;
 272
 273   /* Loop invariant expression id.  */
 274   int inv_expr_id;
 275
 276   /* The bitmap of indices in version_info whose value was changed.  */
 277   bitmap relevant;
 278
 279   /* The uses of induction variables.  */
 280   VEC(iv_use_p,heap) *iv_uses;
 281
 282   /* The candidates.  */
 283   VEC(iv_cand_p,heap) *iv_candidates;
 284
 285   /* A bitmap of important candidates.  */
 286   bitmap important_candidates;
 287
 288   /* The maximum invariant id.  */
 289   unsigned max_inv_id;
 290
 291   /* Whether to consider just related and important candidates when replacing a
 292      use.  */
 293   bool consider_all_candidates;
 294
 295   /* Are we optimizing for speed?  */
 296   bool speed;
 297
 298   /* Whether the loop body includes any function calls.  */
 299   bool body_includes_call;
 300
 301   /* Whether the loop body can only be exited via single exit.  */
 302   bool loop_single_exit_p;
 303 };
 304
 305 /* An assignment of iv candidates to uses.  */
 306
 307 struct iv_ca
 308 {
 309   /* The number of uses covered by the assignment.  */
 310   unsigned upto;
 311
 312   /* Number of uses that cannot be expressed by the candidates in the set.  */
 313   unsigned bad_uses;
 314
 315   /* Candidate assigned to a use, together with the related costs.  */
 316   struct cost_pair **cand_for_use;
 317
 318   /* Number of times each candidate is used.  */
 319   unsigned *n_cand_uses;
 320
 321   /* The candidates used.  */
 322   bitmap cands;
 323
 324   /* The number of candidates in the set.  */
 325   unsigned n_cands;
 326
 327   /* Total number of registers needed.  */
 328   unsigned n_regs;
 329
 330   /* Total cost of expressing uses.  */
 331   comp_cost cand_use_cost;
 332
 333   /* Total cost of candidates.  */
 334   unsigned cand_cost;
 335
 336   /* Number of times each invariant is used.  */
 337   unsigned *n_invariant_uses;
 338
 339   /* The array holding the number of uses of each loop
 340      invariant expressions created by ivopt.  */
 341   unsigned *used_inv_expr;
 342
 343   /* The number of created loop invariants.  */
 344   unsigned num_used_inv_expr;
 345
 346   /* Total cost of the assignment.  */
 347   comp_cost cost;
 348 };
 349
 350 /* Difference of two iv candidate assignments.  */
 351
 352 struct iv_ca_delta
 353 {
 354   /* Changed use.  */
 355   struct iv_use *use;
 356
 357   /* An old assignment (for rollback purposes).  */
 358   struct cost_pair *old_cp;
 359
 360   /* A new assignment.  */
 361   struct cost_pair *new_cp;
 362
 363   /* Next change in the list.  */
 364   struct iv_ca_delta *next_change;
 365 };
 366
 367 /* Bound on number of candidates below that all candidates are considered.  */
 368
 369 #define CONSIDER_ALL_CANDIDATES_BOUND \
 370   ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
 371
 372 /* If there are more iv occurrences, we just give up (it is quite unlikely that
 373    optimizing such a loop would help, and it would take ages).  */
 374
 375 #define MAX_CONSIDERED_USES \
 376   ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
 377
 378 /* If there are at most this number of ivs in the set, try removing unnecessary
 379    ivs from the set always.  */
 380
 381 #define ALWAYS_PRUNE_CAND_SET_BOUND \
 382   ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
 383
 384 /* The list of trees for that the decl_rtl field must be reset is stored
 385    here.  */
 386
 387 static VEC(tree,heap) *decl_rtl_to_reset;
 388
 389 static comp_cost force_expr_to_var_cost (tree, bool);
 390
 391 /* Number of uses recorded in DATA.  */
 392
 393 static inline unsigned
 394 n_iv_uses (struct ivopts_data *data)
 395 {
 396   return VEC_length (iv_use_p, data->iv_uses);
 397 }
 398
 399 /* Ith use recorded in DATA.  */
 400
 401 static inline struct iv_use *
 402 iv_use (struct ivopts_data *data, unsigned i)
 403 {
 404   return VEC_index (iv_use_p, data->iv_uses, i);
 405 }
 406
 407 /* Number of candidates recorded in DATA.  */
 408
 409 static inline unsigned
 410 n_iv_cands (struct ivopts_data *data)
 411 {
 412   return VEC_length (iv_cand_p, data->iv_candidates);
 413 }
 414
 415 /* Ith candidate recorded in DATA.  */
 416
 417 static inline struct iv_cand *
 418 iv_cand (struct ivopts_data *data, unsigned i)
 419 {
 420   return VEC_index (iv_cand_p, data->iv_candidates, i);
 421 }
 422
 423 /* The single loop exit if it dominates the latch, NULL otherwise.  */
 424
 425 edge
 426 single_dom_exit (struct loop *loop)
 427 {
 428   edge exit = single_exit (loop);
 429
 430   if (!exit)
 431     return NULL;
 432
 433   if (!just_once_each_iteration_p (loop, exit->src))
 434     return NULL;
 435
 436   return exit;
 437 }
 438
 439 /* Dumps information about the induction variable IV to FILE.  */
 440
 441 extern void dump_iv (FILE *, struct iv *);
 442 void
 443 dump_iv (FILE *file, struct iv *iv)
 444 {
 445   if (iv->ssa_name)
 446     {
 447       fprintf (file, "ssa name ");
 448       print_generic_expr (file, iv->ssa_name, TDF_SLIM);
 449       fprintf (file, "\n");
 450     }
 451
 452   fprintf (file, "  type ");
 453   print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
 454   fprintf (file, "\n");
 455
 456   if (iv->step)
 457     {
 458       fprintf (file, "  base ");
 459       print_generic_expr (file, iv->base, TDF_SLIM);
 460       fprintf (file, "\n");
 461
 462       fprintf (file, "  step ");
 463       print_generic_expr (file, iv->step, TDF_SLIM);
 464       fprintf (file, "\n");
 465     }
 466   else
 467     {
 468       fprintf (file, "  invariant ");
 469       print_generic_expr (file, iv->base, TDF_SLIM);
 470       fprintf (file, "\n");
 471     }
 472
 473   if (iv->base_object)
 474     {
 475       fprintf (file, "  base object ");
 476       print_generic_expr (file, iv->base_object, TDF_SLIM);
 477       fprintf (file, "\n");
 478     }
 479
 480   if (iv->biv_p)
 481     fprintf (file, "  is a biv\n");
 482 }
 483
 484 /* Dumps information about the USE to FILE.  */
 485
 486 extern void dump_use (FILE *, struct iv_use *);
 487 void
 488 dump_use (FILE *file, struct iv_use *use)
 489 {
 490   fprintf (file, "use %d\n", use->id);
 491
 492   switch (use->type)
 493     {
 494     case USE_NONLINEAR_EXPR:
 495       fprintf (file, "  generic\n");
 496       break;
 497
 498     case USE_ADDRESS:
 499       fprintf (file, "  address\n");
 500       break;
 501
 502     case USE_COMPARE:
 503       fprintf (file, "  compare\n");
 504       break;
 505
 506     default:
 507       gcc_unreachable ();
 508     }
 509
 510   fprintf (file, "  in statement ");
 511   print_gimple_stmt (file, use->stmt, 0, 0);
 512   fprintf (file, "\n");
 513
 514   fprintf (file, "  at position ");
 515   if (use->op_p)
 516     print_generic_expr (file, *use->op_p, TDF_SLIM);
 517   fprintf (file, "\n");
 518
 519   dump_iv (file, use->iv);
 520
 521   if (use->related_cands)
 522     {
 523       fprintf (file, "  related candidates ");
 524       dump_bitmap (file, use->related_cands);
 525     }
 526 }
 527
 528 /* Dumps information about the uses to FILE.  */
 529
 530 extern void dump_uses (FILE *, struct ivopts_data *);
 531 void
 532 dump_uses (FILE *file, struct ivopts_data *data)
 533 {
 534   unsigned i;
 535   struct iv_use *use;
 536
 537   for (i = 0; i < n_iv_uses (data); i++)
 538     {
 539       use = iv_use (data, i);
 540
 541       dump_use (file, use);
 542       fprintf (file, "\n");
 543     }
 544 }
 545
 546 /* Dumps information about induction variable candidate CAND to FILE.  */
 547
 548 extern void dump_cand (FILE *, struct iv_cand *);
 549 void
 550 dump_cand (FILE *file, struct iv_cand *cand)
 551 {
 552   struct iv *iv = cand->iv;
 553
 554   fprintf (file, "candidate %d%s\n",
 555            cand->id, cand->important ? " (important)" : "");
 556
 557   if (cand->depends_on)
 558     {
 559       fprintf (file, "  depends on ");
 560       dump_bitmap (file, cand->depends_on);
 561     }
 562
 563   if (!iv)
 564     {
 565       fprintf (file, "  final value replacement\n");
 566       return;
 567     }
 568
 569   if (cand->var_before)
 570     {
 571       fprintf (file, "  var_before ");
 572       print_generic_expr (file, cand->var_before, TDF_SLIM);
 573       fprintf (file, "\n");
 574     }
 575   if (cand->var_after)
 576     {
 577       fprintf (file, "  var_after ");
 578       print_generic_expr (file, cand->var_after, TDF_SLIM);
 579       fprintf (file, "\n");
 580     }
 581
 582   switch (cand->pos)
 583     {
 584     case IP_NORMAL:
 585       fprintf (file, "  incremented before exit test\n");
 586       break;
 587
 588     case IP_BEFORE_USE:
 589       fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
 590       break;
 591
 592     case IP_AFTER_USE:
 593       fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
 594       break;
 595
 596     case IP_END:
 597       fprintf (file, "  incremented at end\n");
 598       break;
 599
 600     case IP_ORIGINAL:
 601       fprintf (file, "  original biv\n");
 602       break;
 603     }
 604
 605   dump_iv (file, iv);
 606 }
 607
 608 /* Returns the info for ssa version VER.  */
 609
 610 static inline struct version_info *
 611 ver_info (struct ivopts_data *data, unsigned ver)
 612 {
 613   return data->version_info + ver;
 614 }
 615
 616 /* Returns the info for ssa name NAME.  */
 617
 618 static inline struct version_info *
 619 name_info (struct ivopts_data *data, tree name)
 620 {
 621   return ver_info (data, SSA_NAME_VERSION (name));
 622 }
 623
 624 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
 625    emitted in LOOP.  */
 626
 627 static bool
 628 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
 629 {
 630   basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
 631
 632   gcc_assert (bb);
 633
 634   if (sbb == loop->latch)
 635     return true;
 636
 637   if (sbb != bb)
 638     return false;
 639
 640   return stmt == last_stmt (bb);
 641 }
 642
 643 /* Returns true if STMT if after the place where the original induction
 644    variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
 645    if the positions are identical.  */
 646
 647 static bool
 648 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
 649 {
 650   basic_block cand_bb = gimple_bb (cand->incremented_at);
 651   basic_block stmt_bb = gimple_bb (stmt);
 652
 653   if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
 654     return false;
 655
 656   if (stmt_bb != cand_bb)
 657     return true;
 658
 659   if (true_if_equal
 660       && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
 661     return true;
 662   return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
 663 }
 664
 665 /* Returns true if STMT if after the place where the induction variable
 666    CAND is incremented in LOOP.  */
 667
 668 static bool
 669 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
 670 {
 671   switch (cand->pos)
 672     {
 673     case IP_END:
 674       return false;
 675
 676     case IP_NORMAL:
 677       return stmt_after_ip_normal_pos (loop, stmt);
 678
 679     case IP_ORIGINAL:
 680     case IP_AFTER_USE:
 681       return stmt_after_inc_pos (cand, stmt, false);
 682
 683     case IP_BEFORE_USE:
 684       return stmt_after_inc_pos (cand, stmt, true);
 685
 686     default:
 687       gcc_unreachable ();
 688     }
 689 }
 690
 691 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */
 692
 693 static bool
 694 abnormal_ssa_name_p (tree exp)
 695 {
 696   if (!exp)
 697     return false;
 698
 699   if (TREE_CODE (exp) != SSA_NAME)
 700     return false;
 701
 702   return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
 703 }
 704
 705 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
 706    abnormal phi node.  Callback for for_each_index.  */
 707
 708 static bool
 709 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
 710                                   void *data ATTRIBUTE_UNUSED)
 711 {
 712   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
 713     {
 714       if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
 715         return false;
 716       if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
 717         return false;
 718     }
 719
 720   return !abnormal_ssa_name_p (*index);
 721 }
 722
 723 /* Returns true if EXPR contains a ssa name that occurs in an
 724    abnormal phi node.  */
 725
 726 bool
 727 contains_abnormal_ssa_name_p (tree expr)
 728 {
 729   enum tree_code code;
 730   enum tree_code_class codeclass;
 731
 732   if (!expr)
 733     return false;
 734
 735   code = TREE_CODE (expr);
 736   codeclass = TREE_CODE_CLASS (code);
 737
 738   if (code == SSA_NAME)
 739     return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
 740
 741   if (code == INTEGER_CST
 742       || is_gimple_min_invariant (expr))
 743     return false;
 744
 745   if (code == ADDR_EXPR)
 746     return !for_each_index (&TREE_OPERAND (expr, 0),
 747                             idx_contains_abnormal_ssa_name_p,
 748                             NULL);
 749
 750   if (code == COND_EXPR)
 751     return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
 752       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
 753       || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
 754
 755   switch (codeclass)
 756     {
 757     case tcc_binary:
 758     case tcc_comparison:
 759       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
 760         return true;
 761
 762       /* Fallthru.  */
 763     case tcc_unary:
 764       if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
 765         return true;
 766
 767       break;
 768
 769     default:
 770       gcc_unreachable ();
 771     }
 772
 773   return false;
 774 }
 775
 776 /*  Returns the structure describing number of iterations determined from
 777     EXIT of DATA->current_loop, or NULL if something goes wrong.  */
 778
 779 static struct tree_niter_desc *
 780 niter_for_exit (struct ivopts_data *data, edge exit)
 781 {
 782   struct tree_niter_desc *desc;
 783   void **slot;
 784
 785   if (!data->niters)
 786     {
 787       data->niters = pointer_map_create ();
 788       slot = NULL;
 789     }
 790   else
 791     slot = pointer_map_contains (data->niters, exit);
 792
 793   if (!slot)
 794     {
 795       /* Try to determine number of iterations.  We cannot safely work with ssa
 796          names that appear in phi nodes on abnormal edges, so that we do not
 797          create overlapping life ranges for them (PR 27283).  */
 798       desc = XNEW (struct tree_niter_desc);
 799       if (!number_of_iterations_exit (data->current_loop,
 800                                       exit, desc, true)
 801           || contains_abnormal_ssa_name_p (desc->niter))
 802         {
 803           XDELETE (desc);
 804           desc = NULL;
 805         }
 806       slot = pointer_map_insert (data->niters, exit);
 807       *slot = desc;
 808     }
 809   else
 810     desc = (struct tree_niter_desc *) *slot;
 811
 812   return desc;
 813 }
 814
 815 /* Returns the structure describing number of iterations determined from
 816    single dominating exit of DATA->current_loop, or NULL if something
 817    goes wrong.  */
 818
 819 static struct tree_niter_desc *
 820 niter_for_single_dom_exit (struct ivopts_data *data)
 821 {
 822   edge exit = single_dom_exit (data->current_loop);
 823
 824   if (!exit)
 825     return NULL;
 826
 827   return niter_for_exit (data, exit);
 828 }
 829
 830 /* Hash table equality function for expressions.  */
 831
 832 static int
 833 htab_inv_expr_eq (const void *ent1, const void *ent2)
 834 {
 835   const struct iv_inv_expr_ent *expr1 =
 836       (const struct iv_inv_expr_ent *)ent1;
 837   const struct iv_inv_expr_ent *expr2 =
 838       (const struct iv_inv_expr_ent *)ent2;
 839
 840   return expr1->hash == expr2->hash
 841          && operand_equal_p (expr1->expr, expr2->expr, 0);
 842 }
 843
 844 /* Hash function for loop invariant expressions.  */
 845
 846 static hashval_t
 847 htab_inv_expr_hash (const void *ent)
 848 {
 849   const struct iv_inv_expr_ent *expr =
 850       (const struct iv_inv_expr_ent *)ent;
 851   return expr->hash;
 852 }
 853
 854 /* Initializes data structures used by the iv optimization pass, stored
 855    in DATA.  */
 856
 857 static void
 858 tree_ssa_iv_optimize_init (struct ivopts_data *data)
 859 {
 860   data->version_info_size = 2 * num_ssa_names;
 861   data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
 862   data->relevant = BITMAP_ALLOC (NULL);
 863   data->important_candidates = BITMAP_ALLOC (NULL);
 864   data->max_inv_id = 0;
 865   data->niters = NULL;
 866   data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
 867   data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
 868   data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
 869                                     htab_inv_expr_eq, free);
 870   data->inv_expr_id = 0;
 871   decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
 872 }
 873
 874 /* Returns a memory object to that EXPR points.  In case we are able to
 875    determine that it does not point to any such object, NULL is returned.  */
 876
 877 static tree
 878 determine_base_object (tree expr)
 879 {
 880   enum tree_code code = TREE_CODE (expr);
 881   tree base, obj;
 882
 883   /* If this is a pointer casted to any type, we need to determine
 884      the base object for the pointer; so handle conversions before
 885      throwing away non-pointer expressions.  */
 886   if (CONVERT_EXPR_P (expr))
 887     return determine_base_object (TREE_OPERAND (expr, 0));
 888
 889   if (!POINTER_TYPE_P (TREE_TYPE (expr)))
 890     return NULL_TREE;
 891
 892   switch (code)
 893     {
 894     case INTEGER_CST:
 895       return NULL_TREE;
 896
 897     case ADDR_EXPR:
 898       obj = TREE_OPERAND (expr, 0);
 899       base = get_base_address (obj);
 900
 901       if (!base)
 902         return expr;
 903
 904       if (TREE_CODE (base) == MEM_REF)
 905         return determine_base_object (TREE_OPERAND (base, 0));
 906
 907       return fold_convert (ptr_type_node,
 908                            build_fold_addr_expr (base));
 909
 910     case POINTER_PLUS_EXPR:
 911       return determine_base_object (TREE_OPERAND (expr, 0));
 912
 913     case PLUS_EXPR:
 914     case MINUS_EXPR:
 915       /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
 916       gcc_unreachable ();
 917
 918     default:
 919       return fold_convert (ptr_type_node, expr);
 920     }
 921 }
 922
 923 /* Allocates an induction variable with given initial value BASE and step STEP
 924    for loop LOOP.  */
 925
 926 static struct iv *
 927 alloc_iv (tree base, tree step)
 928 {
 929   struct iv *iv = XCNEW (struct iv);
 930   gcc_assert (step != NULL_TREE);
 931
 932   iv->base = base;
 933   iv->base_object = determine_base_object (base);
 934   iv->step = step;
 935   iv->biv_p = false;
 936   iv->have_use_for = false;
 937   iv->use_id = 0;
 938   iv->ssa_name = NULL_TREE;
 939
 940   return iv;
 941 }
 942
 943 /* Sets STEP and BASE for induction variable IV.  */
 944
 945 static void
 946 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
 947 {
 948   struct version_info *info = name_info (data, iv);
 949
 950   gcc_assert (!info->iv);
 951
 952   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
 953   info->iv = alloc_iv (base, step);
 954   info->iv->ssa_name = iv;
 955 }
 956
 957 /* Finds induction variable declaration for VAR.  */
 958
 959 static struct iv *
 960 get_iv (struct ivopts_data *data, tree var)
 961 {
 962   basic_block bb;
 963   tree type = TREE_TYPE (var);
 964
 965   if (!POINTER_TYPE_P (type)
 966       && !INTEGRAL_TYPE_P (type))
 967     return NULL;
 968
 969   if (!name_info (data, var)->iv)
 970     {
 971       bb = gimple_bb (SSA_NAME_DEF_STMT (var));
 972
 973       if (!bb
 974           || !flow_bb_inside_loop_p (data->current_loop, bb))
 975         set_iv (data, var, var, build_int_cst (type, 0));
 976     }
 977
 978   return name_info (data, var)->iv;
 979 }
 980
 981 /* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
 982    not define a simple affine biv with nonzero step.  */
 983
 984 static tree
 985 determine_biv_step (gimple phi)
 986 {
 987   struct loop *loop = gimple_bb (phi)->loop_father;
 988   tree name = PHI_RESULT (phi);
 989   affine_iv iv;
 990
 991   if (!is_gimple_reg (name))
 992     return NULL_TREE;
 993
 994   if (!simple_iv (loop, loop, name, &iv, true))
 995     return NULL_TREE;
 996
 997   return integer_zerop (iv.step) ? NULL_TREE : iv.step;
 998 }
 999
1000 /* Finds basic ivs.  */
1001
1002 static bool
1003 find_bivs (struct ivopts_data *data)
1004 {
1005   gimple phi;
1006   tree step, type, base;
1007   bool found = false;
1008   struct loop *loop = data->current_loop;
1009   gimple_stmt_iterator psi;
1010
1011   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1012     {
1013       phi = gsi_stmt (psi);
1014
1015       if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1016         continue;
1017
1018       step = determine_biv_step (phi);
1019       if (!step)
1020         continue;
1021
1022       base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1023       base = expand_simple_operations (base);
1024       if (contains_abnormal_ssa_name_p (base)
1025           || contains_abnormal_ssa_name_p (step))
1026         continue;
1027
1028       type = TREE_TYPE (PHI_RESULT (phi));
1029       base = fold_convert (type, base);
1030       if (step)
1031         {
1032           if (POINTER_TYPE_P (type))
1033             step = convert_to_ptrofftype (step);
1034           else
1035             step = fold_convert (type, step);
1036         }
1037
1038       set_iv (data, PHI_RESULT (phi), base, step);
1039       found = true;
1040     }
1041
1042   return found;
1043 }
1044
1045 /* Marks basic ivs.  */
1046
1047 static void
1048 mark_bivs (struct ivopts_data *data)
1049 {
1050   gimple phi;
1051   tree var;
1052   struct iv *iv, *incr_iv;
1053   struct loop *loop = data->current_loop;
1054   basic_block incr_bb;
1055   gimple_stmt_iterator psi;
1056
1057   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1058     {
1059       phi = gsi_stmt (psi);
1060
1061       iv = get_iv (data, PHI_RESULT (phi));
1062       if (!iv)
1063         continue;
1064
1065       var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1066       incr_iv = get_iv (data, var);
1067       if (!incr_iv)
1068         continue;
1069
1070       /* If the increment is in the subloop, ignore it.  */
1071       incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1072       if (incr_bb->loop_father != data->current_loop
1073           || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1074         continue;
1075
1076       iv->biv_p = true;
1077       incr_iv->biv_p = true;
1078     }
1079 }
1080
1081 /* Checks whether STMT defines a linear induction variable and stores its
1082    parameters to IV.  */
1083
1084 static bool
1085 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1086 {
1087   tree lhs;
1088   struct loop *loop = data->current_loop;
1089
1090   iv->base = NULL_TREE;
1091   iv->step = NULL_TREE;
1092
1093   if (gimple_code (stmt) != GIMPLE_ASSIGN)
1094     return false;
1095
1096   lhs = gimple_assign_lhs (stmt);
1097   if (TREE_CODE (lhs) != SSA_NAME)
1098     return false;
1099
1100   if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1101     return false;
1102   iv->base = expand_simple_operations (iv->base);
1103
1104   if (contains_abnormal_ssa_name_p (iv->base)
1105       || contains_abnormal_ssa_name_p (iv->step))
1106     return false;
1107
1108   /* If STMT could throw, then do not consider STMT as defining a GIV.
1109      While this will suppress optimizations, we can not safely delete this
1110      GIV and associated statements, even if it appears it is not used.  */
1111   if (stmt_could_throw_p (stmt))
1112     return false;
1113
1114   return true;
1115 }
1116
1117 /* Finds general ivs in statement STMT.  */
1118
1119 static void
1120 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1121 {
1122   affine_iv iv;
1123
1124   if (!find_givs_in_stmt_scev (data, stmt, &iv))
1125     return;
1126
1127   set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1128 }
1129
1130 /* Finds general ivs in basic block BB.  */
1131
1132 static void
1133 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1134 {
1135   gimple_stmt_iterator bsi;
1136
1137   for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1138     find_givs_in_stmt (data, gsi_stmt (bsi));
1139 }
1140
1141 /* Finds general ivs.  */
1142
1143 static void
1144 find_givs (struct ivopts_data *data)
1145 {
1146   struct loop *loop = data->current_loop;
1147   basic_block *body = get_loop_body_in_dom_order (loop);
1148   unsigned i;
1149
1150   for (i = 0; i < loop->num_nodes; i++)
1151     find_givs_in_bb (data, body[i]);
1152   free (body);
1153 }
1154
1155 /* For each ssa name defined in LOOP determines whether it is an induction
1156    variable and if so, its initial value and step.  */
1157
1158 static bool
1159 find_induction_variables (struct ivopts_data *data)
1160 {
1161   unsigned i;
1162   bitmap_iterator bi;
1163
1164   if (!find_bivs (data))
1165     return false;
1166
1167   find_givs (data);
1168   mark_bivs (data);
1169
1170   if (dump_file && (dump_flags & TDF_DETAILS))
1171     {
1172       struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1173
1174       if (niter)
1175         {
1176           fprintf (dump_file, "  number of iterations ");
1177           print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1178           if (!integer_zerop (niter->may_be_zero))
1179             {
1180               fprintf (dump_file, "; zero if ");
1181               print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1182             }
1183           fprintf (dump_file, "\n\n");
1184         };
1185
1186       fprintf (dump_file, "Induction variables:\n\n");
1187
1188       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1189         {
1190           if (ver_info (data, i)->iv)
1191             dump_iv (dump_file, ver_info (data, i)->iv);
1192         }
1193     }
1194
1195   return true;
1196 }
1197
1198 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */
1199
1200 static struct iv_use *
1201 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1202             gimple stmt, enum use_type use_type)
1203 {
1204   struct iv_use *use = XCNEW (struct iv_use);
1205
1206   use->id = n_iv_uses (data);
1207   use->type = use_type;
1208   use->iv = iv;
1209   use->stmt = stmt;
1210   use->op_p = use_p;
1211   use->related_cands = BITMAP_ALLOC (NULL);
1212
1213   /* To avoid showing ssa name in the dumps, if it was not reset by the
1214      caller.  */
1215   iv->ssa_name = NULL_TREE;
1216
1217   if (dump_file && (dump_flags & TDF_DETAILS))
1218     dump_use (dump_file, use);
1219
1220   VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1221
1222   return use;
1223 }
1224
1225 /* Checks whether OP is a loop-level invariant and if so, records it.
1226    NONLINEAR_USE is true if the invariant is used in a way we do not
1227    handle specially.  */
1228
1229 static void
1230 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1231 {
1232   basic_block bb;
1233   struct version_info *info;
1234
1235   if (TREE_CODE (op) != SSA_NAME
1236       || !is_gimple_reg (op))
1237     return;
1238
1239   bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1240   if (bb
1241       && flow_bb_inside_loop_p (data->current_loop, bb))
1242     return;
1243
1244   info = name_info (data, op);
1245   info->name = op;
1246   info->has_nonlin_use |= nonlinear_use;
1247   if (!info->inv_id)
1248     info->inv_id = ++data->max_inv_id;
1249   bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1250 }
1251
1252 /* Checks whether the use OP is interesting and if so, records it.  */
1253
1254 static struct iv_use *
1255 find_interesting_uses_op (struct ivopts_data *data, tree op)
1256 {
1257   struct iv *iv;
1258   struct iv *civ;
1259   gimple stmt;
1260   struct iv_use *use;
1261
1262   if (TREE_CODE (op) != SSA_NAME)
1263     return NULL;
1264
1265   iv = get_iv (data, op);
1266   if (!iv)
1267     return NULL;
1268
1269   if (iv->have_use_for)
1270     {
1271       use = iv_use (data, iv->use_id);
1272
1273       gcc_assert (use->type == USE_NONLINEAR_EXPR);
1274       return use;
1275     }
1276
1277   if (integer_zerop (iv->step))
1278     {
1279       record_invariant (data, op, true);
1280       return NULL;
1281     }
1282   iv->have_use_for = true;
1283
1284   civ = XNEW (struct iv);
1285   *civ = *iv;
1286
1287   stmt = SSA_NAME_DEF_STMT (op);
1288   gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1289               || is_gimple_assign (stmt));
1290
1291   use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1292   iv->use_id = use->id;
1293
1294   return use;
1295 }
1296
1297 /* Given a condition in statement STMT, checks whether it is a compare
1298    of an induction variable and an invariant.  If this is the case,
1299    CONTROL_VAR is set to location of the iv, BOUND to the location of
1300    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1301    induction variable descriptions, and true is returned.  If this is not
1302    the case, CONTROL_VAR and BOUND are set to the arguments of the
1303    condition and false is returned.  */
1304
1305 static bool
1306 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1307                        tree **control_var, tree **bound,
1308                        struct iv **iv_var, struct iv **iv_bound)
1309 {
1310   /* The objects returned when COND has constant operands.  */
1311   static struct iv const_iv;
1312   static tree zero;
1313   tree *op0 = &zero, *op1 = &zero, *tmp_op;
1314   struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1315   bool ret = false;
1316
1317   if (gimple_code (stmt) == GIMPLE_COND)
1318     {
1319       op0 = gimple_cond_lhs_ptr (stmt);
1320       op1 = gimple_cond_rhs_ptr (stmt);
1321     }
1322   else
1323     {
1324       op0 = gimple_assign_rhs1_ptr (stmt);
1325       op1 = gimple_assign_rhs2_ptr (stmt);
1326     }
1327
1328   zero = integer_zero_node;
1329   const_iv.step = integer_zero_node;
1330
1331   if (TREE_CODE (*op0) == SSA_NAME)
1332     iv0 = get_iv (data, *op0);
1333   if (TREE_CODE (*op1) == SSA_NAME)
1334     iv1 = get_iv (data, *op1);
1335
1336   /* Exactly one of the compared values must be an iv, and the other one must
1337      be an invariant.  */
1338   if (!iv0 || !iv1)
1339     goto end;
1340
1341   if (integer_zerop (iv0->step))
1342     {
1343       /* Control variable may be on the other side.  */
1344       tmp_op = op0; op0 = op1; op1 = tmp_op;
1345       tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1346     }
1347   ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1348
1349 end:
1350   if (control_var)
1351     *control_var = op0;;
1352   if (iv_var)
1353     *iv_var = iv0;;
1354   if (bound)
1355     *bound = op1;
1356   if (iv_bound)
1357     *iv_bound = iv1;
1358
1359   return ret;
1360 }
1361
1362 /* Checks whether the condition in STMT is interesting and if so,
1363    records it.  */
1364
1365 static void
1366 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1367 {
1368   tree *var_p, *bound_p;
1369   struct iv *var_iv, *civ;
1370
1371   if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1372     {
1373       find_interesting_uses_op (data, *var_p);
1374       find_interesting_uses_op (data, *bound_p);
1375       return;
1376     }
1377
1378   civ = XNEW (struct iv);
1379   *civ = *var_iv;
1380   record_use (data, NULL, civ, stmt, USE_COMPARE);
1381 }
1382
1383 /* Returns true if expression EXPR is obviously invariant in LOOP,
1384    i.e. if all its operands are defined outside of the LOOP.  LOOP
1385    should not be the function body.  */
1386
1387 bool
1388 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1389 {
1390   basic_block def_bb;
1391   unsigned i, len;
1392
1393   gcc_assert (loop_depth (loop) > 0);
1394
1395   if (is_gimple_min_invariant (expr))
1396     return true;
1397
1398   if (TREE_CODE (expr) == SSA_NAME)
1399     {
1400       def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1401       if (def_bb
1402           && flow_bb_inside_loop_p (loop, def_bb))
1403         return false;
1404
1405       return true;
1406     }
1407
1408   if (!EXPR_P (expr))
1409     return false;
1410
1411   len = TREE_OPERAND_LENGTH (expr);
1412   for (i = 0; i < len; i++)
1413     if (TREE_OPERAND (expr, i)
1414         && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1415       return false;
1416
1417   return true;
1418 }
1419
1420 /* Returns true if statement STMT is obviously invariant in LOOP,
1421    i.e. if all its operands on the RHS are defined outside of the LOOP.
1422    LOOP should not be the function body.  */
1423
1424 bool
1425 stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1426 {
1427   unsigned i;
1428   tree lhs;
1429
1430   gcc_assert (loop_depth (loop) > 0);
1431
1432   lhs = gimple_get_lhs (stmt);
1433   for (i = 0; i < gimple_num_ops (stmt); i++)
1434     {
1435       tree op = gimple_op (stmt, i);
1436       if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1437         return false;
1438     }
1439
1440   return true;
1441 }
1442
1443 /* Cumulates the steps of indices into DATA and replaces their values with the
1444    initial ones.  Returns false when the value of the index cannot be determined.
1445    Callback for for_each_index.  */
1446
1447 struct ifs_ivopts_data
1448 {
1449   struct ivopts_data *ivopts_data;
1450   gimple stmt;
1451   tree step;
1452 };
1453
1454 static bool
1455 idx_find_step (tree base, tree *idx, void *data)
1456 {
1457   struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1458   struct iv *iv;
1459   tree step, iv_base, iv_step, lbound, off;
1460   struct loop *loop = dta->ivopts_data->current_loop;
1461
1462   /* If base is a component ref, require that the offset of the reference
1463      be invariant.  */
1464   if (TREE_CODE (base) == COMPONENT_REF)
1465     {
1466       off = component_ref_field_offset (base);
1467       return expr_invariant_in_loop_p (loop, off);
1468     }
1469
1470   /* If base is array, first check whether we will be able to move the
1471      reference out of the loop (in order to take its address in strength
1472      reduction).  In order for this to work we need both lower bound
1473      and step to be loop invariants.  */
1474   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1475     {
1476       /* Moreover, for a range, the size needs to be invariant as well.  */
1477       if (TREE_CODE (base) == ARRAY_RANGE_REF
1478           && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1479         return false;
1480
1481       step = array_ref_element_size (base);
1482       lbound = array_ref_low_bound (base);
1483
1484       if (!expr_invariant_in_loop_p (loop, step)
1485           || !expr_invariant_in_loop_p (loop, lbound))
1486         return false;
1487     }
1488
1489   if (TREE_CODE (*idx) != SSA_NAME)
1490     return true;
1491
1492   iv = get_iv (dta->ivopts_data, *idx);
1493   if (!iv)
1494     return false;
1495
1496   /* XXX  We produce for a base of *D42 with iv->base being &x[0]
1497           *&x[0], which is not folded and does not trigger the
1498           ARRAY_REF path below.  */
1499   *idx = iv->base;
1500
1501   if (integer_zerop (iv->step))
1502     return true;
1503
1504   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1505     {
1506       step = array_ref_element_size (base);
1507
1508       /* We only handle addresses whose step is an integer constant.  */
1509       if (TREE_CODE (step) != INTEGER_CST)
1510         return false;
1511     }
1512   else
1513     /* The step for pointer arithmetics already is 1 byte.  */
1514     step = size_one_node;
1515
1516   iv_base = iv->base;
1517   iv_step = iv->step;
1518   if (!convert_affine_scev (dta->ivopts_data->current_loop,
1519                             sizetype, &iv_base, &iv_step, dta->stmt,
1520                             false))
1521     {
1522       /* The index might wrap.  */
1523       return false;
1524     }
1525
1526   step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1527   dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1528
1529   return true;
1530 }
1531
1532 /* Records use in index IDX.  Callback for for_each_index.  Ivopts data
1533    object is passed to it in DATA.  */
1534
1535 static bool
1536 idx_record_use (tree base, tree *idx,
1537                 void *vdata)
1538 {
1539   struct ivopts_data *data = (struct ivopts_data *) vdata;
1540   find_interesting_uses_op (data, *idx);
1541   if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1542     {
1543       find_interesting_uses_op (data, array_ref_element_size (base));
1544       find_interesting_uses_op (data, array_ref_low_bound (base));
1545     }
1546   return true;
1547 }
1548
1549 /* If we can prove that TOP = cst * BOT for some constant cst,
1550    store cst to MUL and return true.  Otherwise return false.
1551    The returned value is always sign-extended, regardless of the
1552    signedness of TOP and BOT.  */
1553
1554 static bool
1555 constant_multiple_of (tree top, tree bot, double_int *mul)
1556 {
1557   tree mby;
1558   enum tree_code code;
1559   double_int res, p0, p1;
1560   unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1561
1562   STRIP_NOPS (top);
1563   STRIP_NOPS (bot);
1564
1565   if (operand_equal_p (top, bot, 0))
1566     {
1567       *mul = double_int_one;
1568       return true;
1569     }
1570
1571   code = TREE_CODE (top);
1572   switch (code)
1573     {
1574     case MULT_EXPR:
1575       mby = TREE_OPERAND (top, 1);
1576       if (TREE_CODE (mby) != INTEGER_CST)
1577         return false;
1578
1579       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1580         return false;
1581
1582       *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1583                               precision);
1584       return true;
1585
1586     case PLUS_EXPR:
1587     case MINUS_EXPR:
1588       if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1589           || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1590         return false;
1591
1592       if (code == MINUS_EXPR)
1593         p1 = double_int_neg (p1);
1594       *mul = double_int_sext (double_int_add (p0, p1), precision);
1595       return true;
1596
1597     case INTEGER_CST:
1598       if (TREE_CODE (bot) != INTEGER_CST)
1599         return false;
1600
1601       p0 = double_int_sext (tree_to_double_int (top), precision);
1602       p1 = double_int_sext (tree_to_double_int (bot), precision);
1603       if (double_int_zero_p (p1))
1604         return false;
1605       *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1606                               precision);
1607       return double_int_zero_p (res);
1608
1609     default:
1610       return false;
1611     }
1612 }
1613
1614 /* Returns true if memory reference REF with step STEP may be unaligned.  */
1615
1616 static bool
1617 may_be_unaligned_p (tree ref, tree step)
1618 {
1619   tree base;
1620   tree base_type;
1621   HOST_WIDE_INT bitsize;
1622   HOST_WIDE_INT bitpos;
1623   tree toffset;
1624   enum machine_mode mode;
1625   int unsignedp, volatilep;
1626   unsigned base_align;
1627
1628   /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1629      thus they are not misaligned.  */
1630   if (TREE_CODE (ref) == TARGET_MEM_REF)
1631     return false;
1632
1633   /* The test below is basically copy of what expr.c:normal_inner_ref
1634      does to check whether the object must be loaded by parts when
1635      STRICT_ALIGNMENT is true.  */
1636   base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1637                               &unsignedp, &volatilep, true);
1638   base_type = TREE_TYPE (base);
1639   base_align = get_object_alignment (base);
1640   base_align = MAX (base_align, TYPE_ALIGN (base_type));
1641
1642   if (mode != BLKmode)
1643     {
1644       unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1645
1646       if (base_align < mode_align
1647           || (bitpos % mode_align) != 0
1648           || (bitpos % BITS_PER_UNIT) != 0)
1649         return true;
1650
1651       if (toffset
1652           && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1653         return true;
1654
1655       if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1656         return true;
1657     }
1658
1659   return false;
1660 }
1661
1662 /* Return true if EXPR may be non-addressable.   */
1663
1664 bool
1665 may_be_nonaddressable_p (tree expr)
1666 {
1667   switch (TREE_CODE (expr))
1668     {
1669     case TARGET_MEM_REF:
1670       /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1671          target, thus they are always addressable.  */
1672       return false;
1673
1674     case COMPONENT_REF:
1675       return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1676              || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1677
1678     case VIEW_CONVERT_EXPR:
1679       /* This kind of view-conversions may wrap non-addressable objects
1680          and make them look addressable.  After some processing the
1681          non-addressability may be uncovered again, causing ADDR_EXPRs
1682          of inappropriate objects to be built.  */
1683       if (is_gimple_reg (TREE_OPERAND (expr, 0))
1684           || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1685         return true;
1686
1687       /* ... fall through ... */
1688
1689     case ARRAY_REF:
1690     case ARRAY_RANGE_REF:
1691       return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1692
1693     CASE_CONVERT:
1694       return true;
1695
1696     default:
1697       break;
1698     }
1699
1700   return false;
1701 }
1702
1703 /* Finds addresses in *OP_P inside STMT.  */
1704
1705 static void
1706 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1707 {
1708   tree base = *op_p, step = size_zero_node;
1709   struct iv *civ;
1710   struct ifs_ivopts_data ifs_ivopts_data;
1711
1712   /* Do not play with volatile memory references.  A bit too conservative,
1713      perhaps, but safe.  */
1714   if (gimple_has_volatile_ops (stmt))
1715     goto fail;
1716
1717   /* Ignore bitfields for now.  Not really something terribly complicated
1718      to handle.  TODO.  */
1719   if (TREE_CODE (base) == BIT_FIELD_REF)
1720     goto fail;
1721
1722   base = unshare_expr (base);
1723
1724   if (TREE_CODE (base) == TARGET_MEM_REF)
1725     {
1726       tree type = build_pointer_type (TREE_TYPE (base));
1727       tree astep;
1728
1729       if (TMR_BASE (base)
1730           && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1731         {
1732           civ = get_iv (data, TMR_BASE (base));
1733           if (!civ)
1734             goto fail;
1735
1736           TMR_BASE (base) = civ->base;
1737           step = civ->step;
1738         }
1739       if (TMR_INDEX2 (base)
1740           && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1741         {
1742           civ = get_iv (data, TMR_INDEX2 (base));
1743           if (!civ)
1744             goto fail;
1745
1746           TMR_INDEX2 (base) = civ->base;
1747           step = civ->step;
1748         }
1749       if (TMR_INDEX (base)
1750           && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1751         {
1752           civ = get_iv (data, TMR_INDEX (base));
1753           if (!civ)
1754             goto fail;
1755
1756           TMR_INDEX (base) = civ->base;
1757           astep = civ->step;
1758
1759           if (astep)
1760             {
1761               if (TMR_STEP (base))
1762                 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1763
1764               step = fold_build2 (PLUS_EXPR, type, step, astep);
1765             }
1766         }
1767
1768       if (integer_zerop (step))
1769         goto fail;
1770       base = tree_mem_ref_addr (type, base);
1771     }
1772   else
1773     {
1774       ifs_ivopts_data.ivopts_data = data;
1775       ifs_ivopts_data.stmt = stmt;
1776       ifs_ivopts_data.step = size_zero_node;
1777       if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1778           || integer_zerop (ifs_ivopts_data.step))
1779         goto fail;
1780       step = ifs_ivopts_data.step;
1781
1782       /* Check that the base expression is addressable.  This needs
1783          to be done after substituting bases of IVs into it.  */
1784       if (may_be_nonaddressable_p (base))
1785         goto fail;
1786
1787       /* Moreover, on strict alignment platforms, check that it is
1788          sufficiently aligned.  */
1789       if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1790         goto fail;
1791
1792       base = build_fold_addr_expr (base);
1793
1794       /* Substituting bases of IVs into the base expression might
1795          have caused folding opportunities.  */
1796       if (TREE_CODE (base) == ADDR_EXPR)
1797         {
1798           tree *ref = &TREE_OPERAND (base, 0);
1799           while (handled_component_p (*ref))
1800             ref = &TREE_OPERAND (*ref, 0);
1801           if (TREE_CODE (*ref) == MEM_REF)
1802             {
1803               tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1804                                       TREE_OPERAND (*ref, 0),
1805                                       TREE_OPERAND (*ref, 1));
1806               if (tem)
1807                 *ref = tem;
1808             }
1809         }
1810     }
1811
1812   civ = alloc_iv (base, step);
1813   record_use (data, op_p, civ, stmt, USE_ADDRESS);
1814   return;
1815
1816 fail:
1817   for_each_index (op_p, idx_record_use, data);
1818 }
1819
1820 /* Finds and records invariants used in STMT.  */
1821
1822 static void
1823 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1824 {
1825   ssa_op_iter iter;
1826   use_operand_p use_p;
1827   tree op;
1828
1829   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1830     {
1831       op = USE_FROM_PTR (use_p);
1832       record_invariant (data, op, false);
1833     }
1834 }
1835
1836 /* Finds interesting uses of induction variables in the statement STMT.  */
1837
1838 static void
1839 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1840 {
1841   struct iv *iv;
1842   tree op, *lhs, *rhs;
1843   ssa_op_iter iter;
1844   use_operand_p use_p;
1845   enum tree_code code;
1846
1847   find_invariants_stmt (data, stmt);
1848
1849   if (gimple_code (stmt) == GIMPLE_COND)
1850     {
1851       find_interesting_uses_cond (data, stmt);
1852       return;
1853     }
1854
1855   if (is_gimple_assign (stmt))
1856     {
1857       lhs = gimple_assign_lhs_ptr (stmt);
1858       rhs = gimple_assign_rhs1_ptr (stmt);
1859
1860       if (TREE_CODE (*lhs) == SSA_NAME)
1861         {
1862           /* If the statement defines an induction variable, the uses are not
1863              interesting by themselves.  */
1864
1865           iv = get_iv (data, *lhs);
1866
1867           if (iv && !integer_zerop (iv->step))
1868             return;
1869         }
1870
1871       code = gimple_assign_rhs_code (stmt);
1872       if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1873           && (REFERENCE_CLASS_P (*rhs)
1874               || is_gimple_val (*rhs)))
1875         {
1876           if (REFERENCE_CLASS_P (*rhs))
1877             find_interesting_uses_address (data, stmt, rhs);
1878           else
1879             find_interesting_uses_op (data, *rhs);
1880
1881           if (REFERENCE_CLASS_P (*lhs))
1882             find_interesting_uses_address (data, stmt, lhs);
1883           return;
1884         }
1885       else if (TREE_CODE_CLASS (code) == tcc_comparison)
1886         {
1887           find_interesting_uses_cond (data, stmt);
1888           return;
1889         }
1890
1891       /* TODO -- we should also handle address uses of type
1892
1893          memory = call (whatever);
1894
1895          and
1896
1897          call (memory).  */
1898     }
1899
1900   if (gimple_code (stmt) == GIMPLE_PHI
1901       && gimple_bb (stmt) == data->current_loop->header)
1902     {
1903       iv = get_iv (data, PHI_RESULT (stmt));
1904
1905       if (iv && !integer_zerop (iv->step))
1906         return;
1907     }
1908
1909   FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1910     {
1911       op = USE_FROM_PTR (use_p);
1912
1913       if (TREE_CODE (op) != SSA_NAME)
1914         continue;
1915
1916       iv = get_iv (data, op);
1917       if (!iv)
1918         continue;
1919
1920       find_interesting_uses_op (data, op);
1921     }
1922 }
1923
1924 /* Finds interesting uses of induction variables outside of loops
1925    on loop exit edge EXIT.  */
1926
1927 static void
1928 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1929 {
1930   gimple phi;
1931   gimple_stmt_iterator psi;
1932   tree def;
1933
1934   for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1935     {
1936       phi = gsi_stmt (psi);
1937       def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1938       if (is_gimple_reg (def))
1939         find_interesting_uses_op (data, def);
1940     }
1941 }
1942
1943 /* Finds uses of the induction variables that are interesting.  */
1944
1945 static void
1946 find_interesting_uses (struct ivopts_data *data)
1947 {
1948   basic_block bb;
1949   gimple_stmt_iterator bsi;
1950   basic_block *body = get_loop_body (data->current_loop);
1951   unsigned i;
1952   struct version_info *info;
1953   edge e;
1954
1955   if (dump_file && (dump_flags & TDF_DETAILS))
1956     fprintf (dump_file, "Uses:\n\n");
1957
1958   for (i = 0; i < data->current_loop->num_nodes; i++)
1959     {
1960       edge_iterator ei;
1961       bb = body[i];
1962
1963       FOR_EACH_EDGE (e, ei, bb->succs)
1964         if (e->dest != EXIT_BLOCK_PTR
1965             && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1966           find_interesting_uses_outside (data, e);
1967
1968       for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1969         find_interesting_uses_stmt (data, gsi_stmt (bsi));
1970       for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1971         if (!is_gimple_debug (gsi_stmt (bsi)))
1972           find_interesting_uses_stmt (data, gsi_stmt (bsi));
1973     }
1974
1975   if (dump_file && (dump_flags & TDF_DETAILS))
1976     {
1977       bitmap_iterator bi;
1978
1979       fprintf (dump_file, "\n");
1980
1981       EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1982         {
1983           info = ver_info (data, i);
1984           if (info->inv_id)
1985             {
1986               fprintf (dump_file, "  ");
1987               print_generic_expr (dump_file, info->name, TDF_SLIM);
1988               fprintf (dump_file, " is invariant (%d)%s\n",
1989                        info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1990             }
1991         }
1992
1993       fprintf (dump_file, "\n");
1994     }
1995
1996   free (body);
1997 }
1998
1999 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2000    is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2001    we are at the top-level of the processed address.  */
2002
2003 static tree
2004 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2005                 unsigned HOST_WIDE_INT *offset)
2006 {
2007   tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2008   enum tree_code code;
2009   tree type, orig_type = TREE_TYPE (expr);
2010   unsigned HOST_WIDE_INT off0, off1, st;
2011   tree orig_expr = expr;
2012
2013   STRIP_NOPS (expr);
2014
2015   type = TREE_TYPE (expr);
2016   code = TREE_CODE (expr);
2017   *offset = 0;
2018
2019   switch (code)
2020     {
2021     case INTEGER_CST:
2022       if (!cst_and_fits_in_hwi (expr)
2023           || integer_zerop (expr))
2024         return orig_expr;
2025
2026       *offset = int_cst_value (expr);
2027       return build_int_cst (orig_type, 0);
2028
2029     case POINTER_PLUS_EXPR:
2030     case PLUS_EXPR:
2031     case MINUS_EXPR:
2032       op0 = TREE_OPERAND (expr, 0);
2033       op1 = TREE_OPERAND (expr, 1);
2034
2035       op0 = strip_offset_1 (op0, false, false, &off0);
2036       op1 = strip_offset_1 (op1, false, false, &off1);
2037
2038       *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2039       if (op0 == TREE_OPERAND (expr, 0)
2040           && op1 == TREE_OPERAND (expr, 1))
2041         return orig_expr;
2042
2043       if (integer_zerop (op1))
2044         expr = op0;
2045       else if (integer_zerop (op0))
2046         {
2047           if (code == MINUS_EXPR)
2048             expr = fold_build1 (NEGATE_EXPR, type, op1);
2049           else
2050             expr = op1;
2051         }
2052       else
2053         expr = fold_build2 (code, type, op0, op1);
2054
2055       return fold_convert (orig_type, expr);
2056
2057     case MULT_EXPR:
2058       op1 = TREE_OPERAND (expr, 1);
2059       if (!cst_and_fits_in_hwi (op1))
2060         return orig_expr;
2061
2062       op0 = TREE_OPERAND (expr, 0);
2063       op0 = strip_offset_1 (op0, false, false, &off0);
2064       if (op0 == TREE_OPERAND (expr, 0))
2065         return orig_expr;
2066
2067       *offset = off0 * int_cst_value (op1);
2068       if (integer_zerop (op0))
2069         expr = op0;
2070       else
2071         expr = fold_build2 (MULT_EXPR, type, op0, op1);
2072
2073       return fold_convert (orig_type, expr);
2074
2075     case ARRAY_REF:
2076     case ARRAY_RANGE_REF:
2077       if (!inside_addr)
2078         return orig_expr;
2079
2080       step = array_ref_element_size (expr);
2081       if (!cst_and_fits_in_hwi (step))
2082         break;
2083
2084       st = int_cst_value (step);
2085       op1 = TREE_OPERAND (expr, 1);
2086       op1 = strip_offset_1 (op1, false, false, &off1);
2087       *offset = off1 * st;
2088
2089       if (top_compref
2090           && integer_zerop (op1))
2091         {
2092           /* Strip the component reference completely.  */
2093           op0 = TREE_OPERAND (expr, 0);
2094           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2095           *offset += off0;
2096           return op0;
2097         }
2098       break;
2099
2100     case COMPONENT_REF:
2101       if (!inside_addr)
2102         return orig_expr;
2103
2104       tmp = component_ref_field_offset (expr);
2105       if (top_compref
2106           && cst_and_fits_in_hwi (tmp))
2107         {
2108           /* Strip the component reference completely.  */
2109           op0 = TREE_OPERAND (expr, 0);
2110           op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2111           *offset = off0 + int_cst_value (tmp);
2112           return op0;
2113         }
2114       break;
2115
2116     case ADDR_EXPR:
2117       op0 = TREE_OPERAND (expr, 0);
2118       op0 = strip_offset_1 (op0, true, true, &off0);
2119       *offset += off0;
2120
2121       if (op0 == TREE_OPERAND (expr, 0))
2122         return orig_expr;
2123
2124       expr = build_fold_addr_expr (op0);
2125       return fold_convert (orig_type, expr);
2126
2127     case MEM_REF:
2128       /* ???  Offset operand?  */
2129       inside_addr = false;
2130       break;
2131
2132     default:
2133       return orig_expr;
2134     }
2135
2136   /* Default handling of expressions for that we want to recurse into
2137      the first operand.  */
2138   op0 = TREE_OPERAND (expr, 0);
2139   op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2140   *offset += off0;
2141
2142   if (op0 == TREE_OPERAND (expr, 0)
2143       && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2144     return orig_expr;
2145
2146   expr = copy_node (expr);
2147   TREE_OPERAND (expr, 0) = op0;
2148   if (op1)
2149     TREE_OPERAND (expr, 1) = op1;
2150
2151   /* Inside address, we might strip the top level component references,
2152      thus changing type of the expression.  Handling of ADDR_EXPR
2153      will fix that.  */
2154   expr = fold_convert (orig_type, expr);
2155
2156   return expr;
2157 }
2158
2159 /* Strips constant offsets from EXPR and stores them to OFFSET.  */
2160
2161 static tree
2162 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2163 {
2164   return strip_offset_1 (expr, false, false, offset);
2165 }
2166
2167 /* Returns variant of TYPE that can be used as base for different uses.
2168    We return unsigned type with the same precision, which avoids problems
2169    with overflows.  */
2170
2171 static tree
2172 generic_type_for (tree type)
2173 {
2174   if (POINTER_TYPE_P (type))
2175     return unsigned_type_for (type);
2176
2177   if (TYPE_UNSIGNED (type))
2178     return type;
2179
2180   return unsigned_type_for (type);
2181 }
2182
2183 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2184    the bitmap to that we should store it.  */
2185
2186 static struct ivopts_data *fd_ivopts_data;
2187 static tree
2188 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2189 {
2190   bitmap *depends_on = (bitmap *) data;
2191   struct version_info *info;
2192
2193   if (TREE_CODE (*expr_p) != SSA_NAME)
2194     return NULL_TREE;
2195   info = name_info (fd_ivopts_data, *expr_p);
2196
2197   if (!info->inv_id || info->has_nonlin_use)
2198     return NULL_TREE;
2199
2200   if (!*depends_on)
2201     *depends_on = BITMAP_ALLOC (NULL);
2202   bitmap_set_bit (*depends_on, info->inv_id);
2203
2204   return NULL_TREE;
2205 }
2206
2207 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2208    position to POS.  If USE is not NULL, the candidate is set as related to
2209    it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2210    replacement of the final value of the iv by a direct computation.  */
2211
2212 static struct iv_cand *
2213 add_candidate_1 (struct ivopts_data *data,
2214                  tree base, tree step, bool important, enum iv_position pos,
2215                  struct iv_use *use, gimple incremented_at)
2216 {
2217   unsigned i;
2218   struct iv_cand *cand = NULL;
2219   tree type, orig_type;
2220
2221   /* For non-original variables, make sure their values are computed in a type
2222      that does not invoke undefined behavior on overflows (since in general,
2223      we cannot prove that these induction variables are non-wrapping).  */
2224   if (pos != IP_ORIGINAL)
2225     {
2226       orig_type = TREE_TYPE (base);
2227       type = generic_type_for (orig_type);
2228       if (type != orig_type)
2229         {
2230           base = fold_convert (type, base);
2231           step = fold_convert (type, step);
2232         }
2233     }
2234
2235   for (i = 0; i < n_iv_cands (data); i++)
2236     {
2237       cand = iv_cand (data, i);
2238
2239       if (cand->pos != pos)
2240         continue;
2241
2242       if (cand->incremented_at != incremented_at
2243           || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2244               && cand->ainc_use != use))
2245         continue;
2246
2247       if (!cand->iv)
2248         {
2249           if (!base && !step)
2250             break;
2251
2252           continue;
2253         }
2254
2255       if (!base && !step)
2256         continue;
2257
2258       if (operand_equal_p (base, cand->iv->base, 0)
2259           && operand_equal_p (step, cand->iv->step, 0)
2260           && (TYPE_PRECISION (TREE_TYPE (base))
2261               == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2262         break;
2263     }
2264
2265   if (i == n_iv_cands (data))
2266     {
2267       cand = XCNEW (struct iv_cand);
2268       cand->id = i;
2269
2270       if (!base && !step)
2271         cand->iv = NULL;
2272       else
2273         cand->iv = alloc_iv (base, step);
2274
2275       cand->pos = pos;
2276       if (pos != IP_ORIGINAL && cand->iv)
2277         {
2278           cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2279           cand->var_after = cand->var_before;
2280         }
2281       cand->important = important;
2282       cand->incremented_at = incremented_at;
2283       VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2284
2285       if (step
2286           && TREE_CODE (step) != INTEGER_CST)
2287         {
2288           fd_ivopts_data = data;
2289           walk_tree (&step, find_depends, &cand->depends_on, NULL);
2290         }
2291
2292       if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2293         cand->ainc_use = use;
2294       else
2295         cand->ainc_use = NULL;
2296
2297       if (dump_file && (dump_flags & TDF_DETAILS))
2298         dump_cand (dump_file, cand);
2299     }
2300
2301   if (important && !cand->important)
2302     {
2303       cand->important = true;
2304       if (dump_file && (dump_flags & TDF_DETAILS))
2305         fprintf (dump_file, "Candidate %d is important\n", cand->id);
2306     }
2307
2308   if (use)
2309     {
2310       bitmap_set_bit (use->related_cands, i);
2311       if (dump_file && (dump_flags & TDF_DETAILS))
2312         fprintf (dump_file, "Candidate %d is related to use %d\n",
2313                  cand->id, use->id);
2314     }
2315
2316   return cand;
2317 }
2318
2319 /* Returns true if incrementing the induction variable at the end of the LOOP
2320    is allowed.
2321
2322    The purpose is to avoid splitting latch edge with a biv increment, thus
2323    creating a jump, possibly confusing other optimization passes and leaving
2324    less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
2325    is not available (so we do not have a better alternative), or if the latch
2326    edge is already nonempty.  */
2327
2328 static bool
2329 allow_ip_end_pos_p (struct loop *loop)
2330 {
2331   if (!ip_normal_pos (loop))
2332     return true;
2333
2334   if (!empty_block_p (ip_end_pos (loop)))
2335     return true;
2336
2337   return false;
2338 }
2339
2340 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2341    Important field is set to IMPORTANT.  */
2342
2343 static void
2344 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2345                         bool important, struct iv_use *use)
2346 {
2347   basic_block use_bb = gimple_bb (use->stmt);
2348   enum machine_mode mem_mode;
2349   unsigned HOST_WIDE_INT cstepi;
2350
2351   /* If we insert the increment in any position other than the standard
2352      ones, we must ensure that it is incremented once per iteration.
2353      It must not be in an inner nested loop, or one side of an if
2354      statement.  */
2355   if (use_bb->loop_father != data->current_loop
2356       || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2357       || stmt_could_throw_p (use->stmt)
2358       || !cst_and_fits_in_hwi (step))
2359     return;
2360
2361   cstepi = int_cst_value (step);
2362
2363   mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2364   if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2365         || USE_STORE_PRE_INCREMENT (mem_mode))
2366        && GET_MODE_SIZE (mem_mode) == cstepi)
2367       || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2368            || USE_STORE_PRE_DECREMENT (mem_mode))
2369           && GET_MODE_SIZE (mem_mode) == -cstepi))
2370     {
2371       enum tree_code code = MINUS_EXPR;
2372       tree new_base;
2373       tree new_step = step;
2374
2375       if (POINTER_TYPE_P (TREE_TYPE (base)))
2376         {
2377           new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2378           code = POINTER_PLUS_EXPR;
2379         }
2380       else
2381         new_step = fold_convert (TREE_TYPE (base), new_step);
2382       new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2383       add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2384                        use->stmt);
2385     }
2386   if (((USE_LOAD_POST_INCREMENT (mem_mode)
2387         || USE_STORE_POST_INCREMENT (mem_mode))
2388        && GET_MODE_SIZE (mem_mode) == cstepi)
2389       || ((USE_LOAD_POST_DECREMENT (mem_mode)
2390            || USE_STORE_POST_DECREMENT (mem_mode))
2391           && GET_MODE_SIZE (mem_mode) == -cstepi))
2392     {
2393       add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2394                        use->stmt);
2395     }
2396 }
2397
2398 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2399    position to POS.  If USE is not NULL, the candidate is set as related to
2400    it.  The candidate computation is scheduled on all available positions.  */
2401
2402 static void
2403 add_candidate (struct ivopts_data *data,
2404                tree base, tree step, bool important, struct iv_use *use)
2405 {
2406   if (ip_normal_pos (data->current_loop))
2407     add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2408   if (ip_end_pos (data->current_loop)
2409       && allow_ip_end_pos_p (data->current_loop))
2410     add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2411
2412   if (use != NULL && use->type == USE_ADDRESS)
2413     add_autoinc_candidates (data, base, step, important, use);
2414 }
2415
2416 /* Adds standard iv candidates.  */
2417
2418 static void
2419 add_standard_iv_candidates (struct ivopts_data *data)
2420 {
2421   add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2422
2423   /* The same for a double-integer type if it is still fast enough.  */
2424   if (TYPE_PRECISION
2425         (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2426       && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2427     add_candidate (data, build_int_cst (long_integer_type_node, 0),
2428                    build_int_cst (long_integer_type_node, 1), true, NULL);
2429
2430   /* The same for a double-integer type if it is still fast enough.  */
2431   if (TYPE_PRECISION
2432         (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2433       && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2434     add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2435                    build_int_cst (long_long_integer_type_node, 1), true, NULL);
2436 }
2437
2438
2439 /* Adds candidates bases on the old induction variable IV.  */
2440
2441 static void
2442 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2443 {
2444   gimple phi;
2445   tree def;
2446   struct iv_cand *cand;
2447
2448   add_candidate (data, iv->base, iv->step, true, NULL);
2449
2450   /* The same, but with initial value zero.  */
2451   if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2452     add_candidate (data, size_int (0), iv->step, true, NULL);
2453   else
2454     add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2455                    iv->step, true, NULL);
2456
2457   phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2458   if (gimple_code (phi) == GIMPLE_PHI)
2459     {
2460       /* Additionally record the possibility of leaving the original iv
2461          untouched.  */
2462       def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2463       cand = add_candidate_1 (data,
2464                               iv->base, iv->step, true, IP_ORIGINAL, NULL,
2465                               SSA_NAME_DEF_STMT (def));
2466       cand->var_before = iv->ssa_name;
2467       cand->var_after = def;
2468     }
2469 }
2470
2471 /* Adds candidates based on the old induction variables.  */
2472
2473 static void
2474 add_old_ivs_candidates (struct ivopts_data *data)
2475 {
2476   unsigned i;
2477   struct iv *iv;
2478   bitmap_iterator bi;
2479
2480   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2481     {
2482       iv = ver_info (data, i)->iv;
2483       if (iv && iv->biv_p && !integer_zerop (iv->step))
2484         add_old_iv_candidates (data, iv);
2485     }
2486 }
2487
2488 /* Adds candidates based on the value of the induction variable IV and USE.  */
2489
2490 static void
2491 add_iv_value_candidates (struct ivopts_data *data,
2492                          struct iv *iv, struct iv_use *use)
2493 {
2494   unsigned HOST_WIDE_INT offset;
2495   tree base;
2496   tree basetype;
2497
2498   add_candidate (data, iv->base, iv->step, false, use);
2499
2500   /* The same, but with initial value zero.  Make such variable important,
2501      since it is generic enough so that possibly many uses may be based
2502      on it.  */
2503   basetype = TREE_TYPE (iv->base);
2504   if (POINTER_TYPE_P (basetype))
2505     basetype = sizetype;
2506   add_candidate (data, build_int_cst (basetype, 0),
2507                  iv->step, true, use);
2508
2509   /* Third, try removing the constant offset.  Make sure to even
2510      add a candidate for &a[0] vs. (T *)&a.  */
2511   base = strip_offset (iv->base, &offset);
2512   if (offset
2513       || base != iv->base)
2514     add_candidate (data, base, iv->step, false, use);
2515 }
2516
2517 /* Adds candidates based on the uses.  */
2518
2519 static void
2520 add_derived_ivs_candidates (struct ivopts_data *data)
2521 {
2522   unsigned i;
2523
2524   for (i = 0; i < n_iv_uses (data); i++)
2525     {
2526       struct iv_use *use = iv_use (data, i);
2527
2528       if (!use)
2529         continue;
2530
2531       switch (use->type)
2532         {
2533         case USE_NONLINEAR_EXPR:
2534         case USE_COMPARE:
2535         case USE_ADDRESS:
2536           /* Just add the ivs based on the value of the iv used here.  */
2537           add_iv_value_candidates (data, use->iv, use);
2538           break;
2539
2540         default:
2541           gcc_unreachable ();
2542         }
2543     }
2544 }
2545
2546 /* Record important candidates and add them to related_cands bitmaps
2547    if needed.  */
2548
2549 static void
2550 record_important_candidates (struct ivopts_data *data)
2551 {
2552   unsigned i;
2553   struct iv_use *use;
2554
2555   for (i = 0; i < n_iv_cands (data); i++)
2556     {
2557       struct iv_cand *cand = iv_cand (data, i);
2558
2559       if (cand->important)
2560         bitmap_set_bit (data->important_candidates, i);
2561     }
2562
2563   data->consider_all_candidates = (n_iv_cands (data)
2564                                    <= CONSIDER_ALL_CANDIDATES_BOUND);
2565
2566   if (data->consider_all_candidates)
2567     {
2568       /* We will not need "related_cands" bitmaps in this case,
2569          so release them to decrease peak memory consumption.  */
2570       for (i = 0; i < n_iv_uses (data); i++)
2571         {
2572           use = iv_use (data, i);
2573           BITMAP_FREE (use->related_cands);
2574         }
2575     }
2576   else
2577     {
2578       /* Add important candidates to the related_cands bitmaps.  */
2579       for (i = 0; i < n_iv_uses (data); i++)
2580         bitmap_ior_into (iv_use (data, i)->related_cands,
2581                          data->important_candidates);
2582     }
2583 }
2584
2585 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2586    If consider_all_candidates is true, we use a two-dimensional array, otherwise
2587    we allocate a simple list to every use.  */
2588
2589 static void
2590 alloc_use_cost_map (struct ivopts_data *data)
2591 {
2592   unsigned i, size, s, j;
2593
2594   for (i = 0; i < n_iv_uses (data); i++)
2595     {
2596       struct iv_use *use = iv_use (data, i);
2597       bitmap_iterator bi;
2598
2599       if (data->consider_all_candidates)
2600         size = n_iv_cands (data);
2601       else
2602         {
2603           s = 0;
2604           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2605             {
2606               s++;
2607             }
2608
2609           /* Round up to the power of two, so that moduling by it is fast.  */
2610           for (size = 1; size < s; size <<= 1)
2611             continue;
2612         }
2613
2614       use->n_map_members = size;
2615       use->cost_map = XCNEWVEC (struct cost_pair, size);
2616     }
2617 }
2618
2619 /* Returns description of computation cost of expression whose runtime
2620    cost is RUNTIME and complexity corresponds to COMPLEXITY.  */
2621
2622 static comp_cost
2623 new_cost (unsigned runtime, unsigned complexity)
2624 {
2625   comp_cost cost;
2626
2627   cost.cost = runtime;
2628   cost.complexity = complexity;
2629
2630   return cost;
2631 }
2632
2633 /* Adds costs COST1 and COST2.  */
2634
2635 static comp_cost
2636 add_costs (comp_cost cost1, comp_cost cost2)
2637 {
2638   cost1.cost += cost2.cost;
2639   cost1.complexity += cost2.complexity;
2640
2641   return cost1;
2642 }
2643 /* Subtracts costs COST1 and COST2.  */
2644
2645 static comp_cost
2646 sub_costs (comp_cost cost1, comp_cost cost2)
2647 {
2648   cost1.cost -= cost2.cost;
2649   cost1.complexity -= cost2.complexity;
2650
2651   return cost1;
2652 }
2653
2654 /* Returns a negative number if COST1 < COST2, a positive number if
2655    COST1 > COST2, and 0 if COST1 = COST2.  */
2656
2657 static int
2658 compare_costs (comp_cost cost1, comp_cost cost2)
2659 {
2660   if (cost1.cost == cost2.cost)
2661     return cost1.complexity - cost2.complexity;
2662
2663   return cost1.cost - cost2.cost;
2664 }
2665
2666 /* Returns true if COST is infinite.  */
2667
2668 static bool
2669 infinite_cost_p (comp_cost cost)
2670 {
2671   return cost.cost == INFTY;
2672 }
2673
2674 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2675    on invariants DEPENDS_ON and that the value used in expressing it
2676    is VALUE, and in case of iv elimination the comparison operator is COMP.  */
2677
2678 static void
2679 set_use_iv_cost (struct ivopts_data *data,
2680                  struct iv_use *use, struct iv_cand *cand,
2681                  comp_cost cost, bitmap depends_on, tree value,
2682                  enum tree_code comp, int inv_expr_id)
2683 {
2684   unsigned i, s;
2685
2686   if (infinite_cost_p (cost))
2687     {
2688       BITMAP_FREE (depends_on);
2689       return;
2690     }
2691
2692   if (data->consider_all_candidates)
2693     {
2694       use->cost_map[cand->id].cand = cand;
2695       use->cost_map[cand->id].cost = cost;
2696       use->cost_map[cand->id].depends_on = depends_on;
2697       use->cost_map[cand->id].value = value;
2698       use->cost_map[cand->id].comp = comp;
2699       use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2700       return;
2701     }
2702
2703   /* n_map_members is a power of two, so this computes modulo.  */
2704   s = cand->id & (use->n_map_members - 1);
2705   for (i = s; i < use->n_map_members; i++)
2706     if (!use->cost_map[i].cand)
2707       goto found;
2708   for (i = 0; i < s; i++)
2709     if (!use->cost_map[i].cand)
2710       goto found;
2711
2712   gcc_unreachable ();
2713
2714 found:
2715   use->cost_map[i].cand = cand;
2716   use->cost_map[i].cost = cost;
2717   use->cost_map[i].depends_on = depends_on;
2718   use->cost_map[i].value = value;
2719   use->cost_map[i].comp = comp;
2720   use->cost_map[i].inv_expr_id = inv_expr_id;
2721 }
2722
2723 /* Gets cost of (USE, CANDIDATE) pair.  */
2724
2725 static struct cost_pair *
2726 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2727                  struct iv_cand *cand)
2728 {
2729   unsigned i, s;
2730   struct cost_pair *ret;
2731
2732   if (!cand)
2733     return NULL;
2734
2735   if (data->consider_all_candidates)
2736     {
2737       ret = use->cost_map + cand->id;
2738       if (!ret->cand)
2739         return NULL;
2740
2741       return ret;
2742     }
2743
2744   /* n_map_members is a power of two, so this computes modulo.  */
2745   s = cand->id & (use->n_map_members - 1);
2746   for (i = s; i < use->n_map_members; i++)
2747     if (use->cost_map[i].cand == cand)
2748       return use->cost_map + i;
2749
2750   for (i = 0; i < s; i++)
2751     if (use->cost_map[i].cand == cand)
2752       return use->cost_map + i;
2753
2754   return NULL;
2755 }
2756
2757 /* Returns estimate on cost of computing SEQ.  */
2758
2759 static unsigned
2760 seq_cost (rtx seq, bool speed)
2761 {
2762   unsigned cost = 0;
2763   rtx set;
2764
2765   for (; seq; seq = NEXT_INSN (seq))
2766     {
2767       set = single_set (seq);
2768       if (set)
2769         cost += set_src_cost (SET_SRC (set), speed);
2770       else
2771         cost++;
2772     }
2773
2774   return cost;
2775 }
2776
2777 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */
2778 static rtx
2779 produce_memory_decl_rtl (tree obj, int *regno)
2780 {
2781   addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2782   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2783   rtx x;
2784
2785   gcc_assert (obj);
2786   if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2787     {
2788       const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2789       x = gen_rtx_SYMBOL_REF (address_mode, name);
2790       SET_SYMBOL_REF_DECL (x, obj);
2791       x = gen_rtx_MEM (DECL_MODE (obj), x);
2792       set_mem_addr_space (x, as);
2793       targetm.encode_section_info (obj, x, true);
2794     }
2795   else
2796     {
2797       x = gen_raw_REG (address_mode, (*regno)++);
2798       x = gen_rtx_MEM (DECL_MODE (obj), x);
2799       set_mem_addr_space (x, as);
2800     }
2801
2802   return x;
2803 }
2804
2805 /* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
2806    walk_tree.  DATA contains the actual fake register number.  */
2807
2808 static tree
2809 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2810 {
2811   tree obj = NULL_TREE;
2812   rtx x = NULL_RTX;
2813   int *regno = (int *) data;
2814
2815   switch (TREE_CODE (*expr_p))
2816     {
2817     case ADDR_EXPR:
2818       for (expr_p = &TREE_OPERAND (*expr_p, 0);
2819            handled_component_p (*expr_p);
2820            expr_p = &TREE_OPERAND (*expr_p, 0))
2821         continue;
2822       obj = *expr_p;
2823       if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2824         x = produce_memory_decl_rtl (obj, regno);
2825       break;
2826
2827     case SSA_NAME:
2828       *ws = 0;
2829       obj = SSA_NAME_VAR (*expr_p);
2830       if (!DECL_RTL_SET_P (obj))
2831         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2832       break;
2833
2834     case VAR_DECL:
2835     case PARM_DECL:
2836     case RESULT_DECL:
2837       *ws = 0;
2838       obj = *expr_p;
2839
2840       if (DECL_RTL_SET_P (obj))
2841         break;
2842
2843       if (DECL_MODE (obj) == BLKmode)
2844         x = produce_memory_decl_rtl (obj, regno);
2845       else
2846         x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2847
2848       break;
2849
2850     default:
2851       break;
2852     }
2853
2854   if (x)
2855     {
2856       VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2857       SET_DECL_RTL (obj, x);
2858     }
2859
2860   return NULL_TREE;
2861 }
2862
2863 /* Determines cost of the computation of EXPR.  */
2864
2865 static unsigned
2866 computation_cost (tree expr, bool speed)
2867 {
2868   rtx seq, rslt;
2869   tree type = TREE_TYPE (expr);
2870   unsigned cost;
2871   /* Avoid using hard regs in ways which may be unsupported.  */
2872   int regno = LAST_VIRTUAL_REGISTER + 1;
2873   struct cgraph_node *node = cgraph_get_node (current_function_decl);
2874   enum node_frequency real_frequency = node->frequency;
2875
2876   node->frequency = NODE_FREQUENCY_NORMAL;
2877   crtl->maybe_hot_insn_p = speed;
2878   walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2879   start_sequence ();
2880   rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2881   seq = get_insns ();
2882   end_sequence ();
2883   default_rtl_profile ();
2884   node->frequency = real_frequency;
2885
2886   cost = seq_cost (seq, speed);
2887   if (MEM_P (rslt))
2888     cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2889                           TYPE_ADDR_SPACE (type), speed);
2890   else if (!REG_P (rslt))
2891     cost += set_src_cost (rslt, speed);
2892
2893   return cost;
2894 }
2895
2896 /* Returns variable containing the value of candidate CAND at statement AT.  */
2897
2898 static tree
2899 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2900 {
2901   if (stmt_after_increment (loop, cand, stmt))
2902     return cand->var_after;
2903   else
2904     return cand->var_before;
2905 }
2906
2907 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2908    same precision that is at least as wide as the precision of TYPE, stores
2909    BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
2910    type of A and B.  */
2911
2912 static tree
2913 determine_common_wider_type (tree *a, tree *b)
2914 {
2915   tree wider_type = NULL;
2916   tree suba, subb;
2917   tree atype = TREE_TYPE (*a);
2918
2919   if (CONVERT_EXPR_P (*a))
2920     {
2921       suba = TREE_OPERAND (*a, 0);
2922       wider_type = TREE_TYPE (suba);
2923       if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2924         return atype;
2925     }
2926   else
2927     return atype;
2928
2929   if (CONVERT_EXPR_P (*b))
2930     {
2931       subb = TREE_OPERAND (*b, 0);
2932       if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2933         return atype;
2934     }
2935   else
2936     return atype;
2937
2938   *a = suba;
2939   *b = subb;
2940   return wider_type;
2941 }
2942
2943 /* Determines the expression by that USE is expressed from induction variable
2944    CAND at statement AT in LOOP.  The expression is stored in a decomposed
2945    form into AFF.  Returns false if USE cannot be expressed using CAND.  */
2946
2947 static bool
2948 get_computation_aff (struct loop *loop,
2949                      struct iv_use *use, struct iv_cand *cand, gimple at,
2950                      struct affine_tree_combination *aff)
2951 {
2952   tree ubase = use->iv->base;
2953   tree ustep = use->iv->step;
2954   tree cbase = cand->iv->base;
2955   tree cstep = cand->iv->step, cstep_common;
2956   tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2957   tree common_type, var;
2958   tree uutype;
2959   aff_tree cbase_aff, var_aff;
2960   double_int rat;
2961
2962   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2963     {
2964       /* We do not have a precision to express the values of use.  */
2965       return false;
2966     }
2967
2968   var = var_at_stmt (loop, cand, at);
2969   uutype = unsigned_type_for (utype);
2970
2971   /* If the conversion is not noop, perform it.  */
2972   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2973     {
2974       cstep = fold_convert (uutype, cstep);
2975       cbase = fold_convert (uutype, cbase);
2976       var = fold_convert (uutype, var);
2977     }
2978
2979   if (!constant_multiple_of (ustep, cstep, &rat))
2980     return false;
2981
2982   /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2983      type, we achieve better folding by computing their difference in this
2984      wider type, and cast the result to UUTYPE.  We do not need to worry about
2985      overflows, as all the arithmetics will in the end be performed in UUTYPE
2986      anyway.  */
2987   common_type = determine_common_wider_type (&ubase, &cbase);
2988
2989   /* use = ubase - ratio * cbase + ratio * var.  */
2990   tree_to_aff_combination (ubase, common_type, aff);
2991   tree_to_aff_combination (cbase, common_type, &cbase_aff);
2992   tree_to_aff_combination (var, uutype, &var_aff);
2993
2994   /* We need to shift the value if we are after the increment.  */
2995   if (stmt_after_increment (loop, cand, at))
2996     {
2997       aff_tree cstep_aff;
2998
2999       if (common_type != uutype)
3000         cstep_common = fold_convert (common_type, cstep);
3001       else
3002         cstep_common = cstep;
3003
3004       tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3005       aff_combination_add (&cbase_aff, &cstep_aff);
3006     }
3007
3008   aff_combination_scale (&cbase_aff, double_int_neg (rat));
3009   aff_combination_add (aff, &cbase_aff);
3010   if (common_type != uutype)
3011     aff_combination_convert (aff, uutype);
3012
3013   aff_combination_scale (&var_aff, rat);
3014   aff_combination_add (aff, &var_aff);
3015
3016   return true;
3017 }
3018
3019 /* Determines the expression by that USE is expressed from induction variable
3020    CAND at statement AT in LOOP.  The computation is unshared.  */
3021
3022 static tree
3023 get_computation_at (struct loop *loop,
3024                     struct iv_use *use, struct iv_cand *cand, gimple at)
3025 {
3026   aff_tree aff;
3027   tree type = TREE_TYPE (use->iv->base);
3028
3029   if (!get_computation_aff (loop, use, cand, at, &aff))
3030     return NULL_TREE;
3031   unshare_aff_combination (&aff);
3032   return fold_convert (type, aff_combination_to_tree (&aff));
3033 }
3034
3035 /* Determines the expression by that USE is expressed from induction variable
3036    CAND in LOOP.  The computation is unshared.  */
3037
3038 static tree
3039 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3040 {
3041   return get_computation_at (loop, use, cand, use->stmt);
3042 }
3043
3044 /* Adjust the cost COST for being in loop setup rather than loop body.
3045    If we're optimizing for space, the loop setup overhead is constant;
3046    if we're optimizing for speed, amortize it over the per-iteration cost.  */
3047 static unsigned
3048 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3049 {
3050   if (cost == INFTY)
3051     return cost;
3052   else if (optimize_loop_for_speed_p (data->current_loop))
3053     return cost / avg_loop_niter (data->current_loop);
3054   else
3055     return cost;
3056 }
3057
3058 /* Returns cost of addition in MODE.  */
3059
3060 static unsigned
3061 add_cost (enum machine_mode mode, bool speed)
3062 {
3063   static unsigned costs[NUM_MACHINE_MODES];
3064   rtx seq;
3065   unsigned cost;
3066
3067   if (costs[mode])
3068     return costs[mode];
3069
3070   start_sequence ();
3071   force_operand (gen_rtx_fmt_ee (PLUS, mode,
3072                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3073                                  gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3074                  NULL_RTX);
3075   seq = get_insns ();
3076   end_sequence ();
3077
3078   cost = seq_cost (seq, speed);
3079   if (!cost)
3080     cost = 1;
3081
3082   costs[mode] = cost;
3083
3084   if (dump_file && (dump_flags & TDF_DETAILS))
3085     fprintf (dump_file, "Addition in %s costs %d\n",
3086              GET_MODE_NAME (mode), cost);
3087   return cost;
3088 }
3089
3090 /* Entry in a hashtable of already known costs for multiplication.  */
3091 struct mbc_entry
3092 {
3093   HOST_WIDE_INT cst;            /* The constant to multiply by.  */
3094   enum machine_mode mode;       /* In mode.  */
3095   unsigned cost;                /* The cost.  */
3096 };
3097
3098 /* Counts hash value for the ENTRY.  */
3099
3100 static hashval_t
3101 mbc_entry_hash (const void *entry)
3102 {
3103   const struct mbc_entry *e = (const struct mbc_entry *) entry;
3104
3105   return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3106 }
3107
3108 /* Compares the hash table entries ENTRY1 and ENTRY2.  */
3109
3110 static int
3111 mbc_entry_eq (const void *entry1, const void *entry2)
3112 {
3113   const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3114   const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3115
3116   return (e1->mode == e2->mode
3117           && e1->cst == e2->cst);
3118 }
3119
3120 /* Returns cost of multiplication by constant CST in MODE.  */
3121
3122 unsigned
3123 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3124 {
3125   static htab_t costs;
3126   struct mbc_entry **cached, act;
3127   rtx seq;
3128   unsigned cost;
3129
3130   if (!costs)
3131     costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3132
3133   act.mode = mode;
3134   act.cst = cst;
3135   cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3136   if (*cached)
3137     return (*cached)->cost;
3138
3139   *cached = XNEW (struct mbc_entry);
3140   (*cached)->mode = mode;
3141   (*cached)->cst = cst;
3142
3143   start_sequence ();
3144   expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3145                gen_int_mode (cst, mode), NULL_RTX, 0);
3146   seq = get_insns ();
3147   end_sequence ();
3148
3149   cost = seq_cost (seq, speed);
3150
3151   if (dump_file && (dump_flags & TDF_DETAILS))
3152     fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3153              (int) cst, GET_MODE_NAME (mode), cost);
3154
3155   (*cached)->cost = cost;
3156
3157   return cost;
3158 }
3159
3160 /* Returns true if multiplying by RATIO is allowed in an address.  Test the
3161    validity for a memory reference accessing memory of mode MODE in
3162    address space AS.  */
3163
3164 DEF_VEC_P (sbitmap);
3165 DEF_VEC_ALLOC_P (sbitmap, heap);
3166
3167 bool
3168 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3169                                  addr_space_t as)
3170 {
3171 #define MAX_RATIO 128
3172   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3173   static VEC (sbitmap, heap) *valid_mult_list;
3174   sbitmap valid_mult;
3175
3176   if (data_index >= VEC_length (sbitmap, valid_mult_list))
3177     VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3178
3179   valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3180   if (!valid_mult)
3181     {
3182       enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3183       rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3184       rtx addr;
3185       HOST_WIDE_INT i;
3186
3187       valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3188       sbitmap_zero (valid_mult);
3189       addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3190       for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3191         {
3192           XEXP (addr, 1) = gen_int_mode (i, address_mode);
3193           if (memory_address_addr_space_p (mode, addr, as))
3194             SET_BIT (valid_mult, i + MAX_RATIO);
3195         }
3196
3197       if (dump_file && (dump_flags & TDF_DETAILS))
3198         {
3199           fprintf (dump_file, "  allowed multipliers:");
3200           for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3201             if (TEST_BIT (valid_mult, i + MAX_RATIO))
3202               fprintf (dump_file, " %d", (int) i);
3203           fprintf (dump_file, "\n");
3204           fprintf (dump_file, "\n");
3205         }
3206
3207       VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3208     }
3209
3210   if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3211     return false;
3212
3213   return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3214 }
3215
3216 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3217    If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
3218    variable is omitted.  Compute the cost for a memory reference that accesses
3219    a memory location of mode MEM_MODE in address space AS.
3220
3221    MAY_AUTOINC is set to true if the autoincrement (increasing index by
3222    size of MEM_MODE / RATIO) is available.  To make this determination, we
3223    look at the size of the increment to be made, which is given in CSTEP.
3224    CSTEP may be zero if the step is unknown.
3225    STMT_AFTER_INC is true iff the statement we're looking at is after the
3226    increment of the original biv.
3227
3228    TODO -- there must be some better way.  This all is quite crude.  */
3229
3230 typedef struct
3231 {
3232   HOST_WIDE_INT min_offset, max_offset;
3233   unsigned costs[2][2][2][2];
3234 } *address_cost_data;
3235
3236 DEF_VEC_P (address_cost_data);
3237 DEF_VEC_ALLOC_P (address_cost_data, heap);
3238
3239 static comp_cost
3240 get_address_cost (bool symbol_present, bool var_present,
3241                   unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3242                   HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3243                   addr_space_t as, bool speed,
3244                   bool stmt_after_inc, bool *may_autoinc)
3245 {
3246   enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3247   static VEC(address_cost_data, heap) *address_cost_data_list;
3248   unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3249   address_cost_data data;
3250   static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3251   static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3252   unsigned cost, acost, complexity;
3253   bool offset_p, ratio_p, autoinc;
3254   HOST_WIDE_INT s_offset, autoinc_offset, msize;
3255   unsigned HOST_WIDE_INT mask;
3256   unsigned bits;
3257
3258   if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3259     VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3260                            data_index + 1);
3261
3262   data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3263   if (!data)
3264     {
3265       HOST_WIDE_INT i;
3266       HOST_WIDE_INT rat, off = 0;
3267       int old_cse_not_expected, width;
3268       unsigned sym_p, var_p, off_p, rat_p, add_c;
3269       rtx seq, addr, base;
3270       rtx reg0, reg1;
3271
3272       data = (address_cost_data) xcalloc (1, sizeof (*data));
3273
3274       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3275
3276       width = GET_MODE_BITSIZE (address_mode) - 1;
3277       if (width > (HOST_BITS_PER_WIDE_INT - 1))
3278         width = HOST_BITS_PER_WIDE_INT - 1;
3279       addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3280
3281       for (i = width; i >= 0; i--)
3282         {
3283           off = -((HOST_WIDE_INT) 1 << i);
3284           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3285           if (memory_address_addr_space_p (mem_mode, addr, as))
3286             break;
3287         }
3288       data->min_offset = (i == -1? 0 : off);
3289
3290       for (i = width; i >= 0; i--)
3291         {
3292           off = ((HOST_WIDE_INT) 1 << i) - 1;
3293           XEXP (addr, 1) = gen_int_mode (off, address_mode);
3294           if (memory_address_addr_space_p (mem_mode, addr, as))
3295             break;
3296         }
3297       if (i == -1)
3298         off = 0;
3299       data->max_offset = off;
3300
3301       if (dump_file && (dump_flags & TDF_DETAILS))
3302         {
3303           fprintf (dump_file, "get_address_cost:\n");
3304           fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3305                    GET_MODE_NAME (mem_mode),
3306                    data->min_offset);
3307           fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3308                    GET_MODE_NAME (mem_mode),
3309                    data->max_offset);
3310         }
3311
3312       rat = 1;
3313       for (i = 2; i <= MAX_RATIO; i++)
3314         if (multiplier_allowed_in_address_p (i, mem_mode, as))
3315           {
3316             rat = i;
3317             break;
3318           }
3319
3320       /* Compute the cost of various addressing modes.  */
3321       acost = 0;
3322       reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3323       reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3324
3325       if (USE_LOAD_PRE_DECREMENT (mem_mode)
3326           || USE_STORE_PRE_DECREMENT (mem_mode))
3327         {
3328           addr = gen_rtx_PRE_DEC (address_mode, reg0);
3329           has_predec[mem_mode]
3330             = memory_address_addr_space_p (mem_mode, addr, as);
3331         }
3332       if (USE_LOAD_POST_DECREMENT (mem_mode)
3333           || USE_STORE_POST_DECREMENT (mem_mode))
3334         {
3335           addr = gen_rtx_POST_DEC (address_mode, reg0);
3336           has_postdec[mem_mode]
3337             = memory_address_addr_space_p (mem_mode, addr, as);
3338         }
3339       if (USE_LOAD_PRE_INCREMENT (mem_mode)
3340           || USE_STORE_PRE_DECREMENT (mem_mode))
3341         {
3342           addr = gen_rtx_PRE_INC (address_mode, reg0);
3343           has_preinc[mem_mode]
3344             = memory_address_addr_space_p (mem_mode, addr, as);
3345         }
3346       if (USE_LOAD_POST_INCREMENT (mem_mode)
3347           || USE_STORE_POST_INCREMENT (mem_mode))
3348         {
3349           addr = gen_rtx_POST_INC (address_mode, reg0);
3350           has_postinc[mem_mode]
3351             = memory_address_addr_space_p (mem_mode, addr, as);
3352         }
3353       for (i = 0; i < 16; i++)
3354         {
3355           sym_p = i & 1;
3356           var_p = (i >> 1) & 1;
3357           off_p = (i >> 2) & 1;
3358           rat_p = (i >> 3) & 1;
3359
3360           addr = reg0;
3361           if (rat_p)
3362             addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3363                                    gen_int_mode (rat, address_mode));
3364
3365           if (var_p)
3366             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3367
3368           if (sym_p)
3369             {
3370               base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3371               /* ??? We can run into trouble with some backends by presenting
3372                  it with symbols which haven't been properly passed through
3373                  targetm.encode_section_info.  By setting the local bit, we
3374                  enhance the probability of things working.  */
3375               SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3376
3377               if (off_p)
3378                 base = gen_rtx_fmt_e (CONST, address_mode,
3379                                       gen_rtx_fmt_ee
3380                                         (PLUS, address_mode, base,
3381                                          gen_int_mode (off, address_mode)));
3382             }
3383           else if (off_p)
3384             base = gen_int_mode (off, address_mode);
3385           else
3386             base = NULL_RTX;
3387
3388           if (base)
3389             addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3390
3391           start_sequence ();
3392           /* To avoid splitting addressing modes, pretend that no cse will
3393              follow.  */
3394           old_cse_not_expected = cse_not_expected;
3395           cse_not_expected = true;
3396           addr = memory_address_addr_space (mem_mode, addr, as);
3397           cse_not_expected = old_cse_not_expected;
3398           seq = get_insns ();
3399           end_sequence ();
3400
3401           acost = seq_cost (seq, speed);
3402           acost += address_cost (addr, mem_mode, as, speed);
3403
3404           if (!acost)
3405             acost = 1;
3406           data->costs[sym_p][var_p][off_p][rat_p] = acost;
3407         }
3408
3409       /* On some targets, it is quite expensive to load symbol to a register,
3410          which makes addresses that contain symbols look much more expensive.
3411          However, the symbol will have to be loaded in any case before the
3412          loop (and quite likely we have it in register already), so it does not
3413          make much sense to penalize them too heavily.  So make some final
3414          tweaks for the SYMBOL_PRESENT modes:
3415
3416          If VAR_PRESENT is false, and the mode obtained by changing symbol to
3417          var is cheaper, use this mode with small penalty.
3418          If VAR_PRESENT is true, try whether the mode with
3419          SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3420          if this is the case, use it.  */
3421       add_c = add_cost (address_mode, speed);
3422       for (i = 0; i < 8; i++)
3423         {
3424           var_p = i & 1;
3425           off_p = (i >> 1) & 1;
3426           rat_p = (i >> 2) & 1;
3427
3428           acost = data->costs[0][1][off_p][rat_p] + 1;
3429           if (var_p)
3430             acost += add_c;
3431
3432           if (acost < data->costs[1][var_p][off_p][rat_p])
3433             data->costs[1][var_p][off_p][rat_p] = acost;
3434         }
3435
3436       if (dump_file && (dump_flags & TDF_DETAILS))
3437         {
3438           fprintf (dump_file, "Address costs:\n");
3439
3440           for (i = 0; i < 16; i++)
3441             {
3442               sym_p = i & 1;
3443               var_p = (i >> 1) & 1;
3444               off_p = (i >> 2) & 1;
3445               rat_p = (i >> 3) & 1;
3446
3447               fprintf (dump_file, "  ");
3448               if (sym_p)
3449                 fprintf (dump_file, "sym + ");
3450               if (var_p)
3451                 fprintf (dump_file, "var + ");
3452               if (off_p)
3453                 fprintf (dump_file, "cst + ");
3454               if (rat_p)
3455                 fprintf (dump_file, "rat * ");
3456
3457               acost = data->costs[sym_p][var_p][off_p][rat_p];
3458               fprintf (dump_file, "index costs %d\n", acost);
3459             }
3460           if (has_predec[mem_mode] || has_postdec[mem_mode]
3461               || has_preinc[mem_mode] || has_postinc[mem_mode])
3462             fprintf (dump_file, "  May include autoinc/dec\n");
3463           fprintf (dump_file, "\n");
3464         }
3465
3466       VEC_replace (address_cost_data, address_cost_data_list,
3467                    data_index, data);
3468     }
3469
3470   bits = GET_MODE_BITSIZE (address_mode);
3471   mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3472   offset &= mask;
3473   if ((offset >> (bits - 1) & 1))
3474     offset |= ~mask;
3475   s_offset = offset;
3476
3477   autoinc = false;
3478   msize = GET_MODE_SIZE (mem_mode);
3479   autoinc_offset = offset;
3480   if (stmt_after_inc)
3481     autoinc_offset += ratio * cstep;
3482   if (symbol_present || var_present || ratio != 1)
3483     autoinc = false;
3484   else if ((has_postinc[mem_mode] && autoinc_offset == 0
3485                && msize == cstep)
3486            || (has_postdec[mem_mode] && autoinc_offset == 0
3487                && msize == -cstep)
3488            || (has_preinc[mem_mode] && autoinc_offset == msize
3489                && msize == cstep)
3490            || (has_predec[mem_mode] && autoinc_offset == -msize
3491                && msize == -cstep))
3492     autoinc = true;
3493
3494   cost = 0;
3495   offset_p = (s_offset != 0
3496               && data->min_offset <= s_offset
3497               && s_offset <= data->max_offset);
3498   ratio_p = (ratio != 1
3499              && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3500
3501   if (ratio != 1 && !ratio_p)
3502     cost += multiply_by_cost (ratio, address_mode, speed);
3503
3504   if (s_offset && !offset_p && !symbol_present)
3505     cost += add_cost (address_mode, speed);
3506
3507   if (may_autoinc)
3508     *may_autoinc = autoinc;
3509   acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3510   complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3511   return new_cost (cost + acost, complexity);
3512 }
3513
3514  /* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
3515     the EXPR operand holding the shift.  COST0 and COST1 are the costs for
3516     calculating the operands of EXPR.  Returns true if successful, and returns
3517     the cost in COST.  */
3518
3519 static bool
3520 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3521                    comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3522 {
3523   comp_cost res;
3524   tree op1 = TREE_OPERAND (expr, 1);
3525   tree cst = TREE_OPERAND (mult, 1);
3526   tree multop = TREE_OPERAND (mult, 0);
3527   int m = exact_log2 (int_cst_value (cst));
3528   int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3529   int sa_cost;
3530
3531   if (!(m >= 0 && m < maxm))
3532     return false;
3533
3534   sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3535              ? shiftadd_cost[speed][mode][m]
3536              : (mult == op1
3537                 ? shiftsub1_cost[speed][mode][m]
3538                 : shiftsub0_cost[speed][mode][m]));
3539   res = new_cost (sa_cost, 0);
3540   res = add_costs (res, mult == op1 ? cost0 : cost1);
3541
3542   STRIP_NOPS (multop);
3543   if (!is_gimple_val (multop))
3544     res = add_costs (res, force_expr_to_var_cost (multop, speed));
3545
3546   *cost = res;
3547   return true;
3548 }
3549
3550 /* Estimates cost of forcing expression EXPR into a variable.  */
3551
3552 static comp_cost
3553 force_expr_to_var_cost (tree expr, bool speed)
3554 {
3555   static bool costs_initialized = false;
3556   static unsigned integer_cost [2];
3557   static unsigned symbol_cost [2];
3558   static unsigned address_cost [2];
3559   tree op0, op1;
3560   comp_cost cost0, cost1, cost;
3561   enum machine_mode mode;
3562
3563   if (!costs_initialized)
3564     {
3565       tree type = build_pointer_type (integer_type_node);
3566       tree var, addr;
3567       rtx x;
3568       int i;
3569
3570       var = create_tmp_var_raw (integer_type_node, "test_var");
3571       TREE_STATIC (var) = 1;
3572       x = produce_memory_decl_rtl (var, NULL);
3573       SET_DECL_RTL (var, x);
3574
3575       addr = build1 (ADDR_EXPR, type, var);
3576
3577
3578       for (i = 0; i < 2; i++)
3579         {
3580           integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3581                                                              2000), i);
3582
3583           symbol_cost[i] = computation_cost (addr, i) + 1;
3584
3585           address_cost[i]
3586             = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3587           if (dump_file && (dump_flags & TDF_DETAILS))
3588             {
3589               fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3590               fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
3591               fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
3592               fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
3593               fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
3594               fprintf (dump_file, "\n");
3595             }
3596         }
3597
3598       costs_initialized = true;
3599     }
3600
3601   STRIP_NOPS (expr);
3602
3603   if (SSA_VAR_P (expr))
3604     return zero_cost;
3605
3606   if (is_gimple_min_invariant (expr))
3607     {
3608       if (TREE_CODE (expr) == INTEGER_CST)
3609         return new_cost (integer_cost [speed], 0);
3610
3611       if (TREE_CODE (expr) == ADDR_EXPR)
3612         {
3613           tree obj = TREE_OPERAND (expr, 0);
3614
3615           if (TREE_CODE (obj) == VAR_DECL
3616               || TREE_CODE (obj) == PARM_DECL
3617               || TREE_CODE (obj) == RESULT_DECL)
3618             return new_cost (symbol_cost [speed], 0);
3619         }
3620
3621       return new_cost (address_cost [speed], 0);
3622     }
3623
3624   switch (TREE_CODE (expr))
3625     {
3626     case POINTER_PLUS_EXPR:
3627     case PLUS_EXPR:
3628     case MINUS_EXPR:
3629     case MULT_EXPR:
3630       op0 = TREE_OPERAND (expr, 0);
3631       op1 = TREE_OPERAND (expr, 1);
3632       STRIP_NOPS (op0);
3633       STRIP_NOPS (op1);
3634
3635       if (is_gimple_val (op0))
3636         cost0 = zero_cost;
3637       else
3638         cost0 = force_expr_to_var_cost (op0, speed);
3639
3640       if (is_gimple_val (op1))
3641         cost1 = zero_cost;
3642       else
3643         cost1 = force_expr_to_var_cost (op1, speed);
3644
3645       break;
3646
3647     case NEGATE_EXPR:
3648       op0 = TREE_OPERAND (expr, 0);
3649       STRIP_NOPS (op0);
3650       op1 = NULL_TREE;
3651
3652       if (is_gimple_val (op0))
3653         cost0 = zero_cost;
3654       else
3655         cost0 = force_expr_to_var_cost (op0, speed);
3656
3657       cost1 = zero_cost;
3658       break;
3659
3660     default:
3661       /* Just an arbitrary value, FIXME.  */
3662       return new_cost (target_spill_cost[speed], 0);
3663     }
3664
3665   mode = TYPE_MODE (TREE_TYPE (expr));
3666   switch (TREE_CODE (expr))
3667     {
3668     case POINTER_PLUS_EXPR:
3669     case PLUS_EXPR:
3670     case MINUS_EXPR:
3671     case NEGATE_EXPR:
3672       cost = new_cost (add_cost (mode, speed), 0);
3673       if (TREE_CODE (expr) != NEGATE_EXPR)
3674         {
3675           tree mult = NULL_TREE;
3676           comp_cost sa_cost;
3677           if (TREE_CODE (op1) == MULT_EXPR)
3678             mult = op1;
3679           else if (TREE_CODE (op0) == MULT_EXPR)
3680             mult = op0;
3681
3682           if (mult != NULL_TREE
3683               && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3684               && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3685                                     &sa_cost))
3686             return sa_cost;
3687         }
3688       break;
3689
3690     case MULT_EXPR:
3691       if (cst_and_fits_in_hwi (op0))
3692         cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3693       else if (cst_and_fits_in_hwi (op1))
3694         cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3695       else
3696         return new_cost (target_spill_cost [speed], 0);
3697       break;
3698
3699     default:
3700       gcc_unreachable ();
3701     }
3702
3703   cost = add_costs (cost, cost0);
3704   cost = add_costs (cost, cost1);
3705
3706   /* Bound the cost by target_spill_cost.  The parts of complicated
3707      computations often are either loop invariant or at least can
3708      be shared between several iv uses, so letting this grow without
3709      limits would not give reasonable results.  */
3710   if (cost.cost > (int) target_spill_cost [speed])
3711     cost.cost = target_spill_cost [speed];
3712
3713   return cost;
3714 }
3715
3716 /* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
3717    invariants the computation depends on.  */
3718
3719 static comp_cost
3720 force_var_cost (struct ivopts_data *data,
3721                 tree expr, bitmap *depends_on)
3722 {
3723   if (depends_on)
3724     {
3725       fd_ivopts_data = data;
3726       walk_tree (&expr, find_depends, depends_on, NULL);
3727     }
3728
3729   return force_expr_to_var_cost (expr, data->speed);
3730 }
3731
3732 /* Estimates cost of expressing address ADDR  as var + symbol + offset.  The
3733    value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3734    to false if the corresponding part is missing.  DEPENDS_ON is a set of the
3735    invariants the computation depends on.  */
3736
3737 static comp_cost
3738 split_address_cost (struct ivopts_data *data,
3739                     tree addr, bool *symbol_present, bool *var_present,
3740                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3741 {
3742   tree core;
3743   HOST_WIDE_INT bitsize;
3744   HOST_WIDE_INT bitpos;
3745   tree toffset;
3746   enum machine_mode mode;
3747   int unsignedp, volatilep;
3748
3749   core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3750                               &unsignedp, &volatilep, false);
3751
3752   if (toffset != 0
3753       || bitpos % BITS_PER_UNIT != 0
3754       || TREE_CODE (core) != VAR_DECL)
3755     {
3756       *symbol_present = false;
3757       *var_present = true;
3758       fd_ivopts_data = data;
3759       walk_tree (&addr, find_depends, depends_on, NULL);
3760       return new_cost (target_spill_cost[data->speed], 0);
3761     }
3762
3763   *offset += bitpos / BITS_PER_UNIT;
3764   if (TREE_STATIC (core)
3765       || DECL_EXTERNAL (core))
3766     {
3767       *symbol_present = true;
3768       *var_present = false;
3769       return zero_cost;
3770     }
3771
3772   *symbol_present = false;
3773   *var_present = true;
3774   return zero_cost;
3775 }
3776
3777 /* Estimates cost of expressing difference of addresses E1 - E2 as
3778    var + symbol + offset.  The value of offset is added to OFFSET,
3779    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3780    part is missing.  DEPENDS_ON is a set of the invariants the computation
3781    depends on.  */
3782
3783 static comp_cost
3784 ptr_difference_cost (struct ivopts_data *data,
3785                      tree e1, tree e2, bool *symbol_present, bool *var_present,
3786                      unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3787 {
3788   HOST_WIDE_INT diff = 0;
3789   aff_tree aff_e1, aff_e2;
3790   tree type;
3791
3792   gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3793
3794   if (ptr_difference_const (e1, e2, &diff))
3795     {
3796       *offset += diff;
3797       *symbol_present = false;
3798       *var_present = false;
3799       return zero_cost;
3800     }
3801
3802   if (integer_zerop (e2))
3803     return split_address_cost (data, TREE_OPERAND (e1, 0),
3804                                symbol_present, var_present, offset, depends_on);
3805
3806   *symbol_present = false;
3807   *var_present = true;
3808
3809   type = signed_type_for (TREE_TYPE (e1));
3810   tree_to_aff_combination (e1, type, &aff_e1);
3811   tree_to_aff_combination (e2, type, &aff_e2);
3812   aff_combination_scale (&aff_e2, double_int_minus_one);
3813   aff_combination_add (&aff_e1, &aff_e2);
3814
3815   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3816 }
3817
3818 /* Estimates cost of expressing difference E1 - E2 as
3819    var + symbol + offset.  The value of offset is added to OFFSET,
3820    SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3821    part is missing.  DEPENDS_ON is a set of the invariants the computation
3822    depends on.  */
3823
3824 static comp_cost
3825 difference_cost (struct ivopts_data *data,
3826                  tree e1, tree e2, bool *symbol_present, bool *var_present,
3827                  unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3828 {
3829   enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3830   unsigned HOST_WIDE_INT off1, off2;
3831   aff_tree aff_e1, aff_e2;
3832   tree type;
3833
3834   e1 = strip_offset (e1, &off1);
3835   e2 = strip_offset (e2, &off2);
3836   *offset += off1 - off2;
3837
3838   STRIP_NOPS (e1);
3839   STRIP_NOPS (e2);
3840
3841   if (TREE_CODE (e1) == ADDR_EXPR)
3842     return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3843                                 offset, depends_on);
3844   *symbol_present = false;
3845
3846   if (operand_equal_p (e1, e2, 0))
3847     {
3848       *var_present = false;
3849       return zero_cost;
3850     }
3851
3852   *var_present = true;
3853
3854   if (integer_zerop (e2))
3855     return force_var_cost (data, e1, depends_on);
3856
3857   if (integer_zerop (e1))
3858     {
3859       comp_cost cost = force_var_cost (data, e2, depends_on);
3860       cost.cost += multiply_by_cost (-1, mode, data->speed);
3861       return cost;
3862     }
3863
3864   type = signed_type_for (TREE_TYPE (e1));
3865   tree_to_aff_combination (e1, type, &aff_e1);
3866   tree_to_aff_combination (e2, type, &aff_e2);
3867   aff_combination_scale (&aff_e2, double_int_minus_one);
3868   aff_combination_add (&aff_e1, &aff_e2);
3869
3870   return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3871 }
3872
3873 /* Returns true if AFF1 and AFF2 are identical.  */
3874
3875 static bool
3876 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3877 {
3878   unsigned i;
3879
3880   if (aff1->n != aff2->n)
3881     return false;
3882
3883   for (i = 0; i < aff1->n; i++)
3884     {
3885       if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3886         return false;
3887
3888       if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3889         return false;
3890     }
3891   return true;
3892 }
3893
3894 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */
3895
3896 static int
3897 get_expr_id (struct ivopts_data *data, tree expr)
3898 {
3899   struct iv_inv_expr_ent ent;
3900   struct iv_inv_expr_ent **slot;
3901
3902   ent.expr = expr;
3903   ent.hash = iterative_hash_expr (expr, 0);
3904   slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3905                                                      &ent, INSERT);
3906   if (*slot)
3907     return (*slot)->id;
3908
3909   *slot = XNEW (struct iv_inv_expr_ent);
3910   (*slot)->expr = expr;
3911   (*slot)->hash = ent.hash;
3912   (*slot)->id = data->inv_expr_id++;
3913   return (*slot)->id;
3914 }
3915
3916 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3917    requires a new compiler generated temporary.  Returns -1 otherwise.
3918    ADDRESS_P is a flag indicating if the expression is for address
3919    computation.  */
3920
3921 static int
3922 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3923                             tree cbase, HOST_WIDE_INT ratio,
3924                             bool address_p)
3925 {
3926   aff_tree ubase_aff, cbase_aff;
3927   tree expr, ub, cb;
3928
3929   STRIP_NOPS (ubase);
3930   STRIP_NOPS (cbase);
3931   ub = ubase;
3932   cb = cbase;
3933
3934   if ((TREE_CODE (ubase) == INTEGER_CST)
3935       && (TREE_CODE (cbase) == INTEGER_CST))
3936     return -1;
3937
3938   /* Strips the constant part. */
3939   if (TREE_CODE (ubase) == PLUS_EXPR
3940       || TREE_CODE (ubase) == MINUS_EXPR
3941       || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3942     {
3943       if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3944         ubase = TREE_OPERAND (ubase, 0);
3945     }
3946
3947   /* Strips the constant part. */
3948   if (TREE_CODE (cbase) == PLUS_EXPR
3949       || TREE_CODE (cbase) == MINUS_EXPR
3950       || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3951     {
3952       if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3953         cbase = TREE_OPERAND (cbase, 0);
3954     }
3955
3956   if (address_p)
3957     {
3958       if (((TREE_CODE (ubase) == SSA_NAME)
3959            || (TREE_CODE (ubase) == ADDR_EXPR
3960                && is_gimple_min_invariant (ubase)))
3961           && (TREE_CODE (cbase) == INTEGER_CST))
3962         return -1;
3963
3964       if (((TREE_CODE (cbase) == SSA_NAME)
3965            || (TREE_CODE (cbase) == ADDR_EXPR
3966                && is_gimple_min_invariant (cbase)))
3967           && (TREE_CODE (ubase) == INTEGER_CST))
3968         return -1;
3969     }
3970
3971   if (ratio == 1)
3972     {
3973       if(operand_equal_p (ubase, cbase, 0))
3974         return -1;
3975
3976       if (TREE_CODE (ubase) == ADDR_EXPR
3977           && TREE_CODE (cbase) == ADDR_EXPR)
3978         {
3979           tree usym, csym;
3980
3981           usym = TREE_OPERAND (ubase, 0);
3982           csym = TREE_OPERAND (cbase, 0);
3983           if (TREE_CODE (usym) == ARRAY_REF)
3984             {
3985               tree ind = TREE_OPERAND (usym, 1);
3986               if (TREE_CODE (ind) == INTEGER_CST
3987                   && host_integerp (ind, 0)
3988                   && TREE_INT_CST_LOW (ind) == 0)
3989                 usym = TREE_OPERAND (usym, 0);
3990             }
3991           if (TREE_CODE (csym) == ARRAY_REF)
3992             {
3993               tree ind = TREE_OPERAND (csym, 1);
3994               if (TREE_CODE (ind) == INTEGER_CST
3995                   && host_integerp (ind, 0)
3996                   && TREE_INT_CST_LOW (ind) == 0)
3997                 csym = TREE_OPERAND (csym, 0);
3998             }
3999           if (operand_equal_p (usym, csym, 0))
4000             return -1;
4001         }
4002       /* Now do more complex comparison  */
4003       tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4004       tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4005       if (compare_aff_trees (&ubase_aff, &cbase_aff))
4006         return -1;
4007     }
4008
4009   tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4010   tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4011
4012   aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
4013   aff_combination_add (&ubase_aff, &cbase_aff);
4014   expr = aff_combination_to_tree (&ubase_aff);
4015   return get_expr_id (data, expr);
4016 }
4017
4018
4019
4020 /* Determines the cost of the computation by that USE is expressed
4021    from induction variable CAND.  If ADDRESS_P is true, we just need
4022    to create an address from it, otherwise we want to get it into
4023    register.  A set of invariants we depend on is stored in
4024    DEPENDS_ON.  AT is the statement at that the value is computed.
4025    If CAN_AUTOINC is nonnull, use it to record whether autoinc
4026    addressing is likely.  */
4027
4028 static comp_cost
4029 get_computation_cost_at (struct ivopts_data *data,
4030                          struct iv_use *use, struct iv_cand *cand,
4031                          bool address_p, bitmap *depends_on, gimple at,
4032                          bool *can_autoinc,
4033                          int *inv_expr_id)
4034 {
4035   tree ubase = use->iv->base, ustep = use->iv->step;
4036   tree cbase, cstep;
4037   tree utype = TREE_TYPE (ubase), ctype;
4038   unsigned HOST_WIDE_INT cstepi, offset = 0;
4039   HOST_WIDE_INT ratio, aratio;
4040   bool var_present, symbol_present, stmt_is_after_inc;
4041   comp_cost cost;
4042   double_int rat;
4043   bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4044
4045   *depends_on = NULL;
4046
4047   /* Only consider real candidates.  */
4048   if (!cand->iv)
4049     return infinite_cost;
4050
4051   cbase = cand->iv->base;
4052   cstep = cand->iv->step;
4053   ctype = TREE_TYPE (cbase);
4054
4055   if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4056     {
4057       /* We do not have a precision to express the values of use.  */
4058       return infinite_cost;
4059     }
4060
4061   if (address_p
4062       || (use->iv->base_object
4063           && cand->iv->base_object
4064           && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4065           && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4066     {
4067       /* Do not try to express address of an object with computation based
4068          on address of a different object.  This may cause problems in rtl
4069          level alias analysis (that does not expect this to be happening,
4070          as this is illegal in C), and would be unlikely to be useful
4071          anyway.  */
4072       if (use->iv->base_object
4073           && cand->iv->base_object
4074           && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4075         return infinite_cost;
4076     }
4077
4078   if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4079     {
4080       /* TODO -- add direct handling of this case.  */
4081       goto fallback;
4082     }
4083
4084   /* CSTEPI is removed from the offset in case statement is after the
4085      increment.  If the step is not constant, we use zero instead.
4086      This is a bit imprecise (there is the extra addition), but
4087      redundancy elimination is likely to transform the code so that
4088      it uses value of the variable before increment anyway,
4089      so it is not that much unrealistic.  */
4090   if (cst_and_fits_in_hwi (cstep))
4091     cstepi = int_cst_value (cstep);
4092   else
4093     cstepi = 0;
4094
4095   if (!constant_multiple_of (ustep, cstep, &rat))
4096     return infinite_cost;
4097
4098   if (double_int_fits_in_shwi_p (rat))
4099     ratio = double_int_to_shwi (rat);
4100   else
4101     return infinite_cost;
4102
4103   STRIP_NOPS (cbase);
4104   ctype = TREE_TYPE (cbase);
4105
4106   stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4107
4108   /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
4109      or ratio == 1, it is better to handle this like
4110
4111      ubase - ratio * cbase + ratio * var
4112
4113      (also holds in the case ratio == -1, TODO.  */
4114
4115   if (cst_and_fits_in_hwi (cbase))
4116     {
4117       offset = - ratio * int_cst_value (cbase);
4118       cost = difference_cost (data,
4119                               ubase, build_int_cst (utype, 0),
4120                               &symbol_present, &var_present, &offset,
4121                               depends_on);
4122       cost.cost /= avg_loop_niter (data->current_loop);
4123     }
4124   else if (ratio == 1)
4125     {
4126       tree real_cbase = cbase;
4127
4128       /* Check to see if any adjustment is needed.  */
4129       if (cstepi == 0 && stmt_is_after_inc)
4130         {
4131           aff_tree real_cbase_aff;
4132           aff_tree cstep_aff;
4133
4134           tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4135                                    &real_cbase_aff);
4136           tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4137
4138           aff_combination_add (&real_cbase_aff, &cstep_aff);
4139           real_cbase = aff_combination_to_tree (&real_cbase_aff);
4140         }
4141
4142       cost = difference_cost (data,
4143                               ubase, real_cbase,
4144                               &symbol_present, &var_present, &offset,
4145                               depends_on);
4146       cost.cost /= avg_loop_niter (data->current_loop);
4147     }
4148   else if (address_p
4149            && !POINTER_TYPE_P (ctype)
4150            && multiplier_allowed_in_address_p
4151                 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4152                         TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4153     {
4154       cbase
4155         = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4156       cost = difference_cost (data,
4157                               ubase, cbase,
4158                               &symbol_present, &var_present, &offset,
4159                               depends_on);
4160       cost.cost /= avg_loop_niter (data->current_loop);
4161     }
4162   else
4163     {
4164       cost = force_var_cost (data, cbase, depends_on);
4165       cost = add_costs (cost,
4166                         difference_cost (data,
4167                                          ubase, build_int_cst (utype, 0),
4168                                          &symbol_present, &var_present,
4169                                          &offset, depends_on));
4170       cost.cost /= avg_loop_niter (data->current_loop);
4171       cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4172     }
4173
4174   if (inv_expr_id)
4175     {
4176       *inv_expr_id =
4177           get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4178       /* Clear depends on.  */
4179       if (*inv_expr_id != -1 && depends_on && *depends_on)
4180         bitmap_clear (*depends_on);
4181     }
4182
4183   /* If we are after the increment, the value of the candidate is higher by
4184      one iteration.  */
4185   if (stmt_is_after_inc)
4186     offset -= ratio * cstepi;
4187
4188   /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4189      (symbol/var1/const parts may be omitted).  If we are looking for an
4190      address, find the cost of addressing this.  */
4191   if (address_p)
4192     return add_costs (cost,
4193                       get_address_cost (symbol_present, var_present,
4194                                         offset, ratio, cstepi,
4195                                         TYPE_MODE (TREE_TYPE (utype)),
4196                                         TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4197                                         speed, stmt_is_after_inc,
4198                                         can_autoinc));
4199
4200   /* Otherwise estimate the costs for computing the expression.  */
4201   if (!symbol_present && !var_present && !offset)
4202     {
4203       if (ratio != 1)
4204         cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4205       return cost;
4206     }
4207
4208   /* Symbol + offset should be compile-time computable so consider that they
4209       are added once to the variable, if present.  */
4210   if (var_present && (symbol_present || offset))
4211     cost.cost += adjust_setup_cost (data,
4212                                     add_cost (TYPE_MODE (ctype), speed));
4213
4214   /* Having offset does not affect runtime cost in case it is added to
4215      symbol, but it increases complexity.  */
4216   if (offset)
4217     cost.complexity++;
4218
4219   cost.cost += add_cost (TYPE_MODE (ctype), speed);
4220
4221   aratio = ratio > 0 ? ratio : -ratio;
4222   if (aratio != 1)
4223     cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4224   return cost;
4225
4226 fallback:
4227   if (can_autoinc)
4228     *can_autoinc = false;
4229
4230   {
4231     /* Just get the expression, expand it and measure the cost.  */
4232     tree comp = get_computation_at (data->current_loop, use, cand, at);
4233
4234     if (!comp)
4235       return infinite_cost;
4236
4237     if (address_p)
4238       comp = build_simple_mem_ref (comp);
4239
4240     return new_cost (computation_cost (comp, speed), 0);
4241   }
4242 }
4243
4244 /* Determines the cost of the computation by that USE is expressed
4245    from induction variable CAND.  If ADDRESS_P is true, we just need
4246    to create an address from it, otherwise we want to get it into
4247    register.  A set of invariants we depend on is stored in
4248    DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
4249    autoinc addressing is likely.  */
4250
4251 static comp_cost
4252 get_computation_cost (struct ivopts_data *data,
4253                       struct iv_use *use, struct iv_cand *cand,
4254                       bool address_p, bitmap *depends_on,
4255                       bool *can_autoinc, int *inv_expr_id)
4256 {
4257   return get_computation_cost_at (data,
4258                                   use, cand, address_p, depends_on, use->stmt,
4259                                   can_autoinc, inv_expr_id);
4260 }
4261
4262 /* Determines cost of basing replacement of USE on CAND in a generic
4263    expression.  */
4264
4265 static bool
4266 determine_use_iv_cost_generic (struct ivopts_data *data,
4267                                struct iv_use *use, struct iv_cand *cand)
4268 {
4269   bitmap depends_on;
4270   comp_cost cost;
4271   int inv_expr_id = -1;
4272
4273   /* The simple case first -- if we need to express value of the preserved
4274      original biv, the cost is 0.  This also prevents us from counting the
4275      cost of increment twice -- once at this use and once in the cost of
4276      the candidate.  */
4277   if (cand->pos == IP_ORIGINAL
4278       && cand->incremented_at == use->stmt)
4279     {
4280       set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE,
4281                        ERROR_MARK, -1);
4282       return true;
4283     }
4284
4285   cost = get_computation_cost (data, use, cand, false, &depends_on,
4286                                NULL, &inv_expr_id);
4287
4288   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4289                    inv_expr_id);
4290
4291   return !infinite_cost_p (cost);
4292 }
4293
4294 /* Determines cost of basing replacement of USE on CAND in an address.  */
4295
4296 static bool
4297 determine_use_iv_cost_address (struct ivopts_data *data,
4298                                struct iv_use *use, struct iv_cand *cand)
4299 {
4300   bitmap depends_on;
4301   bool can_autoinc;
4302   int inv_expr_id = -1;
4303   comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4304                                          &can_autoinc, &inv_expr_id);
4305
4306   if (cand->ainc_use == use)
4307     {
4308       if (can_autoinc)
4309         cost.cost -= cand->cost_step;
4310       /* If we generated the candidate solely for exploiting autoincrement
4311          opportunities, and it turns out it can't be used, set the cost to
4312          infinity to make sure we ignore it.  */
4313       else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4314         cost = infinite_cost;
4315     }
4316   set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4317                    inv_expr_id);
4318
4319   return !infinite_cost_p (cost);
4320 }
4321
4322 /* Computes value of candidate CAND at position AT in iteration NITER, and
4323    stores it to VAL.  */
4324
4325 static void
4326 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4327                aff_tree *val)
4328 {
4329   aff_tree step, delta, nit;
4330   struct iv *iv = cand->iv;
4331   tree type = TREE_TYPE (iv->base);
4332   tree steptype = type;
4333   if (POINTER_TYPE_P (type))
4334     steptype = sizetype;
4335
4336   tree_to_aff_combination (iv->step, steptype, &step);
4337   tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4338   aff_combination_convert (&nit, steptype);
4339   aff_combination_mult (&nit, &step, &delta);
4340   if (stmt_after_increment (loop, cand, at))
4341     aff_combination_add (&delta, &step);
4342
4343   tree_to_aff_combination (iv->base, type, val);
4344   aff_combination_add (val, &delta);
4345 }
4346
4347 /* Returns period of induction variable iv.  */
4348
4349 static tree
4350 iv_period (struct iv *iv)
4351 {
4352   tree step = iv->step, period, type;
4353   tree pow2div;
4354
4355   gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4356
4357   type = unsigned_type_for (TREE_TYPE (step));
4358   /* Period of the iv is lcm (step, type_range)/step -1,
4359      i.e., N*type_range/step - 1. Since type range is power
4360      of two, N == (step >> num_of_ending_zeros_binary (step),
4361      so the final result is
4362
4363        (type_range >> num_of_ending_zeros_binary (step)) - 1
4364
4365   */
4366   pow2div = num_ending_zeros (step);
4367
4368   period = build_low_bits_mask (type,
4369                                 (TYPE_PRECISION (type)
4370                                  - tree_low_cst (pow2div, 1)));
4371
4372   return period;
4373 }
4374
4375 /* Returns the comparison operator used when eliminating the iv USE.  */
4376
4377 static enum tree_code
4378 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4379 {
4380   struct loop *loop = data->current_loop;
4381   basic_block ex_bb;
4382   edge exit;
4383
4384   ex_bb = gimple_bb (use->stmt);
4385   exit = EDGE_SUCC (ex_bb, 0);
4386   if (flow_bb_inside_loop_p (loop, exit->dest))
4387     exit = EDGE_SUCC (ex_bb, 1);
4388
4389   return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4390 }
4391
4392 static tree
4393 strip_wrap_conserving_type_conversions (tree exp)
4394 {
4395   while (tree_ssa_useless_type_conversion (exp)
4396          && (nowrap_type_p (TREE_TYPE (exp))
4397              == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4398     exp = TREE_OPERAND (exp, 0);
4399   return exp;
4400 }
4401
4402 /* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
4403    check for an exact match.  */
4404
4405 static bool
4406 expr_equal_p (tree e, tree what)
4407 {
4408   gimple stmt;
4409   enum tree_code code;
4410
4411   e = strip_wrap_conserving_type_conversions (e);
4412   what = strip_wrap_conserving_type_conversions (what);
4413
4414   code = TREE_CODE (what);
4415   if (TREE_TYPE (e) != TREE_TYPE (what))
4416     return false;
4417
4418   if (operand_equal_p (e, what, 0))
4419     return true;
4420
4421   if (TREE_CODE (e) != SSA_NAME)
4422     return false;
4423
4424   stmt = SSA_NAME_DEF_STMT (e);
4425   if (gimple_code (stmt) != GIMPLE_ASSIGN
4426       || gimple_assign_rhs_code (stmt) != code)
4427     return false;
4428
4429   switch (get_gimple_rhs_class (code))
4430     {
4431     case GIMPLE_BINARY_RHS:
4432       if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4433         return false;
4434       /* Fallthru.  */
4435
4436     case GIMPLE_UNARY_RHS:
4437     case GIMPLE_SINGLE_RHS:
4438       return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4439     default:
4440       return false;
4441     }
4442 }
4443
4444 /* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
4445    we only detect the situation that BASE = SOMETHING + OFFSET, where the
4446    calculation is performed in non-wrapping type.
4447
4448    TODO: More generally, we could test for the situation that
4449          BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4450          This would require knowing the sign of OFFSET.
4451
4452          Also, we only look for the first addition in the computation of BASE.
4453          More complex analysis would be better, but introducing it just for
4454          this optimization seems like an overkill.  */
4455
4456 static bool
4457 difference_cannot_overflow_p (tree base, tree offset)
4458 {
4459   enum tree_code code;
4460   tree e1, e2;
4461
4462   if (!nowrap_type_p (TREE_TYPE (base)))
4463     return false;
4464
4465   base = expand_simple_operations (base);
4466
4467   if (TREE_CODE (base) == SSA_NAME)
4468     {
4469       gimple stmt = SSA_NAME_DEF_STMT (base);
4470
4471       if (gimple_code (stmt) != GIMPLE_ASSIGN)
4472         return false;
4473
4474       code = gimple_assign_rhs_code (stmt);
4475       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4476         return false;
4477
4478       e1 = gimple_assign_rhs1 (stmt);
4479       e2 = gimple_assign_rhs2 (stmt);
4480     }
4481   else
4482     {
4483       code = TREE_CODE (base);
4484       if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4485         return false;
4486       e1 = TREE_OPERAND (base, 0);
4487       e2 = TREE_OPERAND (base, 1);
4488     }
4489
4490   /* TODO: deeper inspection may be necessary to prove the equality.  */
4491   switch (code)
4492     {
4493     case PLUS_EXPR:
4494       return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4495     case POINTER_PLUS_EXPR:
4496       return expr_equal_p (e2, offset);
4497
4498     default:
4499       return false;
4500     }
4501 }
4502
4503 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4504    comparison with CAND.  NITER describes the number of iterations of
4505    the loops.  If successful, the comparison in COMP_P is altered accordingly.
4506
4507    We aim to handle the following situation:
4508
4509    sometype *base, *p;
4510    int a, b, i;
4511
4512    i = a;
4513    p = p_0 = base + a;
4514
4515    do
4516      {
4517        bla (*p);
4518        p++;
4519        i++;
4520      }
4521    while (i < b);
4522
4523    Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4524    We aim to optimize this to
4525
4526    p = p_0 = base + a;
4527    do
4528      {
4529        bla (*p);
4530        p++;
4531      }
4532    while (p < p_0 - a + b);
4533
4534    This preserves the correctness, since the pointer arithmetics does not
4535    overflow.  More precisely:
4536
4537    1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4538       overflow in computing it or the values of p.
4539    2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4540       overflow.  To prove this, we use the fact that p_0 = base + a.  */
4541
4542 static bool
4543 iv_elimination_compare_lt (struct ivopts_data *data,
4544                            struct iv_cand *cand, enum tree_code *comp_p,
4545                            struct tree_niter_desc *niter)
4546 {
4547   tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4548   struct affine_tree_combination nit, tmpa, tmpb;
4549   enum tree_code comp;
4550   HOST_WIDE_INT step;
4551
4552   /* We need to know that the candidate induction variable does not overflow.
4553      While more complex analysis may be used to prove this, for now just
4554      check that the variable appears in the original program and that it
4555      is computed in a type that guarantees no overflows.  */
4556   cand_type = TREE_TYPE (cand->iv->base);
4557   if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4558     return false;
4559
4560   /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4561      the calculation of the BOUND could overflow, making the comparison
4562      invalid.  */
4563   if (!data->loop_single_exit_p)
4564     return false;
4565
4566   /* We need to be able to decide whether candidate is increasing or decreasing
4567      in order to choose the right comparison operator.  */
4568   if (!cst_and_fits_in_hwi (cand->iv->step))
4569     return false;
4570   step = int_cst_value (cand->iv->step);
4571
4572   /* Check that the number of iterations matches the expected pattern:
4573      a + 1 > b ? 0 : b - a - 1.  */
4574   mbz = niter->may_be_zero;
4575   if (TREE_CODE (mbz) == GT_EXPR)
4576     {
4577       /* Handle a + 1 > b.  */
4578       tree op0 = TREE_OPERAND (mbz, 0);
4579       if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4580         {
4581           a = TREE_OPERAND (op0, 0);
4582           b = TREE_OPERAND (mbz, 1);
4583         }
4584       else
4585         return false;
4586     }
4587   else if (TREE_CODE (mbz) == LT_EXPR)
4588     {
4589       tree op1 = TREE_OPERAND (mbz, 1);
4590
4591       /* Handle b < a + 1.  */
4592       if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4593         {
4594           a = TREE_OPERAND (op1, 0);
4595           b = TREE_OPERAND (mbz, 0);
4596         }
4597       else
4598         return false;
4599     }
4600   else
4601     return false;
4602
4603   /* Expected number of iterations is B - A - 1.  Check that it matches
4604      the actual number, i.e., that B - A - NITER = 1.  */
4605   tree_to_aff_combination (niter->niter, nit_type, &nit);
4606   tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4607   tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4608   aff_combination_scale (&nit, double_int_minus_one);
4609   aff_combination_scale (&tmpa, double_int_minus_one);
4610   aff_combination_add (&tmpb, &tmpa);
4611   aff_combination_add (&tmpb, &nit);
4612   if (tmpb.n != 0 || !double_int_equal_p (tmpb.offset, double_int_one))
4613     return false;
4614
4615   /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4616      overflow.  */
4617   offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4618                         cand->iv->step,
4619                         fold_convert (TREE_TYPE (cand->iv->step), a));
4620   if (!difference_cannot_overflow_p (cand->iv->base, offset))
4621     return false;
4622
4623   /* Determine the new comparison operator.  */
4624   comp = step < 0 ? GT_EXPR : LT_EXPR;
4625   if (*comp_p == NE_EXPR)
4626     *comp_p = comp;
4627   else if (*comp_p == EQ_EXPR)
4628     *comp_p = invert_tree_comparison (comp, false);
4629   else
4630     gcc_unreachable ();
4631
4632   return true;
4633 }
4634
4635 /* Check whether it is possible to express the condition in USE by comparison
4636    of candidate CAND.  If so, store the value compared with to BOUND, and the
4637    comparison operator to COMP.  */
4638
4639 static bool
4640 may_eliminate_iv (struct ivopts_data *data,
4641                   struct iv_use *use, struct iv_cand *cand, tree *bound,
4642                   enum tree_code *comp)
4643 {
4644   basic_block ex_bb;
4645   edge exit;
4646   tree period;
4647   struct loop *loop = data->current_loop;
4648   aff_tree bnd;
4649   struct tree_niter_desc *desc = NULL;
4650
4651   if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4652     return false;
4653
4654   /* For now works only for exits that dominate the loop latch.
4655      TODO: extend to other conditions inside loop body.  */
4656   ex_bb = gimple_bb (use->stmt);
4657   if (use->stmt != last_stmt (ex_bb)
4658       || gimple_code (use->stmt) != GIMPLE_COND
4659       || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4660     return false;
4661
4662   exit = EDGE_SUCC (ex_bb, 0);
4663   if (flow_bb_inside_loop_p (loop, exit->dest))
4664     exit = EDGE_SUCC (ex_bb, 1);
4665   if (flow_bb_inside_loop_p (loop, exit->dest))
4666     return false;
4667
4668   desc = niter_for_exit (data, exit);
4669   if (!desc)
4670     return false;
4671
4672   /* Determine whether we can use the variable to test the exit condition.
4673      This is the case iff the period of the induction variable is greater
4674      than the number of iterations for which the exit condition is true.  */
4675   period = iv_period (cand->iv);
4676
4677   /* If the number of iterations is constant, compare against it directly.  */
4678   if (TREE_CODE (desc->niter) == INTEGER_CST)
4679     {
4680       /* See cand_value_at.  */
4681       if (stmt_after_increment (loop, cand, use->stmt))
4682         {
4683           if (!tree_int_cst_lt (desc->niter, period))
4684             return false;
4685         }
4686       else
4687         {
4688           if (tree_int_cst_lt (period, desc->niter))
4689             return false;
4690         }
4691     }
4692
4693   /* If not, and if this is the only possible exit of the loop, see whether
4694      we can get a conservative estimate on the number of iterations of the
4695      entire loop and compare against that instead.  */
4696   else
4697     {
4698       double_int period_value, max_niter;
4699
4700       max_niter = desc->max;
4701       if (stmt_after_increment (loop, cand, use->stmt))
4702         max_niter = double_int_add (max_niter, double_int_one);
4703       period_value = tree_to_double_int (period);
4704       if (double_int_ucmp (max_niter, period_value) > 0)
4705         {
4706           /* See if we can take advantage of inferred loop bound information.  */
4707           if (data->loop_single_exit_p)
4708             {
4709               if (!max_loop_iterations (loop, &max_niter))
4710                 return false;
4711               /* The loop bound is already adjusted by adding 1.  */
4712               if (double_int_ucmp (max_niter, period_value) > 0)
4713                 return false;
4714             }
4715           else
4716             return false;
4717         }
4718     }
4719
4720   cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4721
4722   *bound = aff_combination_to_tree (&bnd);
4723   *comp = iv_elimination_compare (data, use);
4724
4725   /* It is unlikely that computing the number of iterations using division
4726      would be more profitable than keeping the original induction variable.  */
4727   if (expression_expensive_p (*bound))
4728     return false;
4729
4730   /* Sometimes, it is possible to handle the situation that the number of
4731      iterations may be zero unless additional assumtions by using <
4732      instead of != in the exit condition.
4733
4734      TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4735            base the exit condition on it.  However, that is often too
4736            expensive.  */
4737   if (!integer_zerop (desc->may_be_zero))
4738     return iv_elimination_compare_lt (data, cand, comp, desc);
4739
4740   return true;
4741 }
4742
4743  /* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
4744     be copied, if is is used in the loop body and DATA->body_includes_call.  */
4745
4746 static int
4747 parm_decl_cost (struct ivopts_data *data, tree bound)
4748 {
4749   tree sbound = bound;
4750   STRIP_NOPS (sbound);
4751
4752   if (TREE_CODE (sbound) == SSA_NAME
4753       && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4754       && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4755       && data->body_includes_call)
4756     return COSTS_N_INSNS (1);
4757
4758   return 0;
4759 }
4760
4761 /* Determines cost of basing replacement of USE on CAND in a condition.  */
4762
4763 static bool
4764 determine_use_iv_cost_condition (struct ivopts_data *data,
4765                                  struct iv_use *use, struct iv_cand *cand)
4766 {
4767   tree bound = NULL_TREE;
4768   struct iv *cmp_iv;
4769   bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4770   comp_cost elim_cost, express_cost, cost, bound_cost;
4771   bool ok;
4772   int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4773   tree *control_var, *bound_cst;
4774   enum tree_code comp = ERROR_MARK;
4775
4776   /* Only consider real candidates.  */
4777   if (!cand->iv)
4778     {
4779       set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4780                        ERROR_MARK, -1);
4781       return false;
4782     }
4783
4784   /* Try iv elimination.  */
4785   if (may_eliminate_iv (data, use, cand, &bound, &comp))
4786     {
4787       elim_cost = force_var_cost (data, bound, &depends_on_elim);
4788       if (elim_cost.cost == 0)
4789         elim_cost.cost = parm_decl_cost (data, bound);
4790       else if (TREE_CODE (bound) == INTEGER_CST)
4791         elim_cost.cost = 0;
4792       /* If we replace a loop condition 'i < n' with 'p < base + n',
4793          depends_on_elim will have 'base' and 'n' set, which implies
4794          that both 'base' and 'n' will be live during the loop.  More likely,
4795          'base + n' will be loop invariant, resulting in only one live value
4796          during the loop.  So in that case we clear depends_on_elim and set
4797         elim_inv_expr_id instead.  */
4798       if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4799         {
4800           elim_inv_expr_id = get_expr_id (data, bound);
4801           bitmap_clear (depends_on_elim);
4802         }
4803       /* The bound is a loop invariant, so it will be only computed
4804          once.  */
4805       elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4806     }
4807   else
4808     elim_cost = infinite_cost;
4809
4810   /* Try expressing the original giv.  If it is compared with an invariant,
4811      note that we cannot get rid of it.  */
4812   ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4813                               NULL, &cmp_iv);
4814   gcc_assert (ok);
4815
4816   /* When the condition is a comparison of the candidate IV against
4817      zero, prefer this IV.
4818
4819      TODO: The constant that we're subtracting from the cost should
4820      be target-dependent.  This information should be added to the
4821      target costs for each backend.  */
4822   if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4823       && integer_zerop (*bound_cst)
4824       && (operand_equal_p (*control_var, cand->var_after, 0)
4825           || operand_equal_p (*control_var, cand->var_before, 0)))
4826     elim_cost.cost -= 1;
4827
4828   express_cost = get_computation_cost (data, use, cand, false,
4829                                        &depends_on_express, NULL,
4830                                        &express_inv_expr_id);
4831   fd_ivopts_data = data;
4832   walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4833
4834   /* Count the cost of the original bound as well.  */
4835   bound_cost = force_var_cost (data, *bound_cst, NULL);
4836   if (bound_cost.cost == 0)
4837     bound_cost.cost = parm_decl_cost (data, *bound_cst);
4838   else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4839     bound_cost.cost = 0;
4840   express_cost.cost += bound_cost.cost;
4841
4842   /* Choose the better approach, preferring the eliminated IV. */
4843   if (compare_costs (elim_cost, express_cost) <= 0)
4844     {
4845       cost = elim_cost;
4846       depends_on = depends_on_elim;
4847       depends_on_elim = NULL;
4848       inv_expr_id = elim_inv_expr_id;
4849     }
4850   else
4851     {
4852       cost = express_cost;
4853       depends_on = depends_on_express;
4854       depends_on_express = NULL;
4855       bound = NULL_TREE;
4856       comp = ERROR_MARK;
4857       inv_expr_id = express_inv_expr_id;
4858     }
4859
4860   set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4861
4862   if (depends_on_elim)
4863     BITMAP_FREE (depends_on_elim);
4864   if (depends_on_express)
4865     BITMAP_FREE (depends_on_express);
4866
4867   return !infinite_cost_p (cost);
4868 }
4869
4870 /* Determines cost of basing replacement of USE on CAND.  Returns false
4871    if USE cannot be based on CAND.  */
4872
4873 static bool
4874 determine_use_iv_cost (struct ivopts_data *data,
4875                        struct iv_use *use, struct iv_cand *cand)
4876 {
4877   switch (use->type)
4878     {
4879     case USE_NONLINEAR_EXPR:
4880       return determine_use_iv_cost_generic (data, use, cand);
4881
4882     case USE_ADDRESS:
4883       return determine_use_iv_cost_address (data, use, cand);
4884
4885     case USE_COMPARE:
4886       return determine_use_iv_cost_condition (data, use, cand);
4887
4888     default:
4889       gcc_unreachable ();
4890     }
4891 }
4892
4893 /* Return true if get_computation_cost indicates that autoincrement is
4894    a possibility for the pair of USE and CAND, false otherwise.  */
4895
4896 static bool
4897 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4898                            struct iv_cand *cand)
4899 {
4900   bitmap depends_on;
4901   bool can_autoinc;
4902   comp_cost cost;
4903
4904   if (use->type != USE_ADDRESS)
4905     return false;
4906
4907   cost = get_computation_cost (data, use, cand, true, &depends_on,
4908                                &can_autoinc, NULL);
4909
4910   BITMAP_FREE (depends_on);
4911
4912   return !infinite_cost_p (cost) && can_autoinc;
4913 }
4914
4915 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4916    use that allows autoincrement, and set their AINC_USE if possible.  */
4917
4918 static void
4919 set_autoinc_for_original_candidates (struct ivopts_data *data)
4920 {
4921   unsigned i, j;
4922
4923   for (i = 0; i < n_iv_cands (data); i++)
4924     {
4925       struct iv_cand *cand = iv_cand (data, i);
4926       struct iv_use *closest = NULL;
4927       if (cand->pos != IP_ORIGINAL)
4928         continue;
4929       for (j = 0; j < n_iv_uses (data); j++)
4930         {
4931           struct iv_use *use = iv_use (data, j);
4932           unsigned uid = gimple_uid (use->stmt);
4933           if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4934               || uid > gimple_uid (cand->incremented_at))
4935             continue;
4936           if (closest == NULL || uid > gimple_uid (closest->stmt))
4937             closest = use;
4938         }
4939       if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4940         continue;
4941       cand->ainc_use = closest;
4942     }
4943 }
4944
4945 /* Finds the candidates for the induction variables.  */
4946
4947 static void
4948 find_iv_candidates (struct ivopts_data *data)
4949 {
4950   /* Add commonly used ivs.  */
4951   add_standard_iv_candidates (data);
4952
4953   /* Add old induction variables.  */
4954   add_old_ivs_candidates (data);
4955
4956   /* Add induction variables derived from uses.  */
4957   add_derived_ivs_candidates (data);
4958
4959   set_autoinc_for_original_candidates (data);
4960
4961   /* Record the important candidates.  */
4962   record_important_candidates (data);
4963 }
4964
4965 /* Determines costs of basing the use of the iv on an iv candidate.  */
4966
4967 static void
4968 determine_use_iv_costs (struct ivopts_data *data)
4969 {
4970   unsigned i, j;
4971   struct iv_use *use;
4972   struct iv_cand *cand;
4973   bitmap to_clear = BITMAP_ALLOC (NULL);
4974
4975   alloc_use_cost_map (data);
4976
4977   for (i = 0; i < n_iv_uses (data); i++)
4978     {
4979       use = iv_use (data, i);
4980
4981       if (data->consider_all_candidates)
4982         {
4983           for (j = 0; j < n_iv_cands (data); j++)
4984             {
4985               cand = iv_cand (data, j);
4986               determine_use_iv_cost (data, use, cand);
4987             }
4988         }
4989       else
4990         {
4991           bitmap_iterator bi;
4992
4993           EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4994             {
4995               cand = iv_cand (data, j);
4996               if (!determine_use_iv_cost (data, use, cand))
4997                 bitmap_set_bit (to_clear, j);
4998             }
4999
5000           /* Remove the candidates for that the cost is infinite from
5001              the list of related candidates.  */
5002           bitmap_and_compl_into (use->related_cands, to_clear);
5003           bitmap_clear (to_clear);
5004         }
5005     }
5006
5007   BITMAP_FREE (to_clear);
5008
5009   if (dump_file && (dump_flags & TDF_DETAILS))
5010     {
5011       fprintf (dump_file, "Use-candidate costs:\n");
5012
5013       for (i = 0; i < n_iv_uses (data); i++)
5014         {
5015           use = iv_use (data, i);
5016
5017           fprintf (dump_file, "Use %d:\n", i);
5018           fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
5019           for (j = 0; j < use->n_map_members; j++)
5020             {
5021               if (!use->cost_map[j].cand
5022                   || infinite_cost_p (use->cost_map[j].cost))
5023                 continue;
5024
5025               fprintf (dump_file, "  %d\t%d\t%d\t",
5026                        use->cost_map[j].cand->id,
5027                        use->cost_map[j].cost.cost,
5028                        use->cost_map[j].cost.complexity);
5029               if (use->cost_map[j].depends_on)
5030                 bitmap_print (dump_file,
5031                               use->cost_map[j].depends_on, "","");
5032               if (use->cost_map[j].inv_expr_id != -1)
5033                 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5034               fprintf (dump_file, "\n");
5035             }
5036
5037           fprintf (dump_file, "\n");
5038         }
5039       fprintf (dump_file, "\n");
5040     }
5041 }
5042
5043 /* Determines cost of the candidate CAND.  */
5044
5045 static void
5046 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5047 {
5048   comp_cost cost_base;
5049   unsigned cost, cost_step;
5050   tree base;
5051
5052   if (!cand->iv)
5053     {
5054       cand->cost = 0;
5055       return;
5056     }
5057
5058   /* There are two costs associated with the candidate -- its increment
5059      and its initialization.  The second is almost negligible for any loop
5060      that rolls enough, so we take it just very little into account.  */
5061
5062   base = cand->iv->base;
5063   cost_base = force_var_cost (data, base, NULL);
5064   /* It will be exceptional that the iv register happens to be initialized with
5065      the proper value at no cost.  In general, there will at least be a regcopy
5066      or a const set.  */
5067   if (cost_base.cost == 0)
5068     cost_base.cost = COSTS_N_INSNS (1);
5069   cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
5070
5071   cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5072
5073   /* Prefer the original ivs unless we may gain something by replacing it.
5074      The reason is to make debugging simpler; so this is not relevant for
5075      artificial ivs created by other optimization passes.  */
5076   if (cand->pos != IP_ORIGINAL
5077       || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5078     cost++;
5079
5080   /* Prefer not to insert statements into latch unless there are some
5081      already (so that we do not create unnecessary jumps).  */
5082   if (cand->pos == IP_END
5083       && empty_block_p (ip_end_pos (data->current_loop)))
5084     cost++;
5085
5086   cand->cost = cost;
5087   cand->cost_step = cost_step;
5088 }
5089
5090 /* Determines costs of computation of the candidates.  */
5091
5092 static void
5093 determine_iv_costs (struct ivopts_data *data)
5094 {
5095   unsigned i;
5096
5097   if (dump_file && (dump_flags & TDF_DETAILS))
5098     {
5099       fprintf (dump_file, "Candidate costs:\n");
5100       fprintf (dump_file, "  cand\tcost\n");
5101     }
5102
5103   for (i = 0; i < n_iv_cands (data); i++)
5104     {
5105       struct iv_cand *cand = iv_cand (data, i);
5106
5107       determine_iv_cost (data, cand);
5108
5109       if (dump_file && (dump_flags & TDF_DETAILS))
5110         fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
5111     }
5112
5113   if (dump_file && (dump_flags & TDF_DETAILS))
5114     fprintf (dump_file, "\n");
5115 }
5116
5117 /* Calculates cost for having SIZE induction variables.  */
5118
5119 static unsigned
5120 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5121 {
5122   /* We add size to the cost, so that we prefer eliminating ivs
5123      if possible.  */
5124   return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5125                                             data->body_includes_call);
5126 }
5127
5128 /* For each size of the induction variable set determine the penalty.  */
5129
5130 static void
5131 determine_set_costs (struct ivopts_data *data)
5132 {
5133   unsigned j, n;
5134   gimple phi;
5135   gimple_stmt_iterator psi;
5136   tree op;
5137   struct loop *loop = data->current_loop;
5138   bitmap_iterator bi;
5139
5140   if (dump_file && (dump_flags & TDF_DETAILS))
5141     {
5142       fprintf (dump_file, "Global costs:\n");
5143       fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
5144       fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
5145       fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
5146       fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
5147     }
5148
5149   n = 0;
5150   for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5151     {
5152       phi = gsi_stmt (psi);
5153       op = PHI_RESULT (phi);
5154
5155       if (!is_gimple_reg (op))
5156         continue;
5157
5158       if (get_iv (data, op))
5159         continue;
5160
5161       n++;
5162     }
5163
5164   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5165     {
5166       struct version_info *info = ver_info (data, j);
5167
5168       if (info->inv_id && info->has_nonlin_use)
5169         n++;
5170     }
5171
5172   data->regs_used = n;
5173   if (dump_file && (dump_flags & TDF_DETAILS))
5174     fprintf (dump_file, "  regs_used %d\n", n);
5175
5176   if (dump_file && (dump_flags & TDF_DETAILS))
5177     {
5178       fprintf (dump_file, "  cost for size:\n");
5179       fprintf (dump_file, "  ivs\tcost\n");
5180       for (j = 0; j <= 2 * target_avail_regs; j++)
5181         fprintf (dump_file, "  %d\t%d\n", j,
5182                  ivopts_global_cost_for_size (data, j));
5183       fprintf (dump_file, "\n");
5184     }
5185 }
5186
5187 /* Returns true if A is a cheaper cost pair than B.  */
5188
5189 static bool
5190 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5191 {
5192   int cmp;
5193
5194   if (!a)
5195     return false;
5196
5197   if (!b)
5198     return true;
5199
5200   cmp = compare_costs (a->cost, b->cost);
5201   if (cmp < 0)
5202     return true;
5203
5204   if (cmp > 0)
5205     return false;
5206
5207   /* In case the costs are the same, prefer the cheaper candidate.  */
5208   if (a->cand->cost < b->cand->cost)
5209     return true;
5210
5211   return false;
5212 }
5213
5214
5215 /* Returns candidate by that USE is expressed in IVS.  */
5216
5217 static struct cost_pair *
5218 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5219 {
5220   return ivs->cand_for_use[use->id];
5221 }
5222
5223 /* Computes the cost field of IVS structure.  */
5224
5225 static void
5226 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5227 {
5228   comp_cost cost = ivs->cand_use_cost;
5229
5230   cost.cost += ivs->cand_cost;
5231
5232   cost.cost += ivopts_global_cost_for_size (data,
5233                                             ivs->n_regs + ivs->num_used_inv_expr);
5234
5235   ivs->cost = cost;
5236 }
5237
5238 /* Remove invariants in set INVS to set IVS.  */
5239
5240 static void
5241 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5242 {
5243   bitmap_iterator bi;
5244   unsigned iid;
5245
5246   if (!invs)
5247     return;
5248
5249   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5250     {
5251       ivs->n_invariant_uses[iid]--;
5252       if (ivs->n_invariant_uses[iid] == 0)
5253         ivs->n_regs--;
5254     }
5255 }
5256
5257 /* Set USE not to be expressed by any candidate in IVS.  */
5258
5259 static void
5260 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5261                  struct iv_use *use)
5262 {
5263   unsigned uid = use->id, cid;
5264   struct cost_pair *cp;
5265
5266   cp = ivs->cand_for_use[uid];
5267   if (!cp)
5268     return;
5269   cid = cp->cand->id;
5270
5271   ivs->bad_uses++;
5272   ivs->cand_for_use[uid] = NULL;
5273   ivs->n_cand_uses[cid]--;
5274
5275   if (ivs->n_cand_uses[cid] == 0)
5276     {
5277       bitmap_clear_bit (ivs->cands, cid);
5278       /* Do not count the pseudocandidates.  */
5279       if (cp->cand->iv)
5280         ivs->n_regs--;
5281       ivs->n_cands--;
5282       ivs->cand_cost -= cp->cand->cost;
5283
5284       iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5285     }
5286
5287   ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5288
5289   iv_ca_set_remove_invariants (ivs, cp->depends_on);
5290
5291   if (cp->inv_expr_id != -1)
5292     {
5293       ivs->used_inv_expr[cp->inv_expr_id]--;
5294       if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5295         ivs->num_used_inv_expr--;
5296     }
5297   iv_ca_recount_cost (data, ivs);
5298 }
5299
5300 /* Add invariants in set INVS to set IVS.  */
5301
5302 static void
5303 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5304 {
5305   bitmap_iterator bi;
5306   unsigned iid;
5307
5308   if (!invs)
5309     return;
5310
5311   EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5312     {
5313       ivs->n_invariant_uses[iid]++;
5314       if (ivs->n_invariant_uses[iid] == 1)
5315         ivs->n_regs++;
5316     }
5317 }
5318
5319 /* Set cost pair for USE in set IVS to CP.  */
5320
5321 static void
5322 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5323               struct iv_use *use, struct cost_pair *cp)
5324 {
5325   unsigned uid = use->id, cid;
5326
5327   if (ivs->cand_for_use[uid] == cp)
5328     return;
5329
5330   if (ivs->cand_for_use[uid])
5331     iv_ca_set_no_cp (data, ivs, use);
5332
5333   if (cp)
5334     {
5335       cid = cp->cand->id;
5336
5337       ivs->bad_uses--;
5338       ivs->cand_for_use[uid] = cp;
5339       ivs->n_cand_uses[cid]++;
5340       if (ivs->n_cand_uses[cid] == 1)
5341         {
5342           bitmap_set_bit (ivs->cands, cid);
5343           /* Do not count the pseudocandidates.  */
5344           if (cp->cand->iv)
5345             ivs->n_regs++;
5346           ivs->n_cands++;
5347           ivs->cand_cost += cp->cand->cost;
5348
5349           iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5350         }
5351
5352       ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5353       iv_ca_set_add_invariants (ivs, cp->depends_on);
5354
5355       if (cp->inv_expr_id != -1)
5356         {
5357           ivs->used_inv_expr[cp->inv_expr_id]++;
5358           if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5359             ivs->num_used_inv_expr++;
5360         }
5361       iv_ca_recount_cost (data, ivs);
5362     }
5363 }
5364
5365 /* Extend set IVS by expressing USE by some of the candidates in it
5366    if possible. All important candidates will be considered
5367    if IMPORTANT_CANDIDATES is true.  */
5368
5369 static void
5370 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5371                struct iv_use *use, bool important_candidates)
5372 {
5373   struct cost_pair *best_cp = NULL, *cp;
5374   bitmap_iterator bi;
5375   bitmap cands;
5376   unsigned i;
5377
5378   gcc_assert (ivs->upto >= use->id);
5379
5380   if (ivs->upto == use->id)
5381     {
5382       ivs->upto++;
5383       ivs->bad_uses++;
5384     }
5385
5386   cands = (important_candidates ? data->important_candidates : ivs->cands);
5387   EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5388     {
5389       struct iv_cand *cand = iv_cand (data, i);
5390
5391       cp = get_use_iv_cost (data, use, cand);
5392
5393       if (cheaper_cost_pair (cp, best_cp))
5394         best_cp = cp;
5395     }
5396
5397   iv_ca_set_cp (data, ivs, use, best_cp);
5398 }
5399
5400 /* Get cost for assignment IVS.  */
5401
5402 static comp_cost
5403 iv_ca_cost (struct iv_ca *ivs)
5404 {
5405   /* This was a conditional expression but it triggered a bug in
5406      Sun C 5.5.  */
5407   if (ivs->bad_uses)
5408     return infinite_cost;
5409   else
5410     return ivs->cost;
5411 }
5412
5413 /* Returns true if all dependences of CP are among invariants in IVS.  */
5414
5415 static bool
5416 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5417 {
5418   unsigned i;
5419   bitmap_iterator bi;
5420
5421   if (!cp->depends_on)
5422     return true;
5423
5424   EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5425     {
5426       if (ivs->n_invariant_uses[i] == 0)
5427         return false;
5428     }
5429
5430   return true;
5431 }
5432
5433 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5434    it before NEXT_CHANGE.  */
5435
5436 static struct iv_ca_delta *
5437 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5438                  struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5439 {
5440   struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5441
5442   change->use = use;
5443   change->old_cp = old_cp;
5444   change->new_cp = new_cp;
5445   change->next_change = next_change;
5446
5447   return change;
5448 }
5449
5450 /* Joins two lists of changes L1 and L2.  Destructive -- old lists
5451    are rewritten.  */
5452
5453 static struct iv_ca_delta *
5454 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5455 {
5456   struct iv_ca_delta *last;
5457
5458   if (!l2)
5459     return l1;
5460
5461   if (!l1)
5462     return l2;
5463
5464   for (last = l1; last->next_change; last = last->next_change)
5465     continue;
5466   last->next_change = l2;
5467
5468   return l1;
5469 }
5470
5471 /* Reverse the list of changes DELTA, forming the inverse to it.  */
5472
5473 static struct iv_ca_delta *
5474 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5475 {
5476   struct iv_ca_delta *act, *next, *prev = NULL;
5477   struct cost_pair *tmp;
5478
5479   for (act = delta; act; act = next)
5480     {
5481       next = act->next_change;
5482       act->next_change = prev;
5483       prev = act;
5484
5485       tmp = act->old_cp;
5486       act->old_cp = act->new_cp;
5487       act->new_cp = tmp;
5488     }
5489
5490   return prev;
5491 }
5492
5493 /* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
5494    reverted instead.  */
5495
5496 static void
5497 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5498                     struct iv_ca_delta *delta, bool forward)
5499 {
5500   struct cost_pair *from, *to;
5501   struct iv_ca_delta *act;
5502
5503   if (!forward)
5504     delta = iv_ca_delta_reverse (delta);
5505
5506   for (act = delta; act; act = act->next_change)
5507     {
5508       from = act->old_cp;
5509       to = act->new_cp;
5510       gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5511       iv_ca_set_cp (data, ivs, act->use, to);
5512     }
5513
5514   if (!forward)
5515     iv_ca_delta_reverse (delta);
5516 }
5517
5518 /* Returns true if CAND is used in IVS.  */
5519
5520 static bool
5521 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5522 {
5523   return ivs->n_cand_uses[cand->id] > 0;
5524 }
5525
5526 /* Returns number of induction variable candidates in the set IVS.  */
5527
5528 static unsigned
5529 iv_ca_n_cands (struct iv_ca *ivs)
5530 {
5531   return ivs->n_cands;
5532 }
5533
5534 /* Free the list of changes DELTA.  */
5535
5536 static void
5537 iv_ca_delta_free (struct iv_ca_delta **delta)
5538 {
5539   struct iv_ca_delta *act, *next;
5540
5541   for (act = *delta; act; act = next)
5542     {
5543       next = act->next_change;
5544       free (act);
5545     }
5546
5547   *delta = NULL;
5548 }
5549
5550 /* Allocates new iv candidates assignment.  */
5551
5552 static struct iv_ca *
5553 iv_ca_new (struct ivopts_data *data)
5554 {
5555   struct iv_ca *nw = XNEW (struct iv_ca);
5556
5557   nw->upto = 0;
5558   nw->bad_uses = 0;
5559   nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5560   nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5561   nw->cands = BITMAP_ALLOC (NULL);
5562   nw->n_cands = 0;
5563   nw->n_regs = 0;
5564   nw->cand_use_cost = zero_cost;
5565   nw->cand_cost = 0;
5566   nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5567   nw->cost = zero_cost;
5568   nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5569   nw->num_used_inv_expr = 0;
5570
5571   return nw;
5572 }
5573
5574 /* Free memory occupied by the set IVS.  */
5575
5576 static void
5577 iv_ca_free (struct iv_ca **ivs)
5578 {
5579   free ((*ivs)->cand_for_use);
5580   free ((*ivs)->n_cand_uses);
5581   BITMAP_FREE ((*ivs)->cands);
5582   free ((*ivs)->n_invariant_uses);
5583   free ((*ivs)->used_inv_expr);
5584   free (*ivs);
5585   *ivs = NULL;
5586 }
5587
5588 /* Dumps IVS to FILE.  */
5589
5590 static void
5591 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5592 {
5593   const char *pref = "  invariants ";
5594   unsigned i;
5595   comp_cost cost = iv_ca_cost (ivs);
5596
5597   fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5598   fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
5599            ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5600   bitmap_print (file, ivs->cands, "  candidates: ","\n");
5601
5602    for (i = 0; i < ivs->upto; i++)
5603     {
5604       struct iv_use *use = iv_use (data, i);
5605       struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5606       if (cp)
5607         fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5608                  use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5609       else
5610         fprintf (file, "   use:%d --> ??\n", use->id);
5611     }
5612
5613   for (i = 1; i <= data->max_inv_id; i++)
5614     if (ivs->n_invariant_uses[i])
5615       {
5616         fprintf (file, "%s%d", pref, i);
5617         pref = ", ";
5618       }
5619   fprintf (file, "\n\n");
5620 }
5621
5622 /* Try changing candidate in IVS to CAND for each use.  Return cost of the
5623    new set, and store differences in DELTA.  Number of induction variables
5624    in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5625    the function will try to find a solution with mimimal iv candidates.  */
5626
5627 static comp_cost
5628 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5629               struct iv_cand *cand, struct iv_ca_delta **delta,
5630               unsigned *n_ivs, bool min_ncand)
5631 {
5632   unsigned i;
5633   comp_cost cost;
5634   struct iv_use *use;
5635   struct cost_pair *old_cp, *new_cp;
5636
5637   *delta = NULL;
5638   for (i = 0; i < ivs->upto; i++)
5639     {
5640       use = iv_use (data, i);
5641       old_cp = iv_ca_cand_for_use (ivs, use);
5642
5643       if (old_cp
5644           && old_cp->cand == cand)
5645         continue;
5646
5647       new_cp = get_use_iv_cost (data, use, cand);
5648       if (!new_cp)
5649         continue;
5650
5651       if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5652         continue;
5653
5654       if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5655         continue;
5656
5657       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5658     }
5659
5660   iv_ca_delta_commit (data, ivs, *delta, true);
5661   cost = iv_ca_cost (ivs);
5662   if (n_ivs)
5663     *n_ivs = iv_ca_n_cands (ivs);
5664   iv_ca_delta_commit (data, ivs, *delta, false);
5665
5666   return cost;
5667 }
5668
5669 /* Try narrowing set IVS by removing CAND.  Return the cost of
5670    the new set and store the differences in DELTA.  */
5671
5672 static comp_cost
5673 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5674               struct iv_cand *cand, struct iv_ca_delta **delta)
5675 {
5676   unsigned i, ci;
5677   struct iv_use *use;
5678   struct cost_pair *old_cp, *new_cp, *cp;
5679   bitmap_iterator bi;
5680   struct iv_cand *cnd;
5681   comp_cost cost;
5682
5683   *delta = NULL;
5684   for (i = 0; i < n_iv_uses (data); i++)
5685     {
5686       use = iv_use (data, i);
5687
5688       old_cp = iv_ca_cand_for_use (ivs, use);
5689       if (old_cp->cand != cand)
5690         continue;
5691
5692       new_cp = NULL;
5693
5694       if (data->consider_all_candidates)
5695         {
5696           EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5697             {
5698               if (ci == cand->id)
5699                 continue;
5700
5701               cnd = iv_cand (data, ci);
5702
5703               cp = get_use_iv_cost (data, use, cnd);
5704               if (!cp)
5705                 continue;
5706
5707               if (!iv_ca_has_deps (ivs, cp))
5708                 continue;
5709
5710               if (!cheaper_cost_pair (cp, new_cp))
5711                 continue;
5712
5713               new_cp = cp;
5714             }
5715         }
5716       else
5717         {
5718           EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5719             {
5720               if (ci == cand->id)
5721                 continue;
5722
5723               cnd = iv_cand (data, ci);
5724
5725               cp = get_use_iv_cost (data, use, cnd);
5726               if (!cp)
5727                 continue;
5728               if (!iv_ca_has_deps (ivs, cp))
5729                 continue;
5730
5731               if (!cheaper_cost_pair (cp, new_cp))
5732                 continue;
5733
5734               new_cp = cp;
5735             }
5736         }
5737
5738       if (!new_cp)
5739         {
5740           iv_ca_delta_free (delta);
5741           return infinite_cost;
5742         }
5743
5744       *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5745     }
5746
5747   iv_ca_delta_commit (data, ivs, *delta, true);
5748   cost = iv_ca_cost (ivs);
5749   iv_ca_delta_commit (data, ivs, *delta, false);
5750
5751   return cost;
5752 }
5753
5754 /* Try optimizing the set of candidates IVS by removing candidates different
5755    from to EXCEPT_CAND from it.  Return cost of the new set, and store
5756    differences in DELTA.  */
5757
5758 static comp_cost
5759 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5760              struct iv_cand *except_cand, struct iv_ca_delta **delta)
5761 {
5762   bitmap_iterator bi;
5763   struct iv_ca_delta *act_delta, *best_delta;
5764   unsigned i;
5765   comp_cost best_cost, acost;
5766   struct iv_cand *cand;
5767
5768   best_delta = NULL;
5769   best_cost = iv_ca_cost (ivs);
5770
5771   EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5772     {
5773       cand = iv_cand (data, i);
5774
5775       if (cand == except_cand)
5776         continue;
5777
5778       acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5779
5780       if (compare_costs (acost, best_cost) < 0)
5781         {
5782           best_cost = acost;
5783           iv_ca_delta_free (&best_delta);
5784           best_delta = act_delta;
5785         }
5786       else
5787         iv_ca_delta_free (&act_delta);
5788     }
5789
5790   if (!best_delta)
5791     {
5792       *delta = NULL;
5793       return best_cost;
5794     }
5795
5796   /* Recurse to possibly remove other unnecessary ivs.  */
5797   iv_ca_delta_commit (data, ivs, best_delta, true);
5798   best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5799   iv_ca_delta_commit (data, ivs, best_delta, false);
5800   *delta = iv_ca_delta_join (best_delta, *delta);
5801   return best_cost;
5802 }
5803
5804 /* Tries to extend the sets IVS in the best possible way in order
5805    to express the USE.  If ORIGINALP is true, prefer candidates from
5806    the original set of IVs, otherwise favor important candidates not
5807    based on any memory object.  */
5808
5809 static bool
5810 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5811                   struct iv_use *use, bool originalp)
5812 {
5813   comp_cost best_cost, act_cost;
5814   unsigned i;
5815   bitmap_iterator bi;
5816   struct iv_cand *cand;
5817   struct iv_ca_delta *best_delta = NULL, *act_delta;
5818   struct cost_pair *cp;
5819
5820   iv_ca_add_use (data, ivs, use, false);
5821   best_cost = iv_ca_cost (ivs);
5822
5823   cp = iv_ca_cand_for_use (ivs, use);
5824   if (!cp)
5825     {
5826       ivs->upto--;
5827       ivs->bad_uses--;
5828       iv_ca_add_use (data, ivs, use, true);
5829       best_cost = iv_ca_cost (ivs);
5830       cp = iv_ca_cand_for_use (ivs, use);
5831     }
5832   if (cp)
5833     {
5834       best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5835       iv_ca_set_no_cp (data, ivs, use);
5836     }
5837
5838   /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
5839      first try important candidates not based on any memory object.  Only if
5840      this fails, try the specific ones.  Rationale -- in loops with many
5841      variables the best choice often is to use just one generic biv.  If we
5842      added here many ivs specific to the uses, the optimization algorithm later
5843      would be likely to get stuck in a local minimum, thus causing us to create
5844      too many ivs.  The approach from few ivs to more seems more likely to be
5845      successful -- starting from few ivs, replacing an expensive use by a
5846      specific iv should always be a win.  */
5847   EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5848     {
5849       cand = iv_cand (data, i);
5850
5851       if (originalp && cand->pos !=IP_ORIGINAL)
5852         continue;
5853
5854       if (!originalp && cand->iv->base_object != NULL_TREE)
5855         continue;
5856
5857       if (iv_ca_cand_used_p (ivs, cand))
5858         continue;
5859
5860       cp = get_use_iv_cost (data, use, cand);
5861       if (!cp)
5862         continue;
5863
5864       iv_ca_set_cp (data, ivs, use, cp);
5865       act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5866                                true);
5867       iv_ca_set_no_cp (data, ivs, use);
5868       act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5869
5870       if (compare_costs (act_cost, best_cost) < 0)
5871         {
5872           best_cost = act_cost;
5873
5874           iv_ca_delta_free (&best_delta);
5875           best_delta = act_delta;
5876         }
5877       else
5878         iv_ca_delta_free (&act_delta);
5879     }
5880
5881   if (infinite_cost_p (best_cost))
5882     {
5883       for (i = 0; i < use->n_map_members; i++)
5884         {
5885           cp = use->cost_map + i;
5886           cand = cp->cand;
5887           if (!cand)
5888             continue;
5889
5890           /* Already tried this.  */
5891           if (cand->important)
5892             {
5893               if (originalp && cand->pos == IP_ORIGINAL)
5894                 continue;
5895               if (!originalp && cand->iv->base_object == NULL_TREE)
5896                 continue;
5897             }
5898
5899           if (iv_ca_cand_used_p (ivs, cand))
5900             continue;
5901
5902           act_delta = NULL;
5903           iv_ca_set_cp (data, ivs, use, cp);
5904           act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5905           iv_ca_set_no_cp (data, ivs, use);
5906           act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5907                                        cp, act_delta);
5908
5909           if (compare_costs (act_cost, best_cost) < 0)
5910             {
5911               best_cost = act_cost;
5912
5913               if (best_delta)
5914                 iv_ca_delta_free (&best_delta);
5915               best_delta = act_delta;
5916             }
5917           else
5918             iv_ca_delta_free (&act_delta);
5919         }
5920     }
5921
5922   iv_ca_delta_commit (data, ivs, best_delta, true);
5923   iv_ca_delta_free (&best_delta);
5924
5925   return !infinite_cost_p (best_cost);
5926 }
5927
5928 /* Finds an initial assignment of candidates to uses.  */
5929
5930 static struct iv_ca *
5931 get_initial_solution (struct ivopts_data *data, bool originalp)
5932 {
5933   struct iv_ca *ivs = iv_ca_new (data);
5934   unsigned i;
5935
5936   for (i = 0; i < n_iv_uses (data); i++)
5937     if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5938       {
5939         iv_ca_free (&ivs);
5940         return NULL;
5941       }
5942
5943   return ivs;
5944 }
5945
5946 /* Tries to improve set of induction variables IVS.  */
5947
5948 static bool
5949 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5950 {
5951   unsigned i, n_ivs;
5952   comp_cost acost, best_cost = iv_ca_cost (ivs);
5953   struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5954   struct iv_cand *cand;
5955
5956   /* Try extending the set of induction variables by one.  */
5957   for (i = 0; i < n_iv_cands (data); i++)
5958     {
5959       cand = iv_cand (data, i);
5960
5961       if (iv_ca_cand_used_p (ivs, cand))
5962         continue;
5963
5964       acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5965       if (!act_delta)
5966         continue;
5967
5968       /* If we successfully added the candidate and the set is small enough,
5969          try optimizing it by removing other candidates.  */
5970       if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5971         {
5972           iv_ca_delta_commit (data, ivs, act_delta, true);
5973           acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5974           iv_ca_delta_commit (data, ivs, act_delta, false);
5975           act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5976         }
5977
5978       if (compare_costs (acost, best_cost) < 0)
5979         {
5980           best_cost = acost;
5981           iv_ca_delta_free (&best_delta);
5982           best_delta = act_delta;
5983         }
5984       else
5985         iv_ca_delta_free (&act_delta);
5986     }
5987
5988   if (!best_delta)
5989     {
5990       /* Try removing the candidates from the set instead.  */
5991       best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5992
5993       /* Nothing more we can do.  */
5994       if (!best_delta)
5995         return false;
5996     }
5997
5998   iv_ca_delta_commit (data, ivs, best_delta, true);
5999   gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6000   iv_ca_delta_free (&best_delta);
6001   return true;
6002 }
6003
6004 /* Attempts to find the optimal set of induction variables.  We do simple
6005    greedy heuristic -- we try to replace at most one candidate in the selected
6006    solution and remove the unused ivs while this improves the cost.  */
6007
6008 static struct iv_ca *
6009 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6010 {
6011   struct iv_ca *set;
6012
6013   /* Get the initial solution.  */
6014   set = get_initial_solution (data, originalp);
6015   if (!set)
6016     {
6017       if (dump_file && (dump_flags & TDF_DETAILS))
6018         fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6019       return NULL;
6020     }
6021
6022   if (dump_file && (dump_flags & TDF_DETAILS))
6023     {
6024       fprintf (dump_file, "Initial set of candidates:\n");
6025       iv_ca_dump (data, dump_file, set);
6026     }
6027
6028   while (try_improve_iv_set (data, set))
6029     {
6030       if (dump_file && (dump_flags & TDF_DETAILS))
6031         {
6032           fprintf (dump_file, "Improved to:\n");
6033           iv_ca_dump (data, dump_file, set);
6034         }
6035     }
6036
6037   return set;
6038 }
6039
6040 static struct iv_ca *
6041 find_optimal_iv_set (struct ivopts_data *data)
6042 {
6043   unsigned i;
6044   struct iv_ca *set, *origset;
6045   struct iv_use *use;
6046   comp_cost cost, origcost;
6047
6048   /* Determine the cost based on a strategy that starts with original IVs,
6049      and try again using a strategy that prefers candidates not based
6050      on any IVs.  */
6051   origset = find_optimal_iv_set_1 (data, true);
6052   set = find_optimal_iv_set_1 (data, false);
6053
6054   if (!origset && !set)
6055     return NULL;
6056
6057   origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6058   cost = set ? iv_ca_cost (set) : infinite_cost;
6059
6060   if (dump_file && (dump_flags & TDF_DETAILS))
6061     {
6062       fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6063                origcost.cost, origcost.complexity);
6064       fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6065                cost.cost, cost.complexity);
6066     }
6067
6068   /* Choose the one with the best cost.  */
6069   if (compare_costs (origcost, cost) <= 0)
6070     {
6071       if (set)
6072         iv_ca_free (&set);
6073       set = origset;
6074     }
6075   else if (origset)
6076     iv_ca_free (&origset);
6077
6078   for (i = 0; i < n_iv_uses (data); i++)
6079     {
6080       use = iv_use (data, i);
6081       use->selected = iv_ca_cand_for_use (set, use)->cand;
6082     }
6083
6084   return set;
6085 }
6086
6087 /* Creates a new induction variable corresponding to CAND.  */
6088
6089 static void
6090 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6091 {
6092   gimple_stmt_iterator incr_pos;
6093   tree base;
6094   bool after = false;
6095
6096   if (!cand->iv)
6097     return;
6098
6099   switch (cand->pos)
6100     {
6101     case IP_NORMAL:
6102       incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6103       break;
6104
6105     case IP_END:
6106       incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6107       after = true;
6108       break;
6109
6110     case IP_AFTER_USE:
6111       after = true;
6112       /* fall through */
6113     case IP_BEFORE_USE:
6114       incr_pos = gsi_for_stmt (cand->incremented_at);
6115       break;
6116
6117     case IP_ORIGINAL:
6118       /* Mark that the iv is preserved.  */
6119       name_info (data, cand->var_before)->preserve_biv = true;
6120       name_info (data, cand->var_after)->preserve_biv = true;
6121
6122       /* Rewrite the increment so that it uses var_before directly.  */
6123       find_interesting_uses_op (data, cand->var_after)->selected = cand;
6124       return;
6125     }
6126
6127   gimple_add_tmp_var (cand->var_before);
6128   add_referenced_var (cand->var_before);
6129
6130   base = unshare_expr (cand->iv->base);
6131
6132   create_iv (base, unshare_expr (cand->iv->step),
6133              cand->var_before, data->current_loop,
6134              &incr_pos, after, &cand->var_before, &cand->var_after);
6135 }
6136
6137 /* Creates new induction variables described in SET.  */
6138
6139 static void
6140 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6141 {
6142   unsigned i;
6143   struct iv_cand *cand;
6144   bitmap_iterator bi;
6145
6146   EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6147     {
6148       cand = iv_cand (data, i);
6149       create_new_iv (data, cand);
6150     }
6151
6152   if (dump_file && (dump_flags & TDF_DETAILS))
6153     {
6154       fprintf (dump_file, "\nSelected IV set: \n");
6155       EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6156         {
6157           cand = iv_cand (data, i);
6158           dump_cand (dump_file, cand);
6159         }
6160       fprintf (dump_file, "\n");
6161     }
6162 }
6163
6164 /* Rewrites USE (definition of iv used in a nonlinear expression)
6165    using candidate CAND.  */
6166
6167 static void
6168 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6169                             struct iv_use *use, struct iv_cand *cand)
6170 {
6171   tree comp;
6172   tree op, tgt;
6173   gimple ass;
6174   gimple_stmt_iterator bsi;
6175
6176   /* An important special case -- if we are asked to express value of
6177      the original iv by itself, just exit; there is no need to
6178      introduce a new computation (that might also need casting the
6179      variable to unsigned and back).  */
6180   if (cand->pos == IP_ORIGINAL
6181       && cand->incremented_at == use->stmt)
6182     {
6183       tree step, ctype, utype;
6184       enum tree_code incr_code = PLUS_EXPR, old_code;
6185
6186       gcc_assert (is_gimple_assign (use->stmt));
6187       gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6188
6189       step = cand->iv->step;
6190       ctype = TREE_TYPE (step);
6191       utype = TREE_TYPE (cand->var_after);
6192       if (TREE_CODE (step) == NEGATE_EXPR)
6193         {
6194           incr_code = MINUS_EXPR;
6195           step = TREE_OPERAND (step, 0);
6196         }
6197
6198       /* Check whether we may leave the computation unchanged.
6199          This is the case only if it does not rely on other
6200          computations in the loop -- otherwise, the computation
6201          we rely upon may be removed in remove_unused_ivs,
6202          thus leading to ICE.  */
6203       old_code = gimple_assign_rhs_code (use->stmt);
6204       if (old_code == PLUS_EXPR
6205           || old_code == MINUS_EXPR
6206           || old_code == POINTER_PLUS_EXPR)
6207         {
6208           if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6209             op = gimple_assign_rhs2 (use->stmt);
6210           else if (old_code != MINUS_EXPR
6211                    && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6212             op = gimple_assign_rhs1 (use->stmt);
6213           else
6214             op = NULL_TREE;
6215         }
6216       else
6217         op = NULL_TREE;
6218
6219       if (op
6220           && (TREE_CODE (op) == INTEGER_CST
6221               || operand_equal_p (op, step, 0)))
6222         return;
6223
6224       /* Otherwise, add the necessary computations to express
6225          the iv.  */
6226       op = fold_convert (ctype, cand->var_before);
6227       comp = fold_convert (utype,
6228                            build2 (incr_code, ctype, op,
6229                                    unshare_expr (step)));
6230     }
6231   else
6232     {
6233       comp = get_computation (data->current_loop, use, cand);
6234       gcc_assert (comp != NULL_TREE);
6235     }
6236
6237   switch (gimple_code (use->stmt))
6238     {
6239     case GIMPLE_PHI:
6240       tgt = PHI_RESULT (use->stmt);
6241
6242       /* If we should keep the biv, do not replace it.  */
6243       if (name_info (data, tgt)->preserve_biv)
6244         return;
6245
6246       bsi = gsi_after_labels (gimple_bb (use->stmt));
6247       break;
6248
6249     case GIMPLE_ASSIGN:
6250       tgt = gimple_assign_lhs (use->stmt);
6251       bsi = gsi_for_stmt (use->stmt);
6252       break;
6253
6254     default:
6255       gcc_unreachable ();
6256     }
6257
6258   if (!valid_gimple_rhs_p (comp)
6259       || (gimple_code (use->stmt) != GIMPLE_PHI
6260           /* We can't allow re-allocating the stmt as it might be pointed
6261              to still.  */
6262           && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6263               >= gimple_num_ops (gsi_stmt (bsi)))))
6264     {
6265       comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6266                                        true, GSI_SAME_STMT);
6267       if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6268         {
6269           duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6270           /* As this isn't a plain copy we have to reset alignment
6271              information.  */
6272           if (SSA_NAME_PTR_INFO (comp))
6273             mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6274         }
6275     }
6276
6277   if (gimple_code (use->stmt) == GIMPLE_PHI)
6278     {
6279       ass = gimple_build_assign (tgt, comp);
6280       gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6281
6282       bsi = gsi_for_stmt (use->stmt);
6283       remove_phi_node (&bsi, false);
6284     }
6285   else
6286     {
6287       gimple_assign_set_rhs_from_tree (&bsi, comp);
6288       use->stmt = gsi_stmt (bsi);
6289     }
6290 }
6291
6292 /* Performs a peephole optimization to reorder the iv update statement with
6293    a mem ref to enable instruction combining in later phases. The mem ref uses
6294    the iv value before the update, so the reordering transformation requires
6295    adjustment of the offset. CAND is the selected IV_CAND.
6296
6297    Example:
6298
6299    t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
6300    iv2 = iv1 + 1;
6301
6302    if (t < val)      (1)
6303      goto L;
6304    goto Head;
6305
6306
6307    directly propagating t over to (1) will introduce overlapping live range
6308    thus increase register pressure. This peephole transform it into:
6309
6310
6311    iv2 = iv1 + 1;
6312    t = MEM_REF (base, iv2, 8, 8);
6313    if (t < val)
6314      goto L;
6315    goto Head;
6316 */
6317
6318 static void
6319 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6320 {
6321   tree var_after;
6322   gimple iv_update, stmt;
6323   basic_block bb;
6324   gimple_stmt_iterator gsi, gsi_iv;
6325
6326   if (cand->pos != IP_NORMAL)
6327     return;
6328
6329   var_after = cand->var_after;
6330   iv_update = SSA_NAME_DEF_STMT (var_after);
6331
6332   bb = gimple_bb (iv_update);
6333   gsi = gsi_last_nondebug_bb (bb);
6334   stmt = gsi_stmt (gsi);
6335
6336   /* Only handle conditional statement for now.  */
6337   if (gimple_code (stmt) != GIMPLE_COND)
6338     return;
6339
6340   gsi_prev_nondebug (&gsi);
6341   stmt = gsi_stmt (gsi);
6342   if (stmt != iv_update)
6343     return;
6344
6345   gsi_prev_nondebug (&gsi);
6346   if (gsi_end_p (gsi))
6347     return;
6348
6349   stmt = gsi_stmt (gsi);
6350   if (gimple_code (stmt) != GIMPLE_ASSIGN)
6351     return;
6352
6353   if (stmt != use->stmt)
6354     return;
6355
6356   if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6357     return;
6358
6359   if (dump_file && (dump_flags & TDF_DETAILS))
6360     {
6361       fprintf (dump_file, "Reordering \n");
6362       print_gimple_stmt (dump_file, iv_update, 0, 0);
6363       print_gimple_stmt (dump_file, use->stmt, 0, 0);
6364       fprintf (dump_file, "\n");
6365     }
6366
6367   gsi = gsi_for_stmt (use->stmt);
6368   gsi_iv = gsi_for_stmt (iv_update);
6369   gsi_move_before (&gsi_iv, &gsi);
6370
6371   cand->pos = IP_BEFORE_USE;
6372   cand->incremented_at = use->stmt;
6373 }
6374
6375 /* Rewrites USE (address that is an iv) using candidate CAND.  */
6376
6377 static void
6378 rewrite_use_address (struct ivopts_data *data,
6379                      struct iv_use *use, struct iv_cand *cand)
6380 {
6381   aff_tree aff;
6382   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6383   tree base_hint = NULL_TREE;
6384   tree ref, iv;
6385   bool ok;
6386
6387   adjust_iv_update_pos (cand, use);
6388   ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6389   gcc_assert (ok);
6390   unshare_aff_combination (&aff);
6391
6392   /* To avoid undefined overflow problems, all IV candidates use unsigned
6393      integer types.  The drawback is that this makes it impossible for
6394      create_mem_ref to distinguish an IV that is based on a memory object
6395      from one that represents simply an offset.
6396
6397      To work around this problem, we pass a hint to create_mem_ref that
6398      indicates which variable (if any) in aff is an IV based on a memory
6399      object.  Note that we only consider the candidate.  If this is not
6400      based on an object, the base of the reference is in some subexpression
6401      of the use -- but these will use pointer types, so they are recognized
6402      by the create_mem_ref heuristics anyway.  */
6403   if (cand->iv->base_object)
6404     base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6405
6406   iv = var_at_stmt (data->current_loop, cand, use->stmt);
6407   ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6408                         reference_alias_ptr_type (*use->op_p),
6409                         iv, base_hint, data->speed);
6410   copy_ref_info (ref, *use->op_p);
6411   *use->op_p = ref;
6412 }
6413
6414 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6415    candidate CAND.  */
6416
6417 static void
6418 rewrite_use_compare (struct ivopts_data *data,
6419                      struct iv_use *use, struct iv_cand *cand)
6420 {
6421   tree comp, *var_p, op, bound;
6422   gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6423   enum tree_code compare;
6424   struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6425   bool ok;
6426
6427   bound = cp->value;
6428   if (bound)
6429     {
6430       tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6431       tree var_type = TREE_TYPE (var);
6432       gimple_seq stmts;
6433
6434       if (dump_file && (dump_flags & TDF_DETAILS))
6435         {
6436           fprintf (dump_file, "Replacing exit test: ");
6437           print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6438         }
6439       compare = cp->comp;
6440       bound = unshare_expr (fold_convert (var_type, bound));
6441       op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6442       if (stmts)
6443         gsi_insert_seq_on_edge_immediate (
6444                 loop_preheader_edge (data->current_loop),
6445                 stmts);
6446
6447       gimple_cond_set_lhs (use->stmt, var);
6448       gimple_cond_set_code (use->stmt, compare);
6449       gimple_cond_set_rhs (use->stmt, op);
6450       return;
6451     }
6452
6453   /* The induction variable elimination failed; just express the original
6454      giv.  */
6455   comp = get_computation (data->current_loop, use, cand);
6456   gcc_assert (comp != NULL_TREE);
6457
6458   ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6459   gcc_assert (ok);
6460
6461   *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6462                                      true, GSI_SAME_STMT);
6463 }
6464
6465 /* Rewrites USE using candidate CAND.  */
6466
6467 static void
6468 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6469 {
6470   switch (use->type)
6471     {
6472       case USE_NONLINEAR_EXPR:
6473         rewrite_use_nonlinear_expr (data, use, cand);
6474         break;
6475
6476       case USE_ADDRESS:
6477         rewrite_use_address (data, use, cand);
6478         break;
6479
6480       case USE_COMPARE:
6481         rewrite_use_compare (data, use, cand);
6482         break;
6483
6484       default:
6485         gcc_unreachable ();
6486     }
6487
6488   update_stmt (use->stmt);
6489 }
6490
6491 /* Rewrite the uses using the selected induction variables.  */
6492
6493 static void
6494 rewrite_uses (struct ivopts_data *data)
6495 {
6496   unsigned i;
6497   struct iv_cand *cand;
6498   struct iv_use *use;
6499
6500   for (i = 0; i < n_iv_uses (data); i++)
6501     {
6502       use = iv_use (data, i);
6503       cand = use->selected;
6504       gcc_assert (cand);
6505
6506       rewrite_use (data, use, cand);
6507     }
6508 }
6509
6510 /* Removes the ivs that are not used after rewriting.  */
6511
6512 static void
6513 remove_unused_ivs (struct ivopts_data *data)
6514 {
6515   unsigned j;
6516   bitmap_iterator bi;
6517   bitmap toremove = BITMAP_ALLOC (NULL);
6518
6519   /* Figure out an order in which to release SSA DEFs so that we don't
6520      release something that we'd have to propagate into a debug stmt
6521      afterwards.  */
6522   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6523     {
6524       struct version_info *info;
6525
6526       info = ver_info (data, j);
6527       if (info->iv
6528           && !integer_zerop (info->iv->step)
6529           && !info->inv_id
6530           && !info->iv->have_use_for
6531           && !info->preserve_biv)
6532         bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6533     }
6534
6535   release_defs_bitset (toremove);
6536
6537   BITMAP_FREE (toremove);
6538 }
6539
6540 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6541    for pointer_map_traverse.  */
6542
6543 static bool
6544 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6545                       void *data ATTRIBUTE_UNUSED)
6546 {
6547   struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6548
6549   free (niter);
6550   return true;
6551 }
6552
6553 /* Frees data allocated by the optimization of a single loop.  */
6554
6555 static void
6556 free_loop_data (struct ivopts_data *data)
6557 {
6558   unsigned i, j;
6559   bitmap_iterator bi;
6560   tree obj;
6561
6562   if (data->niters)
6563     {
6564       pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6565       pointer_map_destroy (data->niters);
6566       data->niters = NULL;
6567     }
6568
6569   EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6570     {
6571       struct version_info *info;
6572
6573       info = ver_info (data, i);
6574       free (info->iv);
6575       info->iv = NULL;
6576       info->has_nonlin_use = false;
6577       info->preserve_biv = false;
6578       info->inv_id = 0;
6579     }
6580   bitmap_clear (data->relevant);
6581   bitmap_clear (data->important_candidates);
6582
6583   for (i = 0; i < n_iv_uses (data); i++)
6584     {
6585       struct iv_use *use = iv_use (data, i);
6586
6587       free (use->iv);
6588       BITMAP_FREE (use->related_cands);
6589       for (j = 0; j < use->n_map_members; j++)
6590         if (use->cost_map[j].depends_on)
6591           BITMAP_FREE (use->cost_map[j].depends_on);
6592       free (use->cost_map);
6593       free (use);
6594     }
6595   VEC_truncate (iv_use_p, data->iv_uses, 0);
6596
6597   for (i = 0; i < n_iv_cands (data); i++)
6598     {
6599       struct iv_cand *cand = iv_cand (data, i);
6600
6601       free (cand->iv);
6602       if (cand->depends_on)
6603         BITMAP_FREE (cand->depends_on);
6604       free (cand);
6605     }
6606   VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6607
6608   if (data->version_info_size < num_ssa_names)
6609     {
6610       data->version_info_size = 2 * num_ssa_names;
6611       free (data->version_info);
6612       data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6613     }
6614
6615   data->max_inv_id = 0;
6616
6617   FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6618     SET_DECL_RTL (obj, NULL_RTX);
6619
6620   VEC_truncate (tree, decl_rtl_to_reset, 0);
6621
6622   htab_empty (data->inv_expr_tab);
6623   data->inv_expr_id = 0;
6624 }
6625
6626 /* Finalizes data structures used by the iv optimization pass.  LOOPS is the
6627    loop tree.  */
6628
6629 static void
6630 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6631 {
6632   free_loop_data (data);
6633   free (data->version_info);
6634   BITMAP_FREE (data->relevant);
6635   BITMAP_FREE (data->important_candidates);
6636
6637   VEC_free (tree, heap, decl_rtl_to_reset);
6638   VEC_free (iv_use_p, heap, data->iv_uses);
6639   VEC_free (iv_cand_p, heap, data->iv_candidates);
6640   htab_delete (data->inv_expr_tab);
6641 }
6642
6643 /* Returns true if the loop body BODY includes any function calls.  */
6644
6645 static bool
6646 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6647 {
6648   gimple_stmt_iterator gsi;
6649   unsigned i;
6650
6651   for (i = 0; i < num_nodes; i++)
6652     for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6653       {
6654         gimple stmt = gsi_stmt (gsi);
6655         if (is_gimple_call (stmt)
6656             && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6657           return true;
6658       }
6659   return false;
6660 }
6661
6662 /* Optimizes the LOOP.  Returns true if anything changed.  */
6663
6664 static bool
6665 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6666 {
6667   bool changed = false;
6668   struct iv_ca *iv_ca;
6669   edge exit = single_dom_exit (loop);
6670   basic_block *body;
6671
6672   gcc_assert (!data->niters);
6673   data->current_loop = loop;
6674   data->speed = optimize_loop_for_speed_p (loop);
6675
6676   if (dump_file && (dump_flags & TDF_DETAILS))
6677     {
6678       fprintf (dump_file, "Processing loop %d\n", loop->num);
6679
6680       if (exit)
6681         {
6682           fprintf (dump_file, "  single exit %d -> %d, exit condition ",
6683                    exit->src->index, exit->dest->index);
6684           print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6685           fprintf (dump_file, "\n");
6686         }
6687
6688       fprintf (dump_file, "\n");
6689     }
6690
6691   body = get_loop_body (loop);
6692   data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6693   renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6694   free (body);
6695
6696   data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6697
6698   /* For each ssa name determines whether it behaves as an induction variable
6699      in some loop.  */
6700   if (!find_induction_variables (data))
6701     goto finish;
6702
6703   /* Finds interesting uses (item 1).  */
6704   find_interesting_uses (data);
6705   if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6706     goto finish;
6707
6708   /* Finds candidates for the induction variables (item 2).  */
6709   find_iv_candidates (data);
6710
6711   /* Calculates the costs (item 3, part 1).  */
6712   determine_iv_costs (data);
6713   determine_use_iv_costs (data);
6714   determine_set_costs (data);
6715
6716   /* Find the optimal set of induction variables (item 3, part 2).  */
6717   iv_ca = find_optimal_iv_set (data);
6718   if (!iv_ca)
6719     goto finish;
6720   changed = true;
6721
6722   /* Create the new induction variables (item 4, part 1).  */
6723   create_new_ivs (data, iv_ca);
6724   iv_ca_free (&iv_ca);
6725
6726   /* Rewrite the uses (item 4, part 2).  */
6727   rewrite_uses (data);
6728
6729   /* Remove the ivs that are unused after rewriting.  */
6730   remove_unused_ivs (data);
6731
6732   /* We have changed the structure of induction variables; it might happen
6733      that definitions in the scev database refer to some of them that were
6734      eliminated.  */
6735   scev_reset ();
6736
6737 finish:
6738   free_loop_data (data);
6739
6740   return changed;
6741 }
6742
6743 /* Main entry point.  Optimizes induction variables in loops.  */
6744
6745 void
6746 tree_ssa_iv_optimize (void)
6747 {
6748   struct loop *loop;
6749   struct ivopts_data data;
6750   loop_iterator li;
6751
6752   tree_ssa_iv_optimize_init (&data);
6753
6754   /* Optimize the loops starting with the innermost ones.  */
6755   FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6756     {
6757       if (dump_file && (dump_flags & TDF_DETAILS))
6758         flow_loop_dump (loop, dump_file, NULL, 1);
6759
6760       tree_ssa_iv_optimize_loop (&data, loop);
6761     }
6762
6763   tree_ssa_iv_optimize_finalize (&data);
6764 }