gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "optabs.h"
 140 #include "toplev.h"
 141 #include "tree-chrec.h"
 142 #include "tree-data-ref.h"
 143 #include "tree-scalar-evolution.h"
 144 #include "input.h"
 145 #include "tree-vectorizer.h"
 146 #include "tree-pass.h"
 147
 148 /*************************************************************************
 149   Simple Loop Peeling Utilities
 150  *************************************************************************/
 151 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 152   (struct loop *, struct loops *, edge);
 153 static void slpeel_update_phis_for_duplicate_loop
 154   (struct loop *, struct loop *, bool after);
 155 static void slpeel_update_phi_nodes_for_guard1
 156   (edge, struct loop *, bool, basic_block *, bitmap *);
 157 static void slpeel_update_phi_nodes_for_guard2
 158   (edge, struct loop *, bool, basic_block *);
 159 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 160
 161 static void rename_use_op (use_operand_p);
 162 static void rename_variables_in_bb (basic_block);
 163 static void rename_variables_in_loop (struct loop *);
 164
 165 /*************************************************************************
 166   General Vectorization Utilities
 167  *************************************************************************/
 168 static void vect_set_dump_settings (void);
 169
 170 /* vect_dump will be set to stderr or dump_file if exist.  */
 171 FILE *vect_dump;
 172
 173 /* vect_verbosity_level set to an invalid value
 174    to mark that it's uninitialized.  */
 175 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 176
 177 /* Number of loops, at the beginning of vectorization.  */
 178 unsigned int vect_loops_num;
 179
 180 /* Loop location.  */
 181 static LOC vect_loop_location;
 182
 183 /* Bitmap of virtual variables to be renamed.  */
 184 bitmap vect_vnames_to_rename;
 185 \f
 186 /*************************************************************************
 187   Simple Loop Peeling Utilities
 188
 189   Utilities to support loop peeling for vectorization purposes.
 190  *************************************************************************/
 191
 192
 193 /* Renames the use *OP_P.  */
 194
 195 static void
 196 rename_use_op (use_operand_p op_p)
 197 {
 198   tree new_name;
 199
 200   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 201     return;
 202
 203   new_name = get_current_def (USE_FROM_PTR (op_p));
 204
 205   /* Something defined outside of the loop.  */
 206   if (!new_name)
 207     return;
 208
 209   /* An ordinary ssa name defined in the loop.  */
 210
 211   SET_USE (op_p, new_name);
 212 }
 213
 214
 215 /* Renames the variables in basic block BB.  */
 216
 217 static void
 218 rename_variables_in_bb (basic_block bb)
 219 {
 220   tree phi;
 221   block_stmt_iterator bsi;
 222   tree stmt;
 223   use_operand_p use_p;
 224   ssa_op_iter iter;
 225   edge e;
 226   edge_iterator ei;
 227   struct loop *loop = bb->loop_father;
 228
 229   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 230     {
 231       stmt = bsi_stmt (bsi);
 232       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 233                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 234         rename_use_op (use_p);
 235     }
 236
 237   FOR_EACH_EDGE (e, ei, bb->succs)
 238     {
 239       if (!flow_bb_inside_loop_p (loop, e->dest))
 240         continue;
 241       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 242         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 243     }
 244 }
 245
 246
 247 /* Renames variables in new generated LOOP.  */
 248
 249 static void
 250 rename_variables_in_loop (struct loop *loop)
 251 {
 252   unsigned i;
 253   basic_block *bbs;
 254
 255   bbs = get_loop_body (loop);
 256
 257   for (i = 0; i < loop->num_nodes; i++)
 258     rename_variables_in_bb (bbs[i]);
 259
 260   free (bbs);
 261 }
 262
 263
 264 /* Update the PHI nodes of NEW_LOOP.
 265
 266    NEW_LOOP is a duplicate of ORIG_LOOP.
 267    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 268    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 269    executes before it.  */
 270
 271 static void
 272 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 273                                        struct loop *new_loop, bool after)
 274 {
 275   tree new_ssa_name;
 276   tree phi_new, phi_orig;
 277   tree def;
 278   edge orig_loop_latch = loop_latch_edge (orig_loop);
 279   edge orig_entry_e = loop_preheader_edge (orig_loop);
 280   edge new_loop_exit_e = new_loop->single_exit;
 281   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 282   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 283
 284   /*
 285      step 1. For each loop-header-phi:
 286              Add the first phi argument for the phi in NEW_LOOP
 287             (the one associated with the entry of NEW_LOOP)
 288
 289      step 2. For each loop-header-phi:
 290              Add the second phi argument for the phi in NEW_LOOP
 291             (the one associated with the latch of NEW_LOOP)
 292
 293      step 3. Update the phis in the successor block of NEW_LOOP.
 294
 295         case 1: NEW_LOOP was placed before ORIG_LOOP:
 296                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 297                 Updating the phis in the successor block can therefore be done
 298                 along with the scanning of the loop header phis, because the
 299                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 300                 phi nodes, organized in the same order.
 301
 302         case 2: NEW_LOOP was placed after ORIG_LOOP:
 303                 The successor block of NEW_LOOP is the original exit block of
 304                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 305                 We postpone updating these phis to a later stage (when
 306                 loop guards are added).
 307    */
 308
 309
 310   /* Scan the phis in the headers of the old and new loops
 311      (they are organized in exactly the same order).  */
 312
 313   for (phi_new = phi_nodes (new_loop->header),
 314        phi_orig = phi_nodes (orig_loop->header);
 315        phi_new && phi_orig;
 316        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 317     {
 318       /* step 1.  */
 319       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 320       add_phi_arg (phi_new, def, new_loop_entry_e);
 321
 322       /* step 2.  */
 323       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 324       if (TREE_CODE (def) != SSA_NAME)
 325         continue;
 326
 327       new_ssa_name = get_current_def (def);
 328       if (!new_ssa_name)
 329         {
 330           /* This only happens if there are no definitions
 331              inside the loop. use the phi_result in this case.  */
 332           new_ssa_name = PHI_RESULT (phi_new);
 333         }
 334
 335       /* An ordinary ssa name defined in the loop.  */
 336       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 337
 338       /* step 3 (case 1).  */
 339       if (!after)
 340         {
 341           gcc_assert (new_loop_exit_e == orig_entry_e);
 342           SET_PHI_ARG_DEF (phi_orig,
 343                            new_loop_exit_e->dest_idx,
 344                            new_ssa_name);
 345         }
 346     }
 347 }
 348
 349
 350 /* Update PHI nodes for a guard of the LOOP.
 351
 352    Input:
 353    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 354         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 355         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 356         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 357         We denote this bb NEW_MERGE_BB because before the guard code was added
 358         it had a single predecessor (the LOOP header), and now it became a merge
 359         point of two paths - the path that ends with the LOOP exit-edge, and
 360         the path that ends with GUARD_EDGE.
 361    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 362         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 363
 364    ===> The CFG before the guard-code was added:
 365         LOOP_header_bb:
 366           loop_body
 367           if (exit_loop) goto update_bb
 368           else           goto LOOP_header_bb
 369         update_bb:
 370
 371    ==> The CFG after the guard-code was added:
 372         guard_bb:
 373           if (LOOP_guard_condition) goto new_merge_bb
 374           else                      goto LOOP_header_bb
 375         LOOP_header_bb:
 376           loop_body
 377           if (exit_loop_condition) goto new_merge_bb
 378           else                     goto LOOP_header_bb
 379         new_merge_bb:
 380           goto update_bb
 381         update_bb:
 382
 383    ==> The CFG after this function:
 384         guard_bb:
 385           if (LOOP_guard_condition) goto new_merge_bb
 386           else                      goto LOOP_header_bb
 387         LOOP_header_bb:
 388           loop_body
 389           if (exit_loop_condition) goto new_exit_bb
 390           else                     goto LOOP_header_bb
 391         new_exit_bb:
 392         new_merge_bb:
 393           goto update_bb
 394         update_bb:
 395
 396    This function:
 397    1. creates and updates the relevant phi nodes to account for the new
 398       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 399       1.1. Create phi nodes at NEW_MERGE_BB.
 400       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 401            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 402    2. preserves loop-closed-ssa-form by creating the required phi nodes
 403       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 404
 405    There are two flavors to this function:
 406
 407    slpeel_update_phi_nodes_for_guard1:
 408      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 409      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 410      for variables that have phis in the loop header.
 411
 412    slpeel_update_phi_nodes_for_guard2:
 413      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 414      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 415      for variables that have phis in the loop exit.
 416
 417    I.E., the overall structure is:
 418
 419         loop1_preheader_bb:
 420                 guard1 (goto loop1/merg1_bb)
 421         loop1
 422         loop1_exit_bb:
 423                 guard2 (goto merge1_bb/merge2_bb)
 424         merge1_bb
 425         loop2
 426         loop2_exit_bb
 427         merge2_bb
 428         next_bb
 429
 430    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 431    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 432    that have phis in loop1->header).
 433
 434    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 435    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 436    that have phis in next_bb). It also adds some of these phis to
 437    loop1_exit_bb.
 438
 439    slpeel_update_phi_nodes_for_guard1 is always called before
 440    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 441    to create correct data-flow and loop-closed-ssa-form.
 442
 443    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 444    that change between iterations of a loop (and therefore have a phi-node
 445    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 446    phis for variables that are used out of the loop (and therefore have
 447    loop-closed exit phis). Some variables may be both updated between
 448    iterations and used after the loop. This is why in loop1_exit_bb we
 449    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 450    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 451
 452    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 453      an original loop. i.e., we have:
 454
 455            orig_loop
 456            guard_bb (goto LOOP/new_merge)
 457            new_loop <-- LOOP
 458            new_exit
 459            new_merge
 460            next_bb
 461
 462      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 463      have:
 464
 465            new_loop
 466            guard_bb (goto LOOP/new_merge)
 467            orig_loop <-- LOOP
 468            new_exit
 469            new_merge
 470            next_bb
 471
 472      The SSA names defined in the original loop have a current
 473      reaching definition that that records the corresponding new
 474      ssa-name used in the new duplicated loop copy.
 475   */
 476
 477 /* Function slpeel_update_phi_nodes_for_guard1
 478
 479    Input:
 480    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 481    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 482             information.
 483
 484    In the context of the overall structure, we have:
 485
 486         loop1_preheader_bb:
 487                 guard1 (goto loop1/merg1_bb)
 488 LOOP->  loop1
 489         loop1_exit_bb:
 490                 guard2 (goto merge1_bb/merge2_bb)
 491         merge1_bb
 492         loop2
 493         loop2_exit_bb
 494         merge2_bb
 495         next_bb
 496
 497    For each name updated between loop iterations (i.e - for each name that has
 498    an entry (loop-header) phi in LOOP) we create a new phi in:
 499    1. merge1_bb (to account for the edge from guard1)
 500    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 501 */
 502
 503 static void
 504 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 505                                     bool is_new_loop, basic_block *new_exit_bb,
 506                                     bitmap *defs)
 507 {
 508   tree orig_phi, new_phi;
 509   tree update_phi, update_phi2;
 510   tree guard_arg, loop_arg;
 511   basic_block new_merge_bb = guard_edge->dest;
 512   edge e = EDGE_SUCC (new_merge_bb, 0);
 513   basic_block update_bb = e->dest;
 514   basic_block orig_bb = loop->header;
 515   edge new_exit_e;
 516   tree current_new_name;
 517   tree name;
 518
 519   /* Create new bb between loop and new_merge_bb.  */
 520   *new_exit_bb = split_edge (loop->single_exit);
 521   add_bb_to_loop (*new_exit_bb, loop->outer);
 522
 523   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 524
 525   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 526        orig_phi && update_phi;
 527        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 528     {
 529       /* Virtual phi; Mark it for renaming. We actually want to call
 530          mar_sym_for_renaming, but since all ssa renaming datastructures
 531          are going to be freed before we get to call ssa_upate, we just
 532          record this name for now in a bitmap, and will mark it for
 533          renaming later.  */
 534       name = PHI_RESULT (orig_phi);
 535       if (!is_gimple_reg (SSA_NAME_VAR (name)))
 536         bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
 537
 538       /** 1. Handle new-merge-point phis  **/
 539
 540       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 541       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 542                                  new_merge_bb);
 543
 544       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 545             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 546       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 547       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 548
 549       add_phi_arg (new_phi, loop_arg, new_exit_e);
 550       add_phi_arg (new_phi, guard_arg, guard_edge);
 551
 552       /* 1.3. Update phi in successor block.  */
 553       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 554                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 555       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 556       update_phi2 = new_phi;
 557
 558
 559       /** 2. Handle loop-closed-ssa-form phis  **/
 560
 561       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 562       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 563                                  *new_exit_bb);
 564
 565       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 566       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 567
 568       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 569       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 570       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 571
 572       /* 2.4. Record the newly created name with set_current_def.
 573          We want to find a name such that
 574                 name = get_current_def (orig_loop_name)
 575          and to set its current definition as follows:
 576                 set_current_def (name, new_phi_name)
 577
 578          If LOOP is a new loop then loop_arg is already the name we're
 579          looking for. If LOOP is the original loop, then loop_arg is
 580          the orig_loop_name and the relevant name is recorded in its
 581          current reaching definition.  */
 582       if (is_new_loop)
 583         current_new_name = loop_arg;
 584       else
 585         {
 586           current_new_name = get_current_def (loop_arg);
 587           /* current_def is not available only if the variable does not
 588              change inside the loop, in which case we also don't care
 589              about recording a current_def for it because we won't be
 590              trying to create loop-exit-phis for it.  */
 591           if (!current_new_name)
 592             continue;
 593         }
 594       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 595
 596       set_current_def (current_new_name, PHI_RESULT (new_phi));
 597       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 598     }
 599
 600   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 601 }
 602
 603
 604 /* Function slpeel_update_phi_nodes_for_guard2
 605
 606    Input:
 607    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 608
 609    In the context of the overall structure, we have:
 610
 611         loop1_preheader_bb:
 612                 guard1 (goto loop1/merg1_bb)
 613         loop1
 614         loop1_exit_bb:
 615                 guard2 (goto merge1_bb/merge2_bb)
 616         merge1_bb
 617 LOOP->  loop2
 618         loop2_exit_bb
 619         merge2_bb
 620         next_bb
 621
 622    For each name used out side the loop (i.e - for each name that has an exit
 623    phi in next_bb) we create a new phi in:
 624    1. merge2_bb (to account for the edge from guard_bb)
 625    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 626    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 627       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 628 */
 629
 630 static void
 631 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 632                                     bool is_new_loop, basic_block *new_exit_bb)
 633 {
 634   tree orig_phi, new_phi;
 635   tree update_phi, update_phi2;
 636   tree guard_arg, loop_arg;
 637   basic_block new_merge_bb = guard_edge->dest;
 638   edge e = EDGE_SUCC (new_merge_bb, 0);
 639   basic_block update_bb = e->dest;
 640   edge new_exit_e;
 641   tree orig_def, orig_def_new_name;
 642   tree new_name, new_name2;
 643   tree arg;
 644
 645   /* Create new bb between loop and new_merge_bb.  */
 646   *new_exit_bb = split_edge (loop->single_exit);
 647   add_bb_to_loop (*new_exit_bb, loop->outer);
 648
 649   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 650
 651   for (update_phi = phi_nodes (update_bb); update_phi;
 652        update_phi = PHI_CHAIN (update_phi))
 653     {
 654       orig_phi = update_phi;
 655       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 656       /* This loop-closed-phi actually doesn't represent a use
 657          out of the loop - the phi arg is a constant.  */
 658       if (TREE_CODE (orig_def) != SSA_NAME)
 659         continue;
 660       orig_def_new_name = get_current_def (orig_def);
 661       arg = NULL_TREE;
 662
 663       /** 1. Handle new-merge-point phis  **/
 664
 665       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 666       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 667                                  new_merge_bb);
 668
 669       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 670             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 671       new_name = orig_def;
 672       new_name2 = NULL_TREE;
 673       if (orig_def_new_name)
 674         {
 675           new_name = orig_def_new_name;
 676           /* Some variables have both loop-entry-phis and loop-exit-phis.
 677              Such variables were given yet newer names by phis placed in
 678              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 679              new_name2 = get_current_def (get_current_def (orig_name)).  */
 680           new_name2 = get_current_def (new_name);
 681         }
 682
 683       if (is_new_loop)
 684         {
 685           guard_arg = orig_def;
 686           loop_arg = new_name;
 687         }
 688       else
 689         {
 690           guard_arg = new_name;
 691           loop_arg = orig_def;
 692         }
 693       if (new_name2)
 694         guard_arg = new_name2;
 695
 696       add_phi_arg (new_phi, loop_arg, new_exit_e);
 697       add_phi_arg (new_phi, guard_arg, guard_edge);
 698
 699       /* 1.3. Update phi in successor block.  */
 700       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 701       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 702       update_phi2 = new_phi;
 703
 704
 705       /** 2. Handle loop-closed-ssa-form phis  **/
 706
 707       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 708       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 709                                  *new_exit_bb);
 710
 711       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 712       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 713
 714       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 715       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 716       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 717
 718
 719       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 720
 721       /* 3.1. Find the relevant names that need an exit-phi in
 722          GUARD_BB, i.e. names for which
 723          slpeel_update_phi_nodes_for_guard1 had not already created a
 724          phi node. This is the case for names that are used outside
 725          the loop (and therefore need an exit phi) but are not updated
 726          across loop iterations (and therefore don't have a
 727          loop-header-phi).
 728
 729          slpeel_update_phi_nodes_for_guard1 is responsible for
 730          creating loop-exit phis in GUARD_BB for names that have a
 731          loop-header-phi.  When such a phi is created we also record
 732          the new name in its current definition.  If this new name
 733          exists, then guard_arg was set to this new name (see 1.2
 734          above).  Therefore, if guard_arg is not this new name, this
 735          is an indication that an exit-phi in GUARD_BB was not yet
 736          created, so we take care of it here.  */
 737       if (guard_arg == new_name2)
 738         continue;
 739       arg = guard_arg;
 740
 741       /* 3.2. Generate new phi node in GUARD_BB:  */
 742       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 743                                  guard_edge->src);
 744
 745       /* 3.3. GUARD_BB has one incoming edge:  */
 746       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 747       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 748
 749       /* 3.4. Update phi in successor of GUARD_BB:  */
 750       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 751                                                                 == guard_arg);
 752       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 753     }
 754
 755   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 756 }
 757
 758
 759 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 760    that starts at zero, increases by one and its limit is NITERS.
 761
 762    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 763
 764 void
 765 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 766 {
 767   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 768   tree orig_cond;
 769   edge exit_edge = loop->single_exit;
 770   block_stmt_iterator loop_cond_bsi;
 771   block_stmt_iterator incr_bsi;
 772   bool insert_after;
 773   tree begin_label = tree_block_label (loop->latch);
 774   tree exit_label = tree_block_label (loop->single_exit->dest);
 775   tree init = build_int_cst (TREE_TYPE (niters), 0);
 776   tree step = build_int_cst (TREE_TYPE (niters), 1);
 777   tree then_label;
 778   tree else_label;
 779   LOC loop_loc;
 780
 781   orig_cond = get_loop_exit_condition (loop);
 782   gcc_assert (orig_cond);
 783   loop_cond_bsi = bsi_for_stmt (orig_cond);
 784
 785   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 786   create_iv (init, step, NULL_TREE, loop,
 787              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 788
 789   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 790     {
 791       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 792       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 793       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 794     }
 795   else /* 'then' edge loops back.  */
 796     {
 797       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 798       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 799       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 800     }
 801
 802   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 803                      then_label, else_label);
 804   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 805
 806   /* Remove old loop exit test:  */
 807   bsi_remove (&loop_cond_bsi);
 808
 809   loop_loc = find_loop_location (loop);
 810   if (dump_file && (dump_flags & TDF_DETAILS))
 811     {
 812       if (loop_loc != UNKNOWN_LOC)
 813         fprintf (dump_file, "\nloop at %s:%d: ",
 814                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 815       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 816     }
 817
 818   loop->nb_iterations = niters;
 819 }
 820
 821
 822 /* Given LOOP this function generates a new copy of it and puts it
 823    on E which is either the entry or exit of LOOP.  */
 824
 825 static struct loop *
 826 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 827                                         edge e)
 828 {
 829   struct loop *new_loop;
 830   basic_block *new_bbs, *bbs;
 831   bool at_exit;
 832   bool was_imm_dom;
 833   basic_block exit_dest;
 834   tree phi, phi_arg;
 835
 836   at_exit = (e == loop->single_exit);
 837   if (!at_exit && e != loop_preheader_edge (loop))
 838     return NULL;
 839
 840   bbs = get_loop_body (loop);
 841
 842   /* Check whether duplication is possible.  */
 843   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 844     {
 845       free (bbs);
 846       return NULL;
 847     }
 848
 849   /* Generate new loop structure.  */
 850   new_loop = duplicate_loop (loops, loop, loop->outer);
 851   if (!new_loop)
 852     {
 853       free (bbs);
 854       return NULL;
 855     }
 856
 857   exit_dest = loop->single_exit->dest;
 858   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 859                                           exit_dest) == loop->header ?
 860                  true : false);
 861
 862   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 863
 864   copy_bbs (bbs, loop->num_nodes, new_bbs,
 865             &loop->single_exit, 1, &new_loop->single_exit, NULL,
 866             e->src);
 867
 868   /* Duplicating phi args at exit bbs as coming
 869      also from exit of duplicated loop.  */
 870   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 871     {
 872       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 873       if (phi_arg)
 874         {
 875           edge new_loop_exit_edge;
 876
 877           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 878             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 879           else
 880             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 881
 882           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 883         }
 884     }
 885
 886   if (at_exit) /* Add the loop copy at exit.  */
 887     {
 888       redirect_edge_and_branch_force (e, new_loop->header);
 889       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 890       if (was_imm_dom)
 891         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 892     }
 893   else /* Add the copy at entry.  */
 894     {
 895       edge new_exit_e;
 896       edge entry_e = loop_preheader_edge (loop);
 897       basic_block preheader = entry_e->src;
 898
 899       if (!flow_bb_inside_loop_p (new_loop,
 900                                   EDGE_SUCC (new_loop->header, 0)->dest))
 901         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 902       else
 903         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 904
 905       redirect_edge_and_branch_force (new_exit_e, loop->header);
 906       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 907                                new_exit_e->src);
 908
 909       /* We have to add phi args to the loop->header here as coming
 910          from new_exit_e edge.  */
 911       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 912         {
 913           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 914           if (phi_arg)
 915             add_phi_arg (phi, phi_arg, new_exit_e);
 916         }
 917
 918       redirect_edge_and_branch_force (entry_e, new_loop->header);
 919       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 920     }
 921
 922   free (new_bbs);
 923   free (bbs);
 924
 925   return new_loop;
 926 }
 927
 928
 929 /* Given the condition statement COND, put it as the last statement
 930    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 931    Assumes that this is the single exit of the guarded loop.
 932    Returns the skip edge.  */
 933
 934 static edge
 935 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 936                         basic_block dom_bb)
 937 {
 938   block_stmt_iterator bsi;
 939   edge new_e, enter_e;
 940   tree cond_stmt, then_label, else_label;
 941
 942   enter_e = EDGE_SUCC (guard_bb, 0);
 943   enter_e->flags &= ~EDGE_FALLTHRU;
 944   enter_e->flags |= EDGE_FALSE_VALUE;
 945   bsi = bsi_last (guard_bb);
 946
 947   then_label = build1 (GOTO_EXPR, void_type_node,
 948                        tree_block_label (exit_bb));
 949   else_label = build1 (GOTO_EXPR, void_type_node,
 950                        tree_block_label (enter_e->dest));
 951   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 952                      then_label, else_label);
 953   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 954   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 955   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 956   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 957   return new_e;
 958 }
 959
 960
 961 /* This function verifies that the following restrictions apply to LOOP:
 962    (1) it is innermost
 963    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 964    (3) it is single entry, single exit
 965    (4) its exit condition is the last stmt in the header
 966    (5) E is the entry/exit edge of LOOP.
 967  */
 968
 969 bool
 970 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 971 {
 972   edge exit_e = loop->single_exit;
 973   edge entry_e = loop_preheader_edge (loop);
 974   tree orig_cond = get_loop_exit_condition (loop);
 975   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 976
 977   if (need_ssa_update_p ())
 978     return false;
 979
 980   if (loop->inner
 981       /* All loops have an outer scope; the only case loop->outer is NULL is for
 982          the function itself.  */
 983       || !loop->outer
 984       || loop->num_nodes != 2
 985       || !empty_block_p (loop->latch)
 986       || !loop->single_exit
 987       /* Verify that new loop exit condition can be trivially modified.  */
 988       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 989       || (e != exit_e && e != entry_e))
 990     return false;
 991
 992   return true;
 993 }
 994
 995 #ifdef ENABLE_CHECKING
 996 void
 997 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 998                                  struct loop *second_loop)
 999 {
1000   basic_block loop1_exit_bb = first_loop->single_exit->dest;
1001   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1002   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1003
1004   /* A guard that controls whether the second_loop is to be executed or skipped
1005      is placed in first_loop->exit.  first_loopt->exit therefore has two
1006      successors - one is the preheader of second_loop, and the other is a bb
1007      after second_loop.
1008    */
1009   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1010
1011   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1012         of second_loop.  */
1013
1014   /* The preheader of new_loop is expected to have two predecessors:
1015      first_loop->exit and the block that precedes first_loop.  */
1016
1017   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1018               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1019                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1020                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1021                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1022
1023   /* Verify that the other successor of first_loopt->exit is after the
1024      second_loop.  */
1025   /* TODO */
1026 }
1027 #endif
1028
1029 /* Function slpeel_tree_peel_loop_to_edge.
1030
1031    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1032    that is placed on the entry (exit) edge E of LOOP. After this transformation
1033    we have two loops one after the other - first-loop iterates FIRST_NITERS
1034    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1035
1036    Input:
1037    - LOOP: the loop to be peeled.
1038    - E: the exit or entry edge of LOOP.
1039         If it is the entry edge, we peel the first iterations of LOOP. In this
1040         case first-loop is LOOP, and second-loop is the newly created loop.
1041         If it is the exit edge, we peel the last iterations of LOOP. In this
1042         case, first-loop is the newly created loop, and second-loop is LOOP.
1043    - NITERS: the number of iterations that LOOP iterates.
1044    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1045    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1046         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1047         is false, the caller of this function may want to take care of this
1048         (this can be useful if we don't want new stmts added to first-loop).
1049
1050    Output:
1051    The function returns a pointer to the new loop-copy, or NULL if it failed
1052    to perform the transformation.
1053
1054    The function generates two if-then-else guards: one before the first loop,
1055    and the other before the second loop:
1056    The first guard is:
1057      if (FIRST_NITERS == 0) then skip the first loop,
1058      and go directly to the second loop.
1059    The second guard is:
1060      if (FIRST_NITERS == NITERS) then skip the second loop.
1061
1062    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1063    FORNOW the resulting code will not be in loop-closed-ssa form.
1064 */
1065
1066 struct loop*
1067 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1068                                edge e, tree first_niters,
1069                                tree niters, bool update_first_loop_count)
1070 {
1071   struct loop *new_loop = NULL, *first_loop, *second_loop;
1072   edge skip_e;
1073   tree pre_condition;
1074   bitmap definitions;
1075   basic_block bb_before_second_loop, bb_after_second_loop;
1076   basic_block bb_before_first_loop;
1077   basic_block bb_between_loops;
1078   basic_block new_exit_bb;
1079   edge exit_e = loop->single_exit;
1080   LOC loop_loc;
1081
1082   if (!slpeel_can_duplicate_loop_p (loop, e))
1083     return NULL;
1084
1085   /* We have to initialize cfg_hooks. Then, when calling
1086    cfg_hooks->split_edge, the function tree_split_edge
1087    is actually called and, when calling cfg_hooks->duplicate_block,
1088    the function tree_duplicate_bb is called.  */
1089   tree_register_cfg_hooks ();
1090
1091
1092   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1093         Resulting CFG would be:
1094
1095         first_loop:
1096         do {
1097         } while ...
1098
1099         second_loop:
1100         do {
1101         } while ...
1102
1103         orig_exit_bb:
1104    */
1105
1106   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1107     {
1108       loop_loc = find_loop_location (loop);
1109       if (dump_file && (dump_flags & TDF_DETAILS))
1110         {
1111           if (loop_loc != UNKNOWN_LOC)
1112             fprintf (dump_file, "\n%s:%d: note: ",
1113                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1114           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1115         }
1116       return NULL;
1117     }
1118
1119   if (e == exit_e)
1120     {
1121       /* NEW_LOOP was placed after LOOP.  */
1122       first_loop = loop;
1123       second_loop = new_loop;
1124     }
1125   else
1126     {
1127       /* NEW_LOOP was placed before LOOP.  */
1128       first_loop = new_loop;
1129       second_loop = loop;
1130     }
1131
1132   definitions = ssa_names_to_replace ();
1133   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1134   rename_variables_in_loop (new_loop);
1135
1136
1137   /* 2. Add the guard that controls whether the first loop is executed.
1138         Resulting CFG would be:
1139
1140         bb_before_first_loop:
1141         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1142                                GOTO first-loop
1143
1144         first_loop:
1145         do {
1146         } while ...
1147
1148         bb_before_second_loop:
1149
1150         second_loop:
1151         do {
1152         } while ...
1153
1154         orig_exit_bb:
1155    */
1156
1157   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1158   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1159   bb_before_second_loop = split_edge (first_loop->single_exit);
1160   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1161
1162   pre_condition =
1163     fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1164                  build_int_cst (TREE_TYPE (first_niters), 0));
1165   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1166                                   bb_before_second_loop, bb_before_first_loop);
1167   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1168                                       first_loop == new_loop,
1169                                       &new_exit_bb, &definitions);
1170
1171
1172   /* 3. Add the guard that controls whether the second loop is executed.
1173         Resulting CFG would be:
1174
1175         bb_before_first_loop:
1176         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1177                                GOTO first-loop
1178
1179         first_loop:
1180         do {
1181         } while ...
1182
1183         bb_between_loops:
1184         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1185                                     GOTO bb_before_second_loop
1186
1187         bb_before_second_loop:
1188
1189         second_loop:
1190         do {
1191         } while ...
1192
1193         bb_after_second_loop:
1194
1195         orig_exit_bb:
1196    */
1197
1198   bb_between_loops = new_exit_bb;
1199   bb_after_second_loop = split_edge (second_loop->single_exit);
1200   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1201
1202   pre_condition =
1203         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1204   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1205                                   bb_after_second_loop, bb_before_first_loop);
1206   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1207                                      second_loop == new_loop, &new_exit_bb);
1208
1209   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1210    */
1211   if (update_first_loop_count)
1212     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1213
1214   BITMAP_FREE (definitions);
1215   delete_update_ssa ();
1216
1217   return new_loop;
1218 }
1219
1220 /* Function vect_get_loop_location.
1221
1222    Extract the location of the loop in the source code.
1223    If the loop is not well formed for vectorization, an estimated
1224    location is calculated.
1225    Return the loop location if succeed and NULL if not.  */
1226
1227 LOC
1228 find_loop_location (struct loop *loop)
1229 {
1230   tree node = NULL_TREE;
1231   basic_block bb;
1232   block_stmt_iterator si;
1233
1234   if (!loop)
1235     return UNKNOWN_LOC;
1236
1237   node = get_loop_exit_condition (loop);
1238
1239   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1240       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1241     return EXPR_LOC (node);
1242
1243   /* If we got here the loop is probably not "well formed",
1244      try to estimate the loop location */
1245
1246   if (!loop->header)
1247     return UNKNOWN_LOC;
1248
1249   bb = loop->header;
1250
1251   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1252     {
1253       node = bsi_stmt (si);
1254       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1255         return EXPR_LOC (node);
1256     }
1257
1258   return UNKNOWN_LOC;
1259 }
1260
1261
1262 /*************************************************************************
1263   Vectorization Debug Information.
1264  *************************************************************************/
1265
1266 /* Function vect_set_verbosity_level.
1267
1268    Called from toplev.c upon detection of the
1269    -ftree-vectorizer-verbose=N option.  */
1270
1271 void
1272 vect_set_verbosity_level (const char *val)
1273 {
1274    unsigned int vl;
1275
1276    vl = atoi (val);
1277    if (vl < MAX_VERBOSITY_LEVEL)
1278      vect_verbosity_level = vl;
1279    else
1280      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1281 }
1282
1283
1284 /* Function vect_set_dump_settings.
1285
1286    Fix the verbosity level of the vectorizer if the
1287    requested level was not set explicitly using the flag
1288    -ftree-vectorizer-verbose=N.
1289    Decide where to print the debugging information (dump_file/stderr).
1290    If the user defined the verbosity level, but there is no dump file,
1291    print to stderr, otherwise print to the dump file.  */
1292
1293 static void
1294 vect_set_dump_settings (void)
1295 {
1296   vect_dump = dump_file;
1297
1298   /* Check if the verbosity level was defined by the user:  */
1299   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1300     {
1301       /* If there is no dump file, print to stderr.  */
1302       if (!dump_file)
1303         vect_dump = stderr;
1304       return;
1305     }
1306
1307   /* User didn't specify verbosity level:  */
1308   if (dump_file && (dump_flags & TDF_DETAILS))
1309     vect_verbosity_level = REPORT_DETAILS;
1310   else if (dump_file && (dump_flags & TDF_STATS))
1311     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1312   else
1313     vect_verbosity_level = REPORT_NONE;
1314
1315   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1316 }
1317
1318
1319 /* Function debug_loop_details.
1320
1321    For vectorization debug dumps.  */
1322
1323 bool
1324 vect_print_dump_info (enum verbosity_levels vl)
1325 {
1326   if (vl > vect_verbosity_level)
1327     return false;
1328
1329   if (vect_loop_location == UNKNOWN_LOC)
1330     fprintf (vect_dump, "\n%s:%d: note: ",
1331                  DECL_SOURCE_FILE (current_function_decl),
1332                  DECL_SOURCE_LINE (current_function_decl));
1333   else
1334     fprintf (vect_dump, "\n%s:%d: note: ",
1335              LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1336
1337
1338   return true;
1339 }
1340
1341
1342 /*************************************************************************
1343   Vectorization Utilities.
1344  *************************************************************************/
1345
1346 /* Function new_stmt_vec_info.
1347
1348    Create and initialize a new stmt_vec_info struct for STMT.  */
1349
1350 stmt_vec_info
1351 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1352 {
1353   stmt_vec_info res;
1354   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1355
1356   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1357   STMT_VINFO_STMT (res) = stmt;
1358   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1359   STMT_VINFO_RELEVANT_P (res) = 0;
1360   STMT_VINFO_LIVE_P (res) = 0;
1361   STMT_VINFO_VECTYPE (res) = NULL;
1362   STMT_VINFO_VEC_STMT (res) = NULL;
1363   STMT_VINFO_DATA_REF (res) = NULL;
1364   if (TREE_CODE (stmt) == PHI_NODE)
1365     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1366   else
1367     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1368   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1369
1370   return res;
1371 }
1372
1373
1374 /* Function new_loop_vec_info.
1375
1376    Create and initialize a new loop_vec_info struct for LOOP, as well as
1377    stmt_vec_info structs for all the stmts in LOOP.  */
1378
1379 loop_vec_info
1380 new_loop_vec_info (struct loop *loop)
1381 {
1382   loop_vec_info res;
1383   basic_block *bbs;
1384   block_stmt_iterator si;
1385   unsigned int i;
1386
1387   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1388
1389   bbs = get_loop_body (loop);
1390
1391   /* Create stmt_info for all stmts in the loop.  */
1392   for (i = 0; i < loop->num_nodes; i++)
1393     {
1394       basic_block bb = bbs[i];
1395       tree phi;
1396
1397       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1398         {
1399           tree_ann_t ann = get_tree_ann (phi);
1400           set_stmt_info (ann, new_stmt_vec_info (phi, res));
1401         }
1402
1403       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1404         {
1405           tree stmt = bsi_stmt (si);
1406           stmt_ann_t ann;
1407
1408           ann = stmt_ann (stmt);
1409           set_stmt_info ((tree_ann_t)ann, new_stmt_vec_info (stmt, res));
1410         }
1411     }
1412
1413   LOOP_VINFO_LOOP (res) = loop;
1414   LOOP_VINFO_BBS (res) = bbs;
1415   LOOP_VINFO_EXIT_COND (res) = NULL;
1416   LOOP_VINFO_NITERS (res) = NULL;
1417   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1418   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1419   LOOP_VINFO_VECT_FACTOR (res) = 0;
1420   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREFS (res), 20, "loop_datarefs");
1421   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DDRS (res), 20, "loop_ddrs");
1422   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1423
1424   return res;
1425 }
1426
1427
1428 /* Function destroy_loop_vec_info.
1429
1430    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1431    stmts in the loop.  */
1432
1433 void
1434 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1435 {
1436   struct loop *loop;
1437   basic_block *bbs;
1438   int nbbs;
1439   block_stmt_iterator si;
1440   int j;
1441
1442   if (!loop_vinfo)
1443     return;
1444
1445   loop = LOOP_VINFO_LOOP (loop_vinfo);
1446
1447   bbs = LOOP_VINFO_BBS (loop_vinfo);
1448   nbbs = loop->num_nodes;
1449
1450   for (j = 0; j < nbbs; j++)
1451     {
1452       basic_block bb = bbs[j];
1453       tree phi;
1454       stmt_vec_info stmt_info;
1455
1456       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1457         {
1458           tree_ann_t ann = get_tree_ann (phi);
1459
1460           stmt_info = vinfo_for_stmt (phi);
1461           free (stmt_info);
1462           set_stmt_info (ann, NULL);
1463         }
1464
1465       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1466         {
1467           tree stmt = bsi_stmt (si);
1468           stmt_ann_t ann = stmt_ann (stmt);
1469           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1470
1471           if (stmt_info)
1472             {
1473               VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1474               free (stmt_info);
1475               set_stmt_info ((tree_ann_t)ann, NULL);
1476             }
1477         }
1478     }
1479
1480   free (LOOP_VINFO_BBS (loop_vinfo));
1481   varray_clear (LOOP_VINFO_DATAREFS (loop_vinfo));
1482   varray_clear (LOOP_VINFO_DDRS (loop_vinfo));
1483
1484   free (loop_vinfo);
1485 }
1486
1487
1488 /* Function vect_force_dr_alignment_p.
1489
1490    Returns whether the alignment of a DECL can be forced to be aligned
1491    on ALIGNMENT bit boundary.  */
1492
1493 bool
1494 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1495 {
1496   if (TREE_CODE (decl) != VAR_DECL)
1497     return false;
1498
1499   if (DECL_EXTERNAL (decl))
1500     return false;
1501
1502   if (TREE_ASM_WRITTEN (decl))
1503     return false;
1504
1505   if (TREE_STATIC (decl))
1506     return (alignment <= MAX_OFILE_ALIGNMENT);
1507   else
1508     /* This is not 100% correct.  The absolute correct stack alignment
1509        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1510        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1511        However, until someone implements forced stack alignment, SSE
1512        isn't really usable without this.  */
1513     return (alignment <= PREFERRED_STACK_BOUNDARY);
1514 }
1515
1516
1517 /* Function get_vectype_for_scalar_type.
1518
1519    Returns the vector type corresponding to SCALAR_TYPE as supported
1520    by the target.  */
1521
1522 tree
1523 get_vectype_for_scalar_type (tree scalar_type)
1524 {
1525   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1526   int nbytes = GET_MODE_SIZE (inner_mode);
1527   int nunits;
1528   tree vectype;
1529
1530   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1531     return NULL_TREE;
1532
1533   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1534      is expected.  */
1535   nunits = UNITS_PER_SIMD_WORD / nbytes;
1536
1537   vectype = build_vector_type (scalar_type, nunits);
1538   if (vect_print_dump_info (REPORT_DETAILS))
1539     {
1540       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1541       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1542     }
1543
1544   if (!vectype)
1545     return NULL_TREE;
1546
1547   if (vect_print_dump_info (REPORT_DETAILS))
1548     {
1549       fprintf (vect_dump, "vectype: ");
1550       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1551     }
1552
1553   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1554       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1555     {
1556       if (vect_print_dump_info (REPORT_DETAILS))
1557         fprintf (vect_dump, "mode not supported by target.");
1558       return NULL_TREE;
1559     }
1560
1561   return vectype;
1562 }
1563
1564
1565 /* Function vect_supportable_dr_alignment
1566
1567    Return whether the data reference DR is supported with respect to its
1568    alignment.  */
1569
1570 enum dr_alignment_support
1571 vect_supportable_dr_alignment (struct data_reference *dr)
1572 {
1573   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1574   enum machine_mode mode = (int) TYPE_MODE (vectype);
1575
1576   if (aligned_access_p (dr))
1577     return dr_aligned;
1578
1579   /* Possibly unaligned access.  */
1580
1581   if (DR_IS_READ (dr))
1582     {
1583       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1584           && (!targetm.vectorize.builtin_mask_for_load
1585               || targetm.vectorize.builtin_mask_for_load ()))
1586         return dr_unaligned_software_pipeline;
1587
1588       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1589         /* Can't software pipeline the loads, but can at least do them.  */
1590         return dr_unaligned_supported;
1591     }
1592
1593   /* Unsupported.  */
1594   return dr_unaligned_unsupported;
1595 }
1596
1597
1598 /* Function vect_is_simple_use.
1599
1600    Input:
1601    LOOP - the loop that is being vectorized.
1602    OPERAND - operand of a stmt in LOOP.
1603    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1604
1605    Returns whether a stmt with OPERAND can be vectorized.
1606    Supportable operands are constants, loop invariants, and operands that are
1607    defined by the current iteration of the loop. Unsupportable operands are
1608    those that are defined by a previous iteration of the loop (as is the case
1609    in reduction/induction computations).  */
1610
1611 bool
1612 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1613                     tree *def, enum vect_def_type *dt)
1614 {
1615   basic_block bb;
1616   stmt_vec_info stmt_vinfo;
1617   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1618
1619   *def_stmt = NULL_TREE;
1620   *def = NULL_TREE;
1621
1622   if (vect_print_dump_info (REPORT_DETAILS))
1623     {
1624       fprintf (vect_dump, "vect_is_simple_use: operand ");
1625       print_generic_expr (vect_dump, operand, TDF_SLIM);
1626     }
1627
1628   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1629     {
1630       *dt = vect_constant_def;
1631       return true;
1632     }
1633
1634   if (TREE_CODE (operand) != SSA_NAME)
1635     {
1636       if (vect_print_dump_info (REPORT_DETAILS))
1637         fprintf (vect_dump, "not ssa-name.");
1638       return false;
1639     }
1640
1641   *def_stmt = SSA_NAME_DEF_STMT (operand);
1642   if (*def_stmt == NULL_TREE )
1643     {
1644       if (vect_print_dump_info (REPORT_DETAILS))
1645         fprintf (vect_dump, "no def_stmt.");
1646       return false;
1647     }
1648
1649   if (vect_print_dump_info (REPORT_DETAILS))
1650     {
1651       fprintf (vect_dump, "def_stmt: ");
1652       print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1653     }
1654
1655   /* empty stmt is expected only in case of a function argument.
1656      (Otherwise - we expect a phi_node or a modify_expr).  */
1657   if (IS_EMPTY_STMT (*def_stmt))
1658     {
1659       tree arg = TREE_OPERAND (*def_stmt, 0);
1660       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1661         {
1662           *def = operand;
1663           *dt = vect_invariant_def;
1664           return true;
1665         }
1666
1667       if (vect_print_dump_info (REPORT_DETAILS))
1668         fprintf (vect_dump, "Unexpected empty stmt.");
1669       return false;
1670     }
1671
1672   bb = bb_for_stmt (*def_stmt);
1673   if (!flow_bb_inside_loop_p (loop, bb))
1674     *dt = vect_invariant_def;
1675   else
1676     {
1677       stmt_vinfo = vinfo_for_stmt (*def_stmt);
1678       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1679     }
1680
1681   if (*dt == vect_unknown_def_type)
1682     {
1683       if (vect_print_dump_info (REPORT_DETAILS))
1684         fprintf (vect_dump, "Unsupported pattern.");
1685       return false;
1686     }
1687
1688   /* stmts inside the loop that have been identified as performing
1689      a reduction operation cannot have uses in the loop.  */
1690   if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1691     {
1692       if (vect_print_dump_info (REPORT_DETAILS))
1693         fprintf (vect_dump, "reduction used in loop.");
1694       return false;
1695     }
1696
1697   if (vect_print_dump_info (REPORT_DETAILS))
1698     fprintf (vect_dump, "type of def: %d.",*dt);
1699
1700   switch (TREE_CODE (*def_stmt))
1701     {
1702     case PHI_NODE:
1703       *def = PHI_RESULT (*def_stmt);
1704       gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1705                   || *dt == vect_invariant_def);
1706       break;
1707
1708     case MODIFY_EXPR:
1709       *def = TREE_OPERAND (*def_stmt, 0);
1710       gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1711       break;
1712
1713     default:
1714       if (vect_print_dump_info (REPORT_DETAILS))
1715         fprintf (vect_dump, "unsupported defining stmt: ");
1716       return false;
1717     }
1718
1719   if (*dt == vect_induction_def)
1720     {
1721       if (vect_print_dump_info (REPORT_DETAILS))
1722         fprintf (vect_dump, "induction not supported.");
1723       return false;
1724     }
1725
1726   return true;
1727 }
1728
1729
1730 /* Function reduction_code_for_scalar_code
1731
1732    Input:
1733    CODE - tree_code of a reduction operations.
1734
1735    Output:
1736    REDUC_CODE - the corresponding tree-code to be used to reduce the
1737       vector of partial results into a single scalar result (which
1738       will also reside in a vector).
1739
1740    Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1741
1742 bool
1743 reduction_code_for_scalar_code (enum tree_code code,
1744                                 enum tree_code *reduc_code)
1745 {
1746   switch (code)
1747   {
1748   case MAX_EXPR:
1749     *reduc_code = REDUC_MAX_EXPR;
1750     return true;
1751
1752   case MIN_EXPR:
1753     *reduc_code = REDUC_MIN_EXPR;
1754     return true;
1755
1756   case PLUS_EXPR:
1757     *reduc_code = REDUC_PLUS_EXPR;
1758     return true;
1759
1760   default:
1761     return false;
1762   }
1763 }
1764
1765
1766 /* Function vect_is_simple_reduction
1767
1768    Detect a cross-iteration def-use cucle that represents a simple
1769    reduction computation. We look for the following pattern:
1770
1771    loop_header:
1772      a1 = phi < a0, a2 >
1773      a3 = ...
1774      a2 = operation (a3, a1)
1775
1776    such that:
1777    1. operation is commutative and associative and it is safe to
1778       change the order of the computation.
1779    2. no uses for a2 in the loop (a2 is used out of the loop)
1780    3. no uses of a1 in the loop besides the reduction operation.
1781
1782    Condition 1 is tested here.
1783    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1784
1785 tree
1786 vect_is_simple_reduction (struct loop *loop ATTRIBUTE_UNUSED,
1787                           tree phi ATTRIBUTE_UNUSED)
1788 {
1789   edge latch_e = loop_latch_edge (loop);
1790   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1791   tree def_stmt, def1, def2;
1792   enum tree_code code;
1793   int op_type;
1794   tree operation, op1, op2;
1795   tree type;
1796
1797   if (TREE_CODE (loop_arg) != SSA_NAME)
1798     {
1799       if (vect_print_dump_info (REPORT_DETAILS))
1800         {
1801           fprintf (vect_dump, "reduction: not ssa_name: ");
1802           print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1803         }
1804       return NULL_TREE;
1805     }
1806
1807   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1808   if (!def_stmt)
1809     {
1810       if (vect_print_dump_info (REPORT_DETAILS))
1811         fprintf (vect_dump, "reduction: no def_stmt.");
1812       return NULL_TREE;
1813     }
1814
1815   if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1816     {
1817       if (vect_print_dump_info (REPORT_DETAILS))
1818         {
1819           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1820         }
1821       return NULL_TREE;
1822     }
1823
1824   operation = TREE_OPERAND (def_stmt, 1);
1825   code = TREE_CODE (operation);
1826   if (!commutative_tree_code (code) || !associative_tree_code (code))
1827     {
1828       if (vect_print_dump_info (REPORT_DETAILS))
1829         {
1830           fprintf (vect_dump, "reduction: not commutative/associative: ");
1831           print_generic_expr (vect_dump, operation, TDF_SLIM);
1832         }
1833       return NULL_TREE;
1834     }
1835
1836   op_type = TREE_CODE_LENGTH (code);
1837   if (op_type != binary_op)
1838     {
1839       if (vect_print_dump_info (REPORT_DETAILS))
1840         {
1841           fprintf (vect_dump, "reduction: not binary operation: ");
1842           print_generic_expr (vect_dump, operation, TDF_SLIM);
1843         }
1844       return NULL_TREE;
1845     }
1846
1847   op1 = TREE_OPERAND (operation, 0);
1848   op2 = TREE_OPERAND (operation, 1);
1849   if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1850     {
1851       if (vect_print_dump_info (REPORT_DETAILS))
1852         {
1853           fprintf (vect_dump, "reduction: uses not ssa_names: ");
1854           print_generic_expr (vect_dump, operation, TDF_SLIM);
1855         }
1856       return NULL_TREE;
1857     }
1858
1859   /* Check that it's ok to change the order of the computation.  */
1860   type = TREE_TYPE (operation);
1861   if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
1862       || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
1863     {
1864       if (vect_print_dump_info (REPORT_DETAILS))
1865         {
1866           fprintf (vect_dump, "reduction: multiple types: operation type: ");
1867           print_generic_expr (vect_dump, type, TDF_SLIM);
1868           fprintf (vect_dump, ", operands types: ");
1869           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
1870           fprintf (vect_dump, ",");
1871           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
1872         }
1873       return NULL_TREE;
1874     }
1875
1876   /* CHECKME: check for !flag_finite_math_only too?  */
1877   if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
1878     {
1879       /* Changing the order of operations changes the sematics.  */
1880       if (vect_print_dump_info (REPORT_DETAILS))
1881         {
1882           fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
1883           print_generic_expr (vect_dump, operation, TDF_SLIM);
1884         }
1885       return NULL_TREE;
1886     }
1887   else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
1888     {
1889       /* Changing the order of operations changes the sematics.  */
1890       if (vect_print_dump_info (REPORT_DETAILS))
1891         {
1892           fprintf (vect_dump, "reduction: unsafe int math optimization: ");
1893           print_generic_expr (vect_dump, operation, TDF_SLIM);
1894         }
1895       return NULL_TREE;
1896     }
1897
1898   /* reduction is safe. we're dealing with one of the following:
1899      1) integer arithmetic and no trapv
1900      2) floating point arithmetic, and special flags permit this optimization.
1901    */
1902   def1 = SSA_NAME_DEF_STMT (op1);
1903   def2 = SSA_NAME_DEF_STMT (op2);
1904   if (!def1 || !def2)
1905     {
1906       if (vect_print_dump_info (REPORT_DETAILS))
1907         {
1908           fprintf (vect_dump, "reduction: no defs for operands: ");
1909           print_generic_expr (vect_dump, operation, TDF_SLIM);
1910         }
1911       return NULL_TREE;
1912     }
1913
1914   if (TREE_CODE (def1) == MODIFY_EXPR
1915       && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
1916       && def2 == phi)
1917     {
1918       if (vect_print_dump_info (REPORT_DETAILS))
1919         {
1920           fprintf (vect_dump, "detected reduction:");
1921           print_generic_expr (vect_dump, operation, TDF_SLIM);
1922         }
1923       return def_stmt;
1924     }
1925   else if (TREE_CODE (def2) == MODIFY_EXPR
1926       && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
1927       && def1 == phi)
1928     {
1929       use_operand_p use;
1930       ssa_op_iter iter;
1931
1932       /* Swap operands (just for simplicity - so that the rest of the code
1933          can assume that the reduction variable is always the last (second)
1934          argument).  */
1935       if (vect_print_dump_info (REPORT_DETAILS))
1936         {
1937           fprintf (vect_dump, "detected reduction: need to swap operands:");
1938           print_generic_expr (vect_dump, operation, TDF_SLIM);
1939         }
1940
1941       /* CHECKME */
1942       FOR_EACH_SSA_USE_OPERAND (use, def_stmt, iter, SSA_OP_USE)
1943         {
1944           tree tuse = USE_FROM_PTR (use);
1945           if (tuse == op1)
1946             SET_USE (use, op2);
1947           else if (tuse == op2)
1948             SET_USE (use, op1);
1949         }
1950       return def_stmt;
1951     }
1952   else
1953     {
1954       if (vect_print_dump_info (REPORT_DETAILS))
1955         {
1956           fprintf (vect_dump, "reduction: unknown pattern.");
1957           print_generic_expr (vect_dump, operation, TDF_SLIM);
1958         }
1959       return NULL_TREE;
1960     }
1961 }
1962
1963
1964 /* Function vect_is_simple_iv_evolution.
1965
1966    FORNOW: A simple evolution of an induction variables in the loop is
1967    considered a polynomial evolution with constant step.  */
1968
1969 bool
1970 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1971                              tree * step)
1972 {
1973   tree init_expr;
1974   tree step_expr;
1975
1976   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1977
1978   /* When there is no evolution in this loop, the evolution function
1979      is not "simple".  */
1980   if (evolution_part == NULL_TREE)
1981     return false;
1982
1983   /* When the evolution is a polynomial of degree >= 2
1984      the evolution function is not "simple".  */
1985   if (tree_is_chrec (evolution_part))
1986     return false;
1987
1988   step_expr = evolution_part;
1989   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1990                                                            loop_nb));
1991
1992   if (vect_print_dump_info (REPORT_DETAILS))
1993     {
1994       fprintf (vect_dump, "step: ");
1995       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1996       fprintf (vect_dump, ",  init: ");
1997       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1998     }
1999
2000   *init = init_expr;
2001   *step = step_expr;
2002
2003   if (TREE_CODE (step_expr) != INTEGER_CST)
2004     {
2005       if (vect_print_dump_info (REPORT_DETAILS))
2006         fprintf (vect_dump, "step unknown.");
2007       return false;
2008     }
2009
2010   return true;
2011 }
2012
2013
2014 /* Function vectorize_loops.
2015
2016    Entry Point to loop vectorization phase.  */
2017
2018 void
2019 vectorize_loops (struct loops *loops)
2020 {
2021   unsigned int i;
2022   unsigned int num_vectorized_loops = 0;
2023
2024   /* Fix the verbosity level if not defined explicitly by the user.  */
2025   vect_set_dump_settings ();
2026
2027   /* Allocate the bitmap that records which virtual variables that
2028      need to be renamed.  */
2029   vect_vnames_to_rename = BITMAP_ALLOC (NULL);
2030
2031   /*  ----------- Analyze loops. -----------  */
2032
2033   /* If some loop was duplicated, it gets bigger number
2034      than all previously defined loops. This fact allows us to run
2035      only over initial loops skipping newly generated ones.  */
2036   vect_loops_num = loops->num;
2037   for (i = 1; i < vect_loops_num; i++)
2038     {
2039       loop_vec_info loop_vinfo;
2040       struct loop *loop = loops->parray[i];
2041
2042       if (!loop)
2043         continue;
2044
2045       vect_loop_location = find_loop_location (loop);
2046       loop_vinfo = vect_analyze_loop (loop);
2047       loop->aux = loop_vinfo;
2048
2049       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2050         continue;
2051
2052       vect_transform_loop (loop_vinfo, loops);
2053       num_vectorized_loops++;
2054     }
2055
2056   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2057     fprintf (vect_dump, "vectorized %u loops in function.\n",
2058              num_vectorized_loops);
2059
2060   /*  ----------- Finalize. -----------  */
2061
2062   BITMAP_FREE (vect_vnames_to_rename);
2063
2064   for (i = 1; i < vect_loops_num; i++)
2065     {
2066       struct loop *loop = loops->parray[i];
2067       loop_vec_info loop_vinfo;
2068
2069       if (!loop)
2070         continue;
2071       loop_vinfo = loop->aux;
2072       destroy_loop_vec_info (loop_vinfo);
2073       loop->aux = NULL;
2074     }
2075 }