old-autovect-branch/gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations whose vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS whose base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It inserts the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations whose vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "optabs.h"
 140 #include "params.h"
 141 #include "toplev.h"
 142 #include "tree-chrec.h"
 143 #include "tree-data-ref.h"
 144 #include "tree-scalar-evolution.h"
 145 #include "input.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148 #include "recog.h"
 149
 150 /*************************************************************************
 151   Simple Loop Peeling Utilities
 152  *************************************************************************/
 153 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 154   (struct loop *, struct loops *, edge);
 155 static void slpeel_update_phis_for_duplicate_loop
 156   (struct loop *, struct loop *, bool after);
 157 static void slpeel_update_phi_nodes_for_guard1
 158   (edge, struct loop *, bool, basic_block *, bitmap *);
 159 static void slpeel_update_phi_nodes_for_guard2
 160   (edge, struct loop *, bool, basic_block *);
 161 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 162 /* Helpers that rename SSA uses to their current definitions.  */
 163 static void rename_use_op (use_operand_p);
 164 static void rename_variables_in_bb (basic_block);
 165 static void rename_variables_in_loop (struct loop *);
 166
 167 /*************************************************************************
 168   General Vectorization Utilities
 169  *************************************************************************/
 170 static void vect_set_dump_settings (void);
 171
 172 /* Vectorizer dump stream: set to stderr, or to dump_file if it exists.  */
 173 FILE *vect_dump;
 174
 175 /* Verbosity level.  It starts out as the invalid value
 176    MAX_VERBOSITY_LEVEL to mark that it is still uninitialized.  */
 177 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 178
 179 /* Number of loops, at the beginning of vectorization.  */
 180 unsigned int vect_loops_num;
 181
 182 /* Loop location.  */
 183 static LOC vect_loop_location;
 184
 185 /* Bitmap (indexed by SSA_NAME_VERSION) of virtual names to be renamed.  */
 186 bitmap vect_vnames_to_rename;
 187 \f
 188 /*************************************************************************
 189   Simple Loop Peeling Utilities
 190
 191   Utilities to support loop peeling for vectorization purposes.
 192  *************************************************************************/
 193
 194
 195 /* Rename the use *OP_P to its current definition, when one is recorded.  */
 196
 197 static void
 198 rename_use_op (use_operand_p op_p)
 199 {
 200   tree used = USE_FROM_PTR (op_p);
 201
 202   /* Only SSA names are candidates for renaming; any other operand is
 203      left untouched.  */
 204   if (TREE_CODE (used) == SSA_NAME)
 205     {
 206       tree cur_def = get_current_def (used);
 207
 208       /* No current def recorded: something defined outside of the loop,
 209          which is kept.  Otherwise it is an ordinary ssa name defined in
 210          the loop, and the use is redirected to the recorded name.  */
 211       if (cur_def)
 212         SET_USE (op_p, cur_def);
 213     }
 214 }
 215
 216
 217 /* Rename every use in the stmts of BB, and every phi argument that BB
 218    feeds into a successor inside BB's loop, using rename_use_op.  */
 219 static void
 220 rename_variables_in_bb (basic_block bb)
 221 {
 222   struct loop *father = bb->loop_father;
 223   block_stmt_iterator it;
 224   edge_iterator succ_it;
 225   edge succ;
 226   tree node;
 227
 228   /* Operands (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS) of the stmts in BB.  */
 229   for (it = bsi_start (bb); !bsi_end_p (it); bsi_next (&it))
 230     {
 231       ssa_op_iter op_it;
 232       use_operand_p use;
 233       tree cur_stmt = bsi_stmt (it);
 234
 235       FOR_EACH_SSA_USE_OPERAND (use, cur_stmt, op_it,
 236                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 237         rename_use_op (use);
 238     }
 239
 240   /* Phi arguments on the outgoing edges of BB; successors that are not
 241      inside the loop of BB are skipped.  */
 242   FOR_EACH_EDGE (succ, succ_it, bb->succs)
 243     if (flow_bb_inside_loop_p (father, succ->dest))
 244       for (node = phi_nodes (succ->dest); node; node = PHI_CHAIN (node))
 245         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (node, succ));
 246 }
 247
 248
 249 /* Rename the variables in every basic block of the newly generated LOOP.  */
 250
 251 static void
 252 rename_variables_in_loop (struct loop *loop)
 253 {
 254   basic_block *body = get_loop_body (loop);
 255   unsigned idx = 0;
 256
 257   while (idx < loop->num_nodes)
 258     {
 259       rename_variables_in_bb (body[idx]);
 260       idx++;
 261     }
 262   free (body);  /* The array obtained from get_loop_body is released here.  */
 263 }
 264
 265
 266 /* Update the PHI nodes of NEW_LOOP, i.e. add the entry and latch
 267    arguments of its loop-header phis.
 268    NEW_LOOP is a duplicate of ORIG_LOOP.
 269    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 270    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 271    executes before it.  */
 272
 273 static void
 274 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 275                                        struct loop *new_loop, bool after)
 276 {
 277   tree new_ssa_name;
 278   tree phi_new, phi_orig;
 279   tree def;
 280   edge orig_loop_latch = loop_latch_edge (orig_loop);
 281   edge orig_entry_e = loop_preheader_edge (orig_loop);
 282   edge new_loop_exit_e = new_loop->single_exit;
 283   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 284   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 285
 286   /*
 287      step 1. For each loop-header-phi:
 288              Add the first phi argument for the phi in NEW_LOOP
 289             (the one associated with the entry of NEW_LOOP)
 290
 291      step 2. For each loop-header-phi:
 292              Add the second phi argument for the phi in NEW_LOOP
 293             (the one associated with the latch of NEW_LOOP)
 294
 295      step 3. Update the phis in the successor block of NEW_LOOP.
 296
 297         case 1: NEW_LOOP was placed before ORIG_LOOP:
 298                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 299                 Updating the phis in the successor block can therefore be done
 300                 along with the scanning of the loop header phis, because the
 301                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 302                 phi nodes, organized in the same order.
 303
 304         case 2: NEW_LOOP was placed after ORIG_LOOP:
 305                 The successor block of NEW_LOOP is the original exit block of
 306                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 307                 We postpone updating these phis to a later stage (when
 308                 loop guards are added).
 309    */
 310
 311
 312   /* Scan the phis in the headers of the old and new loops
 313      (they are organized in exactly the same order).  */
 314
 315   for (phi_new = phi_nodes (new_loop->header),
 316        phi_orig = phi_nodes (orig_loop->header);
 317        phi_new && phi_orig;
 318        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 319     {
 320       /* step 1: arg taken on ORIG_LOOP's latch (AFTER) or entry edge.  */
 321       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 322       add_phi_arg (phi_new, def, new_loop_entry_e);
 323
 324       /* step 2 (and 3): skipped when the latch def is not an SSA_NAME.  */
 325       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 326       if (TREE_CODE (def) != SSA_NAME)
 327         continue;
 328
 329       new_ssa_name = get_current_def (def);
 330       if (!new_ssa_name)
 331         {
 332           /* This only happens if there are no definitions
 333              inside the loop.  Use the phi_result in this case.  */
 334           new_ssa_name = PHI_RESULT (phi_new);
 335         }
 336
 337       /* An ordinary ssa name defined in the loop.  */
 338       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 339
 340       /* step 3 (case 1): NEW_LOOP's exit edge is ORIG_LOOP's entry edge.  */
 341       if (!after)
 342         {
 343           gcc_assert (new_loop_exit_e == orig_entry_e);
 344           SET_PHI_ARG_DEF (phi_orig,
 345                            new_loop_exit_e->dest_idx,
 346                            new_ssa_name);
 347         }
 348     }
 349 }
 350
 351
 352 /* Update PHI nodes for a guard of the LOOP.
 353
 354    Input:
 355    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 356         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 357         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 358         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 359         We denote this bb NEW_MERGE_BB because before the guard code was added
 360         it had a single predecessor (the LOOP header), and now it became a merge
 361         point of two paths - the path that ends with the LOOP exit-edge, and
 362         the path that ends with GUARD_EDGE.
 363    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 364         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 365
 366    ===> The CFG before the guard-code was added:
 367         LOOP_header_bb:
 368           loop_body
 369           if (exit_loop) goto update_bb
 370           else           goto LOOP_header_bb
 371         update_bb:
 372
 373    ==> The CFG after the guard-code was added:
 374         guard_bb:
 375           if (LOOP_guard_condition) goto new_merge_bb
 376           else                      goto LOOP_header_bb
 377         LOOP_header_bb:
 378           loop_body
 379           if (exit_loop_condition) goto new_merge_bb
 380           else                     goto LOOP_header_bb
 381         new_merge_bb:
 382           goto update_bb
 383         update_bb:
 384
 385    ==> The CFG after this function:
 386         guard_bb:
 387           if (LOOP_guard_condition) goto new_merge_bb
 388           else                      goto LOOP_header_bb
 389         LOOP_header_bb:
 390           loop_body
 391           if (exit_loop_condition) goto new_exit_bb
 392           else                     goto LOOP_header_bb
 393         new_exit_bb:
 394         new_merge_bb:
 395           goto update_bb
 396         update_bb:
 397
 398    This function:
 399    1. creates and updates the relevant phi nodes to account for the new
 400       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 401       1.1. Create phi nodes at NEW_MERGE_BB.
 402       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 403            UPDATE_BB).  It was the exit-bb of LOOP before NEW_MERGE_BB was added.
 404    2. preserves loop-closed-ssa-form by creating the required phi nodes
 405       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 406
 407    There are two flavors to this function:
 408
 409    slpeel_update_phi_nodes_for_guard1:
 410      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 411      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 412      for variables that have phis in the loop header.
 413
 414    slpeel_update_phi_nodes_for_guard2:
 415      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 416      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 417      for variables that have phis in the loop exit.
 418
 419    I.E., the overall structure is:
 420
 421         loop1_preheader_bb:
 422                 guard1 (goto loop1/merge1_bb)
 423         loop1
 424         loop1_exit_bb:
 425                 guard2 (goto merge1_bb/merge2_bb)
 426         merge1_bb
 427         loop2
 428         loop2_exit_bb
 429         merge2_bb
 430         next_bb
 431
 432    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 433    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 434    that have phis in loop1->header).
 435
 436    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 437    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 438    that have phis in next_bb). It also adds some of these phis to
 439    loop1_exit_bb.
 440
 441    slpeel_update_phi_nodes_for_guard1 is always called before
 442    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 443    to create correct data-flow and loop-closed-ssa-form.
 444
 445    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 446    that change between iterations of a loop (and therefore have a phi-node
 447    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 448    phis for variables that are used out of the loop (and therefore have
 449    loop-closed exit phis). Some variables may be both updated between
 450    iterations and used after the loop. This is why in loop1_exit_bb we
 451    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 452    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 453
 454    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 455      an original loop. i.e., we have:
 456
 457            orig_loop
 458            guard_bb (goto LOOP/new_merge)
 459            new_loop <-- LOOP
 460            new_exit
 461            new_merge
 462            next_bb
 463
 464      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 465      have:
 466
 467            new_loop
 468            guard_bb (goto LOOP/new_merge)
 469            orig_loop <-- LOOP
 470            new_exit
 471            new_merge
 472            next_bb
 473
 474      The SSA names defined in the original loop have a current
 475      reaching definition that records the corresponding new
 476      ssa-name used in the new duplicated loop copy.
 477   */
 478
 479 /* Function slpeel_update_phi_nodes_for_guard1
 480
 481    Input:
 482    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 483    - DEFS - a bitmap in which we set the SSA_NAME_VERSION of each name for
 484             which a current def is recorded by this function.
 485
 486    In the context of the overall structure, we have:
 487
 488         loop1_preheader_bb:
 489                 guard1 (goto loop1/merge1_bb)
 490 LOOP->  loop1
 491         loop1_exit_bb:
 492                 guard2 (goto merge1_bb/merge2_bb)
 493         merge1_bb
 494         loop2
 495         loop2_exit_bb
 496         merge2_bb
 497         next_bb
 498
 499    For each name updated between loop iterations (i.e. for each name that has
 500    an entry (loop-header) phi in LOOP) we create a new phi in:
 501    1. merge1_bb (to account for the edge from guard1)
 502    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 503 */
 504
 505 static void
 506 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 507                                     bool is_new_loop, basic_block *new_exit_bb,
 508                                     bitmap *defs)
 509 {
 510   tree orig_phi, new_phi;
 511   tree update_phi, update_phi2;
 512   tree guard_arg, loop_arg;
 513   basic_block new_merge_bb = guard_edge->dest;
 514   edge e = EDGE_SUCC (new_merge_bb, 0);
 515   basic_block update_bb = e->dest;
 516   basic_block orig_bb = loop->header;
 517   edge new_exit_e;
 518   tree current_new_name;
 519   tree name;
 520
 521   /* Create new bb between loop and new_merge_bb.  */
 522   *new_exit_bb = split_edge (loop->single_exit);
 523   add_bb_to_loop (*new_exit_bb, loop->outer);
 524
 525   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 526
 527   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 528        orig_phi && update_phi;
 529        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 530     {
 531       /* Virtual phi; Mark it for renaming. We actually want to call
 532          mark_sym_for_renaming, but since all ssa renaming datastructures
 533          are going to be freed before we get to update the SSA form, we
 534          just record this name for now in a bitmap, and will mark it for
 535          renaming later.  */
 536       name = PHI_RESULT (orig_phi);
 537       if (!is_gimple_reg (SSA_NAME_VAR (name)))
 538         bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
 539
 540       /** 1. Handle new-merge-point phis  **/
 541
 542       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 543       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 544                                  new_merge_bb);
 545
 546       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 547             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 548       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 549       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 550       /* LOOP_ARG arrives from LOOP's exit, GUARD_ARG via GUARD_EDGE.  */
 551       add_phi_arg (new_phi, loop_arg, new_exit_e);
 552       add_phi_arg (new_phi, guard_arg, guard_edge);
 553
 554       /* 1.3. Update phi in successor block.  */
 555       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 556                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 557       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 558       update_phi2 = new_phi;
 559       /* UPDATE_PHI2 is the merge phi whose loop arg is replaced in 2.3.  */
 560
 561       /** 2. Handle loop-closed-ssa-form phis  **/
 562
 563       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 564       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 565                                  *new_exit_bb);
 566
 567       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 568       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 569
 570       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 571       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 572       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 573
 574       /* 2.4. Record the newly created name with set_current_def.
 575          We want to find a name such that
 576                 name = get_current_def (orig_loop_name)
 577          and to set its current definition as follows:
 578                 set_current_def (name, new_phi_name)
 579
 580          If LOOP is a new loop then loop_arg is already the name we're
 581          looking for. If LOOP is the original loop, then loop_arg is
 582          the orig_loop_name and the relevant name is recorded in its
 583          current reaching definition.  */
 584       if (is_new_loop)
 585         current_new_name = loop_arg;
 586       else
 587         {
 588           current_new_name = get_current_def (loop_arg);
 589           /* current_def is not available only if the variable does not
 590              change inside the loop, in which case we also don't care
 591              about recording a current_def for it because we won't be
 592              trying to create loop-exit-phis for it.  */
 593           if (!current_new_name)
 594             continue;
 595         }
 596       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 597
 598       set_current_def (current_new_name, PHI_RESULT (new_phi));
 599       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 600     }
 601   /* Reverse NEW_MERGE_BB's phi chain (presumably to restore order - confirm).  */
 602   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 603 }
 604
 605
 606 /* Function slpeel_update_phi_nodes_for_guard2
 607
 608    Input:
 609    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 610
 611    In the context of the overall structure, we have:
 612
 613         loop1_preheader_bb:
 614                 guard1 (goto loop1/merge1_bb)
 615         loop1
 616         loop1_exit_bb:
 617                 guard2 (goto merge1_bb/merge2_bb)
 618         merge1_bb
 619 LOOP->  loop2
 620         loop2_exit_bb
 621         merge2_bb
 622         next_bb
 623
 624    For each name used outside the loop (i.e. for each name that has an exit
 625    phi in next_bb) we create a new phi in:
 626    1. merge2_bb (to account for the edge from guard_bb)
 627    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 628    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 629       if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
 630 */
 631
 632 static void
 633 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 634                                     bool is_new_loop, basic_block *new_exit_bb)
 635 {
 636   tree orig_phi, new_phi;
 637   tree update_phi, update_phi2;
 638   tree guard_arg, loop_arg;
 639   basic_block new_merge_bb = guard_edge->dest;
 640   edge e = EDGE_SUCC (new_merge_bb, 0);
 641   basic_block update_bb = e->dest;
 642   edge new_exit_e;
 643   tree orig_def, orig_def_new_name;
 644   tree new_name, new_name2;
 645   tree arg;
 646
 647   /* Create new bb between loop and new_merge_bb.  */
 648   *new_exit_bb = split_edge (loop->single_exit);
 649   add_bb_to_loop (*new_exit_bb, loop->outer);
 650
 651   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 652   /* Walk the phis of UPDATE_BB, the successor of NEW_MERGE_BB.  */
 653   for (update_phi = phi_nodes (update_bb); update_phi;
 654        update_phi = PHI_CHAIN (update_phi))
 655     {
 656       orig_phi = update_phi;
 657       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 658       /* This loop-closed-phi actually doesn't represent a use
 659          out of the loop - the phi arg is a constant.  */
 660       if (TREE_CODE (orig_def) != SSA_NAME)
 661         continue;
 662       orig_def_new_name = get_current_def (orig_def);
 663       arg = NULL_TREE;
 664
 665       /** 1. Handle new-merge-point phis  **/
 666
 667       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 668       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 669                                  new_merge_bb);
 670
 671       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 672             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 673       new_name = orig_def;
 674       new_name2 = NULL_TREE;
 675       if (orig_def_new_name)
 676         {
 677           new_name = orig_def_new_name;
 678           /* Some variables have both loop-entry-phis and loop-exit-phis.
 679              Such variables were given yet newer names by phis placed in
 680              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 681              new_name2 = get_current_def (get_current_def (orig_name)).  */
 682           new_name2 = get_current_def (new_name);
 683         }
 684
 685       if (is_new_loop)
 686         {
 687           guard_arg = orig_def;
 688           loop_arg = new_name;
 689         }
 690       else
 691         {
 692           guard_arg = new_name;
 693           loop_arg = orig_def;
 694         }
 695       if (new_name2)
 696         guard_arg = new_name2;
 697       /* LOOP_ARG arrives from LOOP's exit, GUARD_ARG via GUARD_EDGE.  */
 698       add_phi_arg (new_phi, loop_arg, new_exit_e);
 699       add_phi_arg (new_phi, guard_arg, guard_edge);
 700
 701       /* 1.3. Update phi in successor block.  */
 702       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 703       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 704       update_phi2 = new_phi;
 705       /* UPDATE_PHI2 is the merge phi whose args are replaced in 2.3 and 3.4.  */
 706
 707       /** 2. Handle loop-closed-ssa-form phis  **/
 708
 709       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 710       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 711                                  *new_exit_bb);
 712
 713       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 714       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 715
 716       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 717       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 718       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 719
 720
 721       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 722
 723       /* 3.1. Find the relevant names that need an exit-phi in
 724          GUARD_BB, i.e. names for which
 725          slpeel_update_phi_nodes_for_guard1 had not already created a
 726          phi node. This is the case for names that are used outside
 727          the loop (and therefore need an exit phi) but are not updated
 728          across loop iterations (and therefore don't have a
 729          loop-header-phi).
 730
 731          slpeel_update_phi_nodes_for_guard1 is responsible for
 732          creating loop-exit phis in GUARD_BB for names that have a
 733          loop-header-phi.  When such a phi is created we also record
 734          the new name in its current definition.  If this new name
 735          exists, then guard_arg was set to this new name (see 1.2
 736          above).  Therefore, if guard_arg is not this new name, this
 737          is an indication that an exit-phi in GUARD_BB was not yet
 738          created, so we take care of it here.  */
 739       if (guard_arg == new_name2)
 740         continue;
 741       arg = guard_arg;
 742
 743       /* 3.2. Generate new phi node in GUARD_BB:  */
 744       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 745                                  guard_edge->src);
 746
 747       /* 3.3. GUARD_BB has one incoming edge:  */
 748       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 749       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 750
 751       /* 3.4. Update phi in successor of GUARD_BB:  */
 752       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 753                                                                 == guard_arg);
 754       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 755     }
 756   /* Reverse NEW_MERGE_BB's phi chain (presumably to restore order - confirm).  */
 757   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 758 }
 759
 760
 761 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 762    that starts at zero, increases by one and its limit is NITERS.
 763
 764    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 765
 766 void
 767 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 768 {
 769   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 770   tree orig_cond;
 771   edge exit_edge = loop->single_exit;
 772   block_stmt_iterator loop_cond_bsi;
 773   block_stmt_iterator incr_bsi;
 774   bool insert_after;
 775   tree begin_label = tree_block_label (loop->latch);
 776   tree exit_label = tree_block_label (loop->single_exit->dest);
 777   tree init = build_int_cst (TREE_TYPE (niters), 0);
 778   tree step = build_int_cst (TREE_TYPE (niters), 1);
 779   tree then_label;
 780   tree else_label;
 781   LOC loop_loc;
 782
 783   orig_cond = get_loop_exit_condition (loop);
 784   gcc_assert (orig_cond);
 785   loop_cond_bsi = bsi_for_stmt (orig_cond);
 786
 787   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 788   create_iv (init, step, NULL_TREE, loop,
 789              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 790
 791   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 792     {
 793       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 794       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 795       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 796     }
 797   else /* 'then' edge loops back.  */
 798     {
 799       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 800       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 801       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 802     }
 803
 804   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 805                      then_label, else_label);
 806   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 807
 808   /* Remove old loop exit test:  */
 809   bsi_remove (&loop_cond_bsi);
 810
 811   loop_loc = find_loop_location (loop);
 812   if (dump_file && (dump_flags & TDF_DETAILS))
 813     {
 814       if (loop_loc != UNKNOWN_LOC)
 815         fprintf (dump_file, "\nloop at %s:%d: ",
 816                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 817       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 818     }
 819
 820   loop->nb_iterations = niters;
 821 }
 822
 823
/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.

   LOOPS is passed through to duplicate_loop.

   Returns the new loop, or NULL when
   - E is neither LOOP's single exit edge nor its preheader edge,
   - can_copy_bbs_p rejects the loop body, or
   - duplicate_loop fails.

   On success the CFG edges, the immediate dominators of the blocks that
   were re-wired, and the phi arguments at the exit destination (and, for
   a copy placed at the entry, at LOOP's header) are updated here.  */

static struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
                                        edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  tree phi, phi_arg;

  /* E must be either the single exit edge or the preheader edge.  */
  at_exit = (e == loop->single_exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  bbs = get_loop_body (loop);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (loops, loop, loop->outer);
  if (!new_loop)
    {
      free (bbs);
      return NULL;
    }

  /* Remember, before the CFG changes, whether LOOP's header immediately
     dominates the exit destination; this is consulted below when the
     copy is placed at the exit.  */
  exit_dest = loop->single_exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);

  /* Copy the body; the copy of LOOP's single exit edge is stored into
     NEW_LOOP->single_exit.  */
  copy_bbs (bbs, loop->num_nodes, new_bbs,
            &loop->single_exit, 1, &new_loop->single_exit, NULL,
            e->src);

  /* Duplicating phi args at exit bbs as coming
     also from exit of duplicated loop.  */
  for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
    {
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
      if (phi_arg)
        {
          edge new_loop_exit_edge;

          /* The exit edge of the copy is taken to be the header successor
             (index 0 or 1) that does not lead to the latch.  */
          if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
          else
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);

          add_phi_arg (phi, phi_arg, new_loop_exit_edge);
        }
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      /* LOOP's exit now enters the copy's header.  */
      redirect_edge_and_branch_force (e, new_loop->header);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
      if (was_imm_dom)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
    }
  else /* Add the copy at entry.  */
    {
      edge new_exit_e;
      edge entry_e = loop_preheader_edge (loop);
      basic_block preheader = entry_e->src;

      /* Find the copy's exit edge: the header successor whose destination
         is not inside NEW_LOOP.  */
      if (!flow_bb_inside_loop_p (new_loop,
                                  EDGE_SUCC (new_loop->header, 0)->dest))
        new_exit_e = EDGE_SUCC (new_loop->header, 0);
      else
        new_exit_e = EDGE_SUCC (new_loop->header, 1);

      /* The copy's exit now enters LOOP's header.  */
      redirect_edge_and_branch_force (new_exit_e, loop->header);
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
                               new_exit_e->src);

      /* We have to add phi args to the loop->header here as coming
         from new_exit_e edge.  */
      for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
        {
          phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
          if (phi_arg)
            add_phi_arg (phi, phi_arg, new_exit_e);
        }

      /* Finally send the original preheader edge into the copy.  */
      redirect_edge_and_branch_force (entry_e, new_loop->header);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
    }

  free (new_bbs);
  free (bbs);

  return new_loop;
}
 929
 930
 931 /* Given the condition statement COND, put it as the last statement
 932    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 933    Assumes that this is the single exit of the guarded loop.
 934    Returns the skip edge.  */
 935
 936 static edge
 937 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 938                         basic_block dom_bb)
 939 {
 940   block_stmt_iterator bsi;
 941   edge new_e, enter_e;
 942   tree cond_stmt, then_label, else_label;
 943
 944   enter_e = EDGE_SUCC (guard_bb, 0);
 945   enter_e->flags &= ~EDGE_FALLTHRU;
 946   enter_e->flags |= EDGE_FALSE_VALUE;
 947   bsi = bsi_last (guard_bb);
 948
 949   then_label = build1 (GOTO_EXPR, void_type_node,
 950                        tree_block_label (exit_bb));
 951   else_label = build1 (GOTO_EXPR, void_type_node,
 952                        tree_block_label (enter_e->dest));
 953   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 954                      then_label, else_label);
 955   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 956   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 957   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 958   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 959   return new_e;
 960 }
 961
 962
 963 /* This function verifies that the following restrictions apply to LOOP:
 964    (1) it is innermost
 965    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 966    (3) it is single entry, single exit
 967    (4) its exit condition is the last stmt in the header
 968    (5) E is the entry/exit edge of LOOP.
 969  */
 970
 971 bool
 972 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 973 {
 974   edge exit_e = loop->single_exit;
 975   edge entry_e = loop_preheader_edge (loop);
 976   tree orig_cond = get_loop_exit_condition (loop);
 977   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 978
 979   if (need_ssa_update_p ())
 980     return false;
 981
 982   if (loop->inner
 983       /* All loops have an outer scope; the only case loop->outer is NULL is for
 984          the function itself.  */
 985       || !loop->outer
 986       || loop->num_nodes != 2
 987       || !empty_block_p (loop->latch)
 988       || !loop->single_exit
 989       /* Verify that new loop exit condition can be trivially modified.  */
 990       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 991       || (e != exit_e && e != entry_e))
 992     return false;
 993
 994   return true;
 995 }
 996
 997 #ifdef ENABLE_CHECKING
 998 void
 999 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1000                                  struct loop *second_loop)
1001 {
1002   basic_block loop1_exit_bb = first_loop->single_exit->dest;
1003   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1004   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1005
1006   /* A guard that controls whether the second_loop is to be executed or skipped
1007      is placed in first_loop->exit.  first_loopt->exit therefore has two
1008      successors - one is the preheader of second_loop, and the other is a bb
1009      after second_loop.
1010    */
1011   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1012
1013   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1014         of second_loop.  */
1015
1016   /* The preheader of new_loop is expected to have two predecessors:
1017      first_loop->exit and the block that precedes first_loop.  */
1018
1019   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1020               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1021                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1022                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1023                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1024
1025   /* Verify that the other successor of first_loopt->exit is after the
1026      second_loop.  */
1027   /* TODO */
1028 }
1029 #endif
1030
/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP. After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.

   Input:
   - LOOP: the loop to be peeled.
   - LOOPS: passed through to the loop-duplication routine.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, the copy is placed before LOOP.  In this
        case first-loop is the newly created loop, and second-loop is LOOP.
        If it is the exit edge, the copy is placed after LOOP.  In this
        case, first-loop is LOOP, and second-loop is the newly created loop.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT:  specifies whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation (LOOP does not satisfy
   slpeel_can_duplicate_loop_p, or the duplication itself failed).

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS <= 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.
*/

struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
                               edge e, tree first_niters,
                               tree niters, bool update_first_loop_count)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition;
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  edge exit_e = loop->single_exit;
  LOC loop_loc;

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks. Then, when calling
     cfg_hooks->split_edge, the function tree_split_edge
     is actually called and, when calling cfg_hooks->duplicate_block,
     the function tree_duplicate_bb is called.  */
  tree_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
    {
      /* Duplication failed: report it in the detailed dump and give up.  */
      loop_loc = find_loop_location (loop);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (loop_loc != UNKNOWN_LOC)
            fprintf (dump_file, "\n%s:%d: note: ",
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
        }
      return NULL;
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  /* Fix up the phis of the two loops and rename the variables of the
     copy.  DEFINITIONS is handed to the guard-1 phi update below and is
     released at the end of this function.  */
  definitions = ssa_names_to_replace ();
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);

  /* 2. Add the guard that controls whether the first loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS <= 0) GOTO bb_before_second_loop
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  /* Create the two blocks by splitting the first loop's preheader and exit
     edges; both new blocks belong to the loop enclosing FIRST_LOOP.  */
  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  add_bb_to_loop (bb_before_first_loop, first_loop->outer);
  bb_before_second_loop = split_edge (first_loop->single_exit);
  add_bb_to_loop (bb_before_second_loop, first_loop->outer);

  /* Guard 1: skip the first loop when FIRST_NITERS <= 0.  */
  pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, first_niters,
                 build_int_cst (TREE_TYPE (first_niters), 0));
  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
                                      first_loop == new_loop,
                                      &new_exit_bb, &definitions);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS <= 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  /* The block returned by the guard-1 phi update (through NEW_EXIT_BB)
     hosts the second guard.  */
  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (second_loop->single_exit);
  add_bb_to_loop (bb_after_second_loop, second_loop->outer);

  /* Guard 2: skip the second loop when FIRST_NITERS == NITERS.  The
     immediate dominator recorded for bb_after_second_loop is
     bb_before_first_loop.  */
  pre_condition =
        fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                     second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
   */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  BITMAP_FREE (definitions);
  delete_update_ssa ();

  return new_loop;
}
1220
1221 /* Function vect_get_loop_location.
1222
1223    Extract the location of the loop in the source code.
1224    If the loop is not well formed for vectorization, an estimated
1225    location is calculated.
1226    Return the loop location if succeed and NULL if not.  */
1227
1228 LOC
1229 find_loop_location (struct loop *loop)
1230 {
1231   tree node = NULL_TREE;
1232   basic_block bb;
1233   block_stmt_iterator si;
1234
1235   if (!loop)
1236     return UNKNOWN_LOC;
1237
1238   node = get_loop_exit_condition (loop);
1239
1240   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1241       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1242     return EXPR_LOC (node);
1243
1244   /* If we got here the loop is probably not "well formed",
1245      try to estimate the loop location */
1246
1247   if (!loop->header)
1248     return UNKNOWN_LOC;
1249
1250   bb = loop->header;
1251
1252   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1253     {
1254       node = bsi_stmt (si);
1255       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1256         return EXPR_LOC (node);
1257     }
1258
1259   return UNKNOWN_LOC;
1260 }
1261
1262
1263 /*************************************************************************
1264   Vectorization Debug Information.
1265  *************************************************************************/
1266
1267 /* Function vect_set_verbosity_level.
1268
1269    Called from toplev.c upon detection of the
1270    -ftree-vectorizer-verbose=N option.  */
1271
1272 void
1273 vect_set_verbosity_level (const char *val)
1274 {
1275    unsigned int vl;
1276
1277    vl = atoi (val);
1278    if (vl < MAX_VERBOSITY_LEVEL)
1279      vect_verbosity_level = vl;
1280    else
1281      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1282 }
1283
1284
1285 /* Function vect_set_dump_settings.
1286
1287    Fix the verbosity level of the vectorizer if the
1288    requested level was not set explicitly using the flag
1289    -ftree-vectorizer-verbose=N.
1290    Decide where to print the debugging information (dump_file/stderr).
1291    If the user defined the verbosity level, but there is no dump file,
1292    print to stderr, otherwise print to the dump file.  */
1293
1294 static void
1295 vect_set_dump_settings (void)
1296 {
1297   vect_dump = dump_file;
1298
1299   /* Check if the verbosity level was defined by the user:  */
1300   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1301     {
1302       /* If there is no dump file, print to stderr.  */
1303       if (!dump_file)
1304         vect_dump = stderr;
1305       return;
1306     }
1307
1308   /* User didn't specify verbosity level:  */
1309   if (dump_file && (dump_flags & TDF_DETAILS))
1310     vect_verbosity_level = REPORT_DETAILS;
1311   else if (dump_file && (dump_flags & TDF_STATS))
1312     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1313   else
1314     vect_verbosity_level = REPORT_NONE;
1315
1316   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1317 }
1318
1319
1320 /* Function debug_loop_details.
1321
1322    For vectorization debug dumps.  */
1323
1324 bool
1325 vect_print_dump_info (enum verbosity_levels vl)
1326 {
1327   if (vl > vect_verbosity_level)
1328     return false;
1329
1330   if (vect_loop_location == UNKNOWN_LOC)
1331     fprintf (vect_dump, "\n%s:%d: note: ",
1332                  DECL_SOURCE_FILE (current_function_decl),
1333                  DECL_SOURCE_LINE (current_function_decl));
1334   else
1335     fprintf (vect_dump, "\n%s:%d: note: ",
1336              LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1337
1338
1339   return true;
1340 }
1341
1342
1343 /*************************************************************************
1344   Vectorization Utilities.
1345  *************************************************************************/
1346
1347 /* Function new_stmt_vec_info.
1348
1349    Create and initialize a new stmt_vec_info struct for STMT.  */
1350
1351 stmt_vec_info
1352 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1353 {
1354   stmt_vec_info res;
1355   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1356
1357   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1358   STMT_VINFO_STMT (res) = stmt;
1359   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1360   STMT_VINFO_RELEVANT (res) = 0;
1361   STMT_VINFO_LIVE_P (res) = false;
1362   STMT_VINFO_VECTYPE (res) = NULL;
1363   STMT_VINFO_VEC_STMT (res) = NULL;
1364   STMT_VINFO_EPILOG_STMT (res) = NULL;
1365   STMT_VINFO_DATA_REF (res) = NULL;
1366   STMT_VINFO_IN_PATTERN_P (res) = false;
1367   STMT_VINFO_RELATED_STMT (res) = NULL;
1368   if (TREE_CODE (stmt) == PHI_NODE)
1369     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1370   else
1371     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1372   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1373   DR_GROUP_FIRST_DR (res) = NULL_TREE;
1374   DR_GROUP_NEXT_DR (res) = NULL_TREE;
1375   DR_GROUP_SIZE (res) = 0;
1376   DR_GROUP_SAME_DR_STMT (res) = NULL_TREE;
1377   DR_GROUP_GAP (res) = 0;
1378   DR_GROUP_STORE_COUNT (res) = 0;
1379
1380   return res;
1381 }
1382
1383
1384 /* Function new_loop_vec_info.
1385
1386    Create and initialize a new loop_vec_info struct for LOOP, as well as
1387    stmt_vec_info structs for all the stmts in LOOP.  */
1388
1389 loop_vec_info
1390 new_loop_vec_info (struct loop *loop)
1391 {
1392   loop_vec_info res;
1393   basic_block *bbs;
1394   block_stmt_iterator si;
1395   unsigned int i;
1396
1397   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1398
1399   bbs = get_loop_body (loop);
1400
1401   /* Create stmt_info for all stmts in the loop.  */
1402   for (i = 0; i < loop->num_nodes; i++)
1403     {
1404       basic_block bb = bbs[i];
1405       tree phi;
1406
1407       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1408         {
1409           tree_ann_t ann = get_tree_ann (phi);
1410           set_stmt_info (ann, new_stmt_vec_info (phi, res));
1411         }
1412
1413       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1414         {
1415           tree stmt = bsi_stmt (si);
1416           stmt_ann_t ann;
1417
1418           ann = stmt_ann (stmt);
1419           set_stmt_info ((tree_ann_t)ann, new_stmt_vec_info (stmt, res));
1420         }
1421     }
1422
1423   LOOP_VINFO_LOOP (res) = loop;
1424   LOOP_VINFO_BBS (res) = bbs;
1425   LOOP_VINFO_EXIT_COND (res) = NULL;
1426   LOOP_VINFO_NITERS (res) = NULL;
1427   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1428   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1429   LOOP_VINFO_VECT_FACTOR (res) = 0;
1430   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREFS (res), 20, "loop_datarefs");
1431   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DDRS (res), 20, "loop_ddrs");
1432   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1433   LOOP_VINFO_MAY_MISALIGN_STMTS (res)
1434     = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
1435
1436   return res;
1437 }
1438
1439
1440 /* Function destroy_loop_vec_info.
1441
1442    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1443    stmts in the loop.  */
1444
1445 void
1446 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1447 {
1448   struct loop *loop;
1449   basic_block *bbs;
1450   int nbbs;
1451   block_stmt_iterator si;
1452   int j;
1453
1454   if (!loop_vinfo)
1455     return;
1456
1457   loop = LOOP_VINFO_LOOP (loop_vinfo);
1458
1459   bbs = LOOP_VINFO_BBS (loop_vinfo);
1460   nbbs = loop->num_nodes;
1461
1462   for (j = 0; j < nbbs; j++)
1463     {
1464       basic_block bb = bbs[j];
1465       tree phi;
1466       stmt_vec_info stmt_info;
1467
1468       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1469         {
1470           tree_ann_t ann = get_tree_ann (phi);
1471
1472           stmt_info = vinfo_for_stmt (phi);
1473           free (stmt_info);
1474           set_stmt_info (ann, NULL);
1475         }
1476
1477       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1478         {
1479           tree stmt = bsi_stmt (si);
1480           stmt_ann_t ann = stmt_ann (stmt);
1481           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1482
1483           if (stmt_info)
1484             {
1485               VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1486               free (stmt_info);
1487               set_stmt_info ((tree_ann_t)ann, NULL);
1488             }
1489         }
1490     }
1491
1492   free (LOOP_VINFO_BBS (loop_vinfo));
1493   varray_clear (LOOP_VINFO_DATAREFS (loop_vinfo));
1494   varray_clear (LOOP_VINFO_DDRS (loop_vinfo));
1495   VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1496
1497   free (loop_vinfo);
1498 }
1499
1500
1501 /* Function vect_force_dr_alignment_p.
1502
1503    Returns whether the alignment of a DECL can be forced to be aligned
1504    on ALIGNMENT bit boundary.  */
1505
1506 bool
1507 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1508 {
1509   if (TREE_CODE (decl) != VAR_DECL)
1510     return false;
1511
1512   if (DECL_EXTERNAL (decl))
1513     return false;
1514
1515   if (TREE_ASM_WRITTEN (decl))
1516     return false;
1517
1518   if (TREE_STATIC (decl))
1519     return (alignment <= MAX_OFILE_ALIGNMENT);
1520   else
1521     /* This is not 100% correct.  The absolute correct stack alignment
1522        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1523        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1524        However, until someone implements forced stack alignment, SSE
1525        isn't really usable without this.  */
1526     return (alignment <= PREFERRED_STACK_BOUNDARY);
1527 }
1528
1529
1530 /* Function get_vectype_for_scalar_type.
1531
1532    Returns the vector type corresponding to SCALAR_TYPE as supported
1533    by the target.  */
1534
1535 tree
1536 get_vectype_for_scalar_type (tree scalar_type)
1537 {
1538   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1539   int nbytes = GET_MODE_SIZE (inner_mode);
1540   int nunits;
1541   tree vectype;
1542
1543   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1544     return NULL_TREE;
1545
1546   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1547      is expected.  */
1548   nunits = UNITS_PER_SIMD_WORD / nbytes;
1549
1550   vectype = build_vector_type (scalar_type, nunits);
1551   if (vect_print_dump_info (REPORT_DETAILS))
1552     {
1553       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1554       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1555     }
1556
1557   if (!vectype)
1558     return NULL_TREE;
1559
1560   if (vect_print_dump_info (REPORT_DETAILS))
1561     {
1562       fprintf (vect_dump, "vectype: ");
1563       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1564     }
1565
1566   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1567       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1568     {
1569       if (vect_print_dump_info (REPORT_DETAILS))
1570         fprintf (vect_dump, "mode not supported by target.");
1571       return NULL_TREE;
1572     }
1573
1574   return vectype;
1575 }
1576
1577
1578 /* Function vect_supportable_dr_alignment
1579
1580    Return whether the data reference DR is supported with respect to its
1581    alignment.  */
1582
1583 enum dr_alignment_support
1584 vect_supportable_dr_alignment (struct data_reference *dr)
1585 {
1586   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1587   enum machine_mode mode = (int) TYPE_MODE (vectype);
1588
1589   if (aligned_access_p (dr))
1590     return dr_aligned;
1591
1592   /* Possibly unaligned access.  */
1593   if (DR_IS_READ (dr))
1594     {
1595       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1596           && (!targetm.vectorize.builtin_mask_for_load
1597               || targetm.vectorize.builtin_mask_for_load ()))
1598         return dr_unaligned_software_pipeline;
1599
1600       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1601         /* Can't software pipeline the loads, but can at least do them.  */
1602         return dr_unaligned_supported;
1603     }
1604
1605   /* Unsupported.  */
1606   return dr_unaligned_unsupported;
1607 }
1608
1609
1610 /* Function vect_is_simple_live_use.
1611
1612    Input:
1613    LOOP - the loop that is being vectorized.
1614    OPERAND - operand of a stmt in LOOP.
1615    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1616
1617    Returns whether a stmt with OPERAND can be vectorized.  Supportable
1618    operands are constants, loop invariants, and operands that are defined
1619    by the current iteration of the loop and induction variables. */
1620
1621 bool
1622 vect_is_simple_live_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1623                     tree *def, enum vect_def_type *dt)
1624 {
1625   basic_block bb;
1626   stmt_vec_info stmt_vinfo;
1627   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1628
1629   *def_stmt = NULL_TREE;
1630   *def = NULL_TREE;
1631
1632   if (vect_print_dump_info (REPORT_DETAILS))
1633     {
1634       fprintf (vect_dump, "vect_is_simple_use: operand ");
1635       print_generic_expr (vect_dump, operand, TDF_SLIM);
1636     }
1637
1638   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1639     {
1640       *dt = vect_constant_def;
1641       return true;
1642     }
1643
1644   if (TREE_CODE (operand) != SSA_NAME)
1645     {
1646       if (vect_print_dump_info (REPORT_DETAILS))
1647         fprintf (vect_dump, "not ssa-name.");
1648       return false;
1649     }
1650
1651   *def_stmt = SSA_NAME_DEF_STMT (operand);
1652   if (*def_stmt == NULL_TREE )
1653     {
1654       if (vect_print_dump_info (REPORT_DETAILS))
1655         fprintf (vect_dump, "no def_stmt.");
1656       return false;
1657     }
1658
1659   if (vect_print_dump_info (REPORT_DETAILS))
1660     {
1661       fprintf (vect_dump, "def_stmt: ");
1662       print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1663     }
1664
1665   /* empty stmt is expected only in case of a function argument.
1666      (Otherwise - we expect a phi_node or a modify_expr).  */
1667   if (IS_EMPTY_STMT (*def_stmt))
1668     {
1669       *def = operand;
1670       *dt = vect_invariant_def;
1671       return true;
1672     }
1673
1674   bb = bb_for_stmt (*def_stmt);
1675   if (!flow_bb_inside_loop_p (loop, bb))
1676     *dt = vect_invariant_def;
1677   else
1678     {
1679       stmt_vinfo = vinfo_for_stmt (*def_stmt);
1680       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1681     }
1682
1683   if (*dt == vect_unknown_def_type)
1684     {
1685       if (vect_print_dump_info (REPORT_DETAILS))
1686         fprintf (vect_dump, "Unsupported pattern.");
1687       return false;
1688     }
1689
1690   /* stmts inside the loop that have been identified as performing
1691      a reduction operation cannot have uses in the loop.  */
1692   if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1693     {
1694       if (vect_print_dump_info (REPORT_DETAILS))
1695         fprintf (vect_dump, "reduction used in loop.");
1696       return false;
1697     }
1698
1699   if (vect_print_dump_info (REPORT_DETAILS))
1700     fprintf (vect_dump, "type of def: %d.",*dt);
1701
1702   switch (TREE_CODE (*def_stmt))
1703     {
1704     case PHI_NODE:
1705       *def = PHI_RESULT (*def_stmt);
1706       gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1707                   || *dt == vect_invariant_def);
1708       break;
1709
1710     case MODIFY_EXPR:
1711       *def = TREE_OPERAND (*def_stmt, 0);
1712       gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1713       break;
1714
1715     default:
1716       if (vect_print_dump_info (REPORT_DETAILS))
1717         fprintf (vect_dump, "unsupported defining stmt: ");
1718       return false;
1719     }
1720
1721   return true;
1722 }
1723
1724
1725 /* Function vect_is_simple_use.
1726
1727    Input:
1728    LOOP - the loop that is being vectorized.
1729    OPERAND - operand of a stmt in LOOP.
1730    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1731
1732    Returns whether a stmt with OPERAND can be vectorized.  This function
1733    is currently similar to vect_is_simple_live_use, but in addition
1734    filters out operands which are induction variables.  This restriction
1735    will be relaxed in the future. */
1736
1737 bool
1738 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree * def_stmt,
1739                     tree * def, enum vect_def_type *dt)
1740 {
1741
1742   if (!vect_is_simple_live_use (operand, loop_vinfo, def_stmt, def, dt))
1743     return false;
1744
1745   if (*dt == vect_induction_def)
1746     {
1747       if (vect_print_dump_info (REPORT_DETAILS))
1748         fprintf (vect_dump, "induction not supported.");
1749       return false;
1750     }
1751
1752   return true;
1753 }
1754
1755
1756 /* Function supportable_widening_operation
1757
1758    Check whether an operation represented by the code CODE is a
1759    widening operation that is supported by the target platform in
1760    vector form (i.e., when operating on arguments of type VECTYPE).
1761
1762    The two kinds of widening operations we currently support are
1763    NOP and WIDEN_MULT. This function checks if these oprations
1764    are supported by the target platform either directly (via vector
1765    tree-codes), or via target builtins.
1766
1767    Output:
1768    - CODE1 and CODE2 are codes of vector operations to be used when
1769    vectorizing the operation, if available.
1770    - DECL1 and DECL2 are decls of target builtin functions to be used
1771    when vectorizing the operation, if available. In this case,
1772    CODE1 and CODE2 are CALL_EXPR.
1773 */
1774
1775 bool
1776 supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
1777                                 tree *decl1, tree *decl2,
1778                                 enum tree_code *code1, enum tree_code *code2)
1779 {
1780   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1781   bool ordered_p;
1782   enum machine_mode vec_mode;
1783   enum insn_code icode1, icode2;
1784   optab optab1, optab2;
1785   tree expr = TREE_OPERAND (stmt, 1);
1786   tree type = TREE_TYPE (expr);
1787   tree wide_vectype = get_vectype_for_scalar_type (type);
1788   enum tree_code c1, c2;
1789
1790   /* The result of a vectorized widening operation usually requires two vectors
1791      (because the widened results do not fit int one vector). The generated
1792      vector results would normally be expected to be generated in the same
1793      order as in the original scalar computation. i.e. if 8 results are
1794      generated in each vector iteration, they are to be organized as follows:
1795         vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
1796
1797      However, in the special case that the result of the widening operation is
1798      used in a reduction copmutation only, the order doesn't matter (because
1799      when vectorizing a reduction we change the order of the computation).
1800      Some targets can take advatage of this and generate more efficient code.
1801      For example, targets like Altivec, that support widen_mult using a sequence
1802      of {mult_even,mult_odd} generate the following vectors:
1803         vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
1804    */
1805
1806    if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction)
1807      ordered_p = false;
1808    else
1809      ordered_p = true;
1810
1811   if (!ordered_p
1812       && code == WIDEN_MULT_EXPR
1813       && targetm.vectorize.builtin_mul_widen_even
1814       && targetm.vectorize.builtin_mul_widen_even (vectype)
1815       && targetm.vectorize.builtin_mul_widen_odd
1816       && targetm.vectorize.builtin_mul_widen_odd (vectype))
1817     {
1818       if (vect_print_dump_info (REPORT_DETAILS))
1819         fprintf (vect_dump, "Unordered widening operation detected.");
1820
1821       *code1 = *code2 = CALL_EXPR;
1822       *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
1823       *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
1824       return true;
1825     }
1826
1827   switch (code)
1828     {
1829     case WIDEN_MULT_EXPR:
1830       if (BYTES_BIG_ENDIAN)
1831         {
1832           c1 = VEC_WIDEN_MULT_HI_EXPR;
1833           c2 = VEC_WIDEN_MULT_LO_EXPR;
1834         }
1835       else
1836         {
1837           c2 = VEC_WIDEN_MULT_HI_EXPR;
1838           c1 = VEC_WIDEN_MULT_LO_EXPR;
1839         }
1840       break;
1841     case NOP_EXPR:
1842       if (BYTES_BIG_ENDIAN)
1843         {
1844           c1 = VEC_UNPACK_HI_EXPR;
1845           c2 = VEC_UNPACK_LO_EXPR;
1846         }
1847       else
1848         {
1849           c2 = VEC_UNPACK_HI_EXPR;
1850           c1 = VEC_UNPACK_LO_EXPR;
1851         }
1852       break;
1853     default:
1854       gcc_unreachable ();
1855     }
1856
1857   *code1 = c1;
1858   *code2 = c2;
1859   optab1 = optab_for_tree_code (c1, vectype);
1860   optab2 = optab_for_tree_code (c2, vectype);
1861
1862   if (!optab1 || !optab2)
1863     return false;
1864
1865   vec_mode = TYPE_MODE (vectype);
1866   if ((icode1 = optab1->handlers[(int) vec_mode].insn_code) == CODE_FOR_nothing
1867       || insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
1868       || (icode2 = optab2->handlers[(int) vec_mode].insn_code)
1869                                                         == CODE_FOR_nothing
1870       || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
1871     return false;
1872
1873   return true;
1874 }
1875
1876
1877 /* Function reduction_code_for_scalar_code
1878
1879    Input:
1880    CODE - tree_code of a reduction operations.
1881
1882    Output:
1883    REDUC_CODE - the corresponding tree-code to be used to reduce the
1884       vector of partial results into a single scalar result (which
1885       will also reside in a vector).
1886
1887    Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1888
1889 bool
1890 reduction_code_for_scalar_code (enum tree_code code,
1891                                 enum tree_code *reduc_code)
1892 {
1893   switch (code)
1894   {
1895   case MAX_EXPR:
1896     *reduc_code = REDUC_MAX_EXPR;
1897     return true;
1898
1899   case MIN_EXPR:
1900     *reduc_code = REDUC_MIN_EXPR;
1901     return true;
1902
1903   case PLUS_EXPR:
1904     *reduc_code = REDUC_PLUS_EXPR;
1905     return true;
1906
1907   default:
1908     return false;
1909   }
1910 }
1911
1912
1913 /* Function vect_is_simple_reduction
1914
1915    Detect a cross-iteration def-use cucle that represents a simple
1916    reduction computation. We look for the following pattern:
1917
1918    loop_header:
1919      a1 = phi < a0, a2 >
1920      a3 = ...
1921      a2 = operation (a3, a1)
1922
1923    such that:
1924    1. operation is commutative and associative and it is safe to
1925       change the order of the computation.
1926    2. no uses for a2 in the loop (a2 is used out of the loop)
1927    3. no uses of a1 in the loop besides the reduction operation.
1928
1929    Condition 1 is tested here.
1930    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1931
1932 tree
1933 vect_is_simple_reduction (struct loop *loop, tree phi)
1934 {
1935   edge latch_e = loop_latch_edge (loop);
1936   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1937   tree def_stmt, def1, def2;
1938   enum tree_code code;
1939   int op_type;
1940   tree operation, op1, op2;
1941   tree type;
1942
1943   if (TREE_CODE (loop_arg) != SSA_NAME)
1944     {
1945       if (vect_print_dump_info (REPORT_DETAILS))
1946         {
1947           fprintf (vect_dump, "reduction: not ssa_name: ");
1948           print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1949         }
1950       return NULL_TREE;
1951     }
1952
1953   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1954   if (!def_stmt)
1955     {
1956       if (vect_print_dump_info (REPORT_DETAILS))
1957         fprintf (vect_dump, "reduction: no def_stmt.");
1958       return NULL_TREE;
1959     }
1960
1961   if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1962     {
1963       if (vect_print_dump_info (REPORT_DETAILS))
1964         {
1965           fprintf (vect_dump, "reduction: unexpected def_stmt: ");
1966           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1967         }
1968       return NULL_TREE;
1969     }
1970
1971   operation = TREE_OPERAND (def_stmt, 1);
1972   code = TREE_CODE (operation);
1973   if (!commutative_tree_code (code) || !associative_tree_code (code))
1974     {
1975       if (vect_print_dump_info (REPORT_DETAILS))
1976         {
1977           fprintf (vect_dump, "reduction: not commutative/associative: ");
1978           print_generic_expr (vect_dump, operation, TDF_SLIM);
1979         }
1980       return NULL_TREE;
1981     }
1982
1983   op_type = TREE_CODE_LENGTH (code);
1984   if (op_type != binary_op)
1985     {
1986       if (vect_print_dump_info (REPORT_DETAILS))
1987         {
1988           fprintf (vect_dump, "reduction: not binary operation: ");
1989           print_generic_expr (vect_dump, operation, TDF_SLIM);
1990         }
1991       return NULL_TREE;
1992     }
1993
1994   op1 = TREE_OPERAND (operation, 0);
1995   op2 = TREE_OPERAND (operation, 1);
1996   if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1997     {
1998       if (vect_print_dump_info (REPORT_DETAILS))
1999         {
2000           fprintf (vect_dump, "reduction: uses not ssa_names: ");
2001           print_generic_expr (vect_dump, operation, TDF_SLIM);
2002         }
2003       return NULL_TREE;
2004     }
2005
2006   /* Check that it's ok to change the order of the computation.  */
2007   type = TREE_TYPE (operation);
2008   if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
2009       || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
2010     {
2011       if (vect_print_dump_info (REPORT_DETAILS))
2012         {
2013           fprintf (vect_dump, "reduction: multiple types: operation type: ");
2014           print_generic_expr (vect_dump, type, TDF_SLIM);
2015           fprintf (vect_dump, ", operands types: ");
2016           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2017           fprintf (vect_dump, ",");
2018           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2019         }
2020       return NULL_TREE;
2021     }
2022
2023   /* CHECKME: check for !flag_finite_math_only too?  */
2024   if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
2025     {
2026       /* Changing the order of operations changes the semantics.  */
2027       if (vect_print_dump_info (REPORT_DETAILS))
2028         {
2029           fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
2030           print_generic_expr (vect_dump, operation, TDF_SLIM);
2031         }
2032       return NULL_TREE;
2033     }
2034   else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
2035     {
2036       /* Changing the order of operations changes the semantics.  */
2037       if (vect_print_dump_info (REPORT_DETAILS))
2038         {
2039           fprintf (vect_dump, "reduction: unsafe int math optimization: ");
2040           print_generic_expr (vect_dump, operation, TDF_SLIM);
2041         }
2042       return NULL_TREE;
2043     }
2044
2045   /* reduction is safe. we're dealing with one of the following:
2046      1) integer arithmetic and no trapv
2047      2) floating point arithmetic, and special flags permit this optimization.
2048    */
2049   def1 = SSA_NAME_DEF_STMT (op1);
2050   def2 = SSA_NAME_DEF_STMT (op2);
2051   if (!def1 || !def2)
2052     {
2053       if (vect_print_dump_info (REPORT_DETAILS))
2054         {
2055           fprintf (vect_dump, "reduction: no defs for operands: ");
2056           print_generic_expr (vect_dump, operation, TDF_SLIM);
2057         }
2058       return NULL_TREE;
2059     }
2060
2061   if (TREE_CODE (def1) == MODIFY_EXPR
2062       && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
2063       && def2 == phi)
2064     {
2065       if (vect_print_dump_info (REPORT_DETAILS))
2066         {
2067           fprintf (vect_dump, "detected reduction:");
2068           print_generic_expr (vect_dump, operation, TDF_SLIM);
2069         }
2070       return def_stmt;
2071     }
2072   else if (TREE_CODE (def2) == MODIFY_EXPR
2073       && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
2074       && def1 == phi)
2075     {
2076       /* Swap operands (just for simplicity - so that the rest of the code
2077          can assume that the reduction variable is always the last (second)
2078          argument).  */
2079       if (vect_print_dump_info (REPORT_DETAILS))
2080         {
2081           fprintf (vect_dump, "detected reduction: need to swap operands:");
2082           print_generic_expr (vect_dump, operation, TDF_SLIM);
2083         }
2084       swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
2085                                     &TREE_OPERAND (operation, 1));
2086       return def_stmt;
2087     }
2088   else
2089     {
2090       if (vect_print_dump_info (REPORT_DETAILS))
2091         {
2092           fprintf (vect_dump, "reduction: unknown pattern. ");
2093           print_generic_expr (vect_dump, operation, TDF_SLIM);
2094         }
2095       return NULL_TREE;
2096     }
2097 }
2098
2099
2100 /* Function vect_is_simple_iv_evolution.
2101
2102    FORNOW: A simple evolution of an induction variables in the loop is
2103    considered a polynomial evolution with constant step.  */
2104
2105 bool
2106 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2107                              tree * step)
2108 {
2109   tree init_expr;
2110   tree step_expr;
2111
2112   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2113
2114   /* When there is no evolution in this loop, the evolution function
2115      is not "simple".  */
2116   if (evolution_part == NULL_TREE)
2117     return false;
2118
2119   /* When the evolution is a polynomial of degree >= 2
2120      the evolution function is not "simple".  */
2121   if (tree_is_chrec (evolution_part))
2122     return false;
2123
2124   step_expr = evolution_part;
2125   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2126                                                            loop_nb));
2127
2128   if (vect_print_dump_info (REPORT_DETAILS))
2129     {
2130       fprintf (vect_dump, "step: ");
2131       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2132       fprintf (vect_dump, ",  init: ");
2133       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2134     }
2135
2136   *init = init_expr;
2137   *step = step_expr;
2138
2139   if (TREE_CODE (step_expr) != INTEGER_CST)
2140     {
2141       if (vect_print_dump_info (REPORT_DETAILS))
2142         fprintf (vect_dump, "step unknown.");
2143       return false;
2144     }
2145
2146   return true;
2147 }
2148
2149
2150 /* Function vectorize_loops.
2151
2152    Entry Point to loop vectorization phase.  */
2153
2154 void
2155 vectorize_loops (struct loops *loops)
2156 {
2157   unsigned int i;
2158   unsigned int num_vectorized_loops = 0;
2159
2160   /* Fix the verbosity level if not defined explicitly by the user.  */
2161   vect_set_dump_settings ();
2162
2163   /* Allocate the bitmap that records which virtual variables that
2164      need to be renamed.  */
2165   vect_vnames_to_rename = BITMAP_ALLOC (NULL);
2166
2167   /*  ----------- Analyze loops. -----------  */
2168
2169   /* If some loop was duplicated, it gets bigger number
2170      than all previously defined loops. This fact allows us to run
2171      only over initial loops skipping newly generated ones.  */
2172   vect_loops_num = loops->num;
2173   for (i = 1; i < vect_loops_num; i++)
2174     {
2175       loop_vec_info loop_vinfo;
2176       struct loop *loop = loops->parray[i];
2177
2178       if (!loop)
2179         continue;
2180
2181       vect_loop_location = find_loop_location (loop);
2182       loop_vinfo = vect_analyze_loop (loop);
2183       loop->aux = loop_vinfo;
2184
2185       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2186         continue;
2187
2188       vect_transform_loop (loop_vinfo, loops);
2189       num_vectorized_loops++;
2190     }
2191
2192   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2193     fprintf (vect_dump, "vectorized %u loops in function.\n",
2194              num_vectorized_loops);
2195
2196   /*  ----------- Finalize. -----------  */
2197
2198   BITMAP_FREE (vect_vnames_to_rename);
2199
2200   for (i = 1; i < vect_loops_num; i++)
2201     {
2202       struct loop *loop = loops->parray[i];
2203       loop_vec_info loop_vinfo;
2204
2205       if (!loop)
2206         continue;
2207       loop_vinfo = loop->aux;
2208       destroy_loop_vec_info (loop_vinfo);
2209       loop->aux = NULL;
2210     }
2211 }