gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  20 02110-1301, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "recog.h"
 140 #include "optabs.h"
 141 #include "params.h"
 142 #include "toplev.h"
 143 #include "tree-chrec.h"
 144 #include "tree-data-ref.h"
 145 #include "tree-scalar-evolution.h"
 146 #include "input.h"
 147 #include "tree-vectorizer.h"
 148 #include "tree-pass.h"
 149
 150 /*************************************************************************
 151   Simple Loop Peeling Utilities
 152  *************************************************************************/
 153 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 154   (struct loop *, struct loops *, edge);
 155 static void slpeel_update_phis_for_duplicate_loop
 156   (struct loop *, struct loop *, bool after);
 157 static void slpeel_update_phi_nodes_for_guard1
 158   (edge, struct loop *, bool, basic_block *, bitmap *);
 159 static void slpeel_update_phi_nodes_for_guard2
 160   (edge, struct loop *, bool, basic_block *);
 161 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 162
 163 static void rename_use_op (use_operand_p);
 164 static void rename_variables_in_bb (basic_block);
 165 static void rename_variables_in_loop (struct loop *);
 166
 167 /*************************************************************************
 168   General Vectorization Utilities
 169  *************************************************************************/
 170 static void vect_set_dump_settings (void);
 171
 172 /* vect_dump will be set to stderr or dump_file if exist.  */
 173 FILE *vect_dump;
 174
 175 /* vect_verbosity_level set to an invalid value
 176    to mark that it's uninitialized.  */
 177 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 178
 179 /* Number of loops, at the beginning of vectorization.  */
 180 unsigned int vect_loops_num;
 181
 182 /* Loop location.  */
 183 static LOC vect_loop_location;
 184
 185 /* Bitmap of virtual variables to be renamed.  */
 186 bitmap vect_vnames_to_rename;
 187 \f
 188 /*************************************************************************
 189   Simple Loop Peeling Utilities
 190
 191   Utilities to support loop peeling for vectorization purposes.
 192  *************************************************************************/
 193
 194
 195 /* Renames the use *OP_P.  */
 196
 197 static void
 198 rename_use_op (use_operand_p op_p)
 199 {
 200   tree new_name;
 201
 202   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 203     return;
 204
 205   new_name = get_current_def (USE_FROM_PTR (op_p));
 206
 207   /* Something defined outside of the loop.  */
 208   if (!new_name)
 209     return;
 210
 211   /* An ordinary ssa name defined in the loop.  */
 212
 213   SET_USE (op_p, new_name);
 214 }
 215
 216
 217 /* Renames the variables in basic block BB.  */
 218
 219 static void
 220 rename_variables_in_bb (basic_block bb)
 221 {
 222   tree phi;
 223   block_stmt_iterator bsi;
 224   tree stmt;
 225   use_operand_p use_p;
 226   ssa_op_iter iter;
 227   edge e;
 228   edge_iterator ei;
 229   struct loop *loop = bb->loop_father;
 230
 231   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 232     {
 233       stmt = bsi_stmt (bsi);
 234       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 235                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 236         rename_use_op (use_p);
 237     }
 238
 239   FOR_EACH_EDGE (e, ei, bb->succs)
 240     {
 241       if (!flow_bb_inside_loop_p (loop, e->dest))
 242         continue;
 243       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 244         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 245     }
 246 }
 247
 248
 249 /* Renames variables in new generated LOOP.  */
 250
 251 static void
 252 rename_variables_in_loop (struct loop *loop)
 253 {
 254   unsigned i;
 255   basic_block *bbs;
 256
 257   bbs = get_loop_body (loop);
 258
 259   for (i = 0; i < loop->num_nodes; i++)
 260     rename_variables_in_bb (bbs[i]);
 261
 262   free (bbs);
 263 }
 264
 265
 266 /* Update the PHI nodes of NEW_LOOP.
 267
 268    NEW_LOOP is a duplicate of ORIG_LOOP.
 269    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 270    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 271    executes before it.  */
 272
 273 static void
 274 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 275                                        struct loop *new_loop, bool after)
 276 {
 277   tree new_ssa_name;
 278   tree phi_new, phi_orig;
 279   tree def;
 280   edge orig_loop_latch = loop_latch_edge (orig_loop);
 281   edge orig_entry_e = loop_preheader_edge (orig_loop);
 282   edge new_loop_exit_e = new_loop->single_exit;
 283   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 284   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 285
 286   /*
 287      step 1. For each loop-header-phi:
 288              Add the first phi argument for the phi in NEW_LOOP
 289             (the one associated with the entry of NEW_LOOP)
 290
 291      step 2. For each loop-header-phi:
 292              Add the second phi argument for the phi in NEW_LOOP
 293             (the one associated with the latch of NEW_LOOP)
 294
 295      step 3. Update the phis in the successor block of NEW_LOOP.
 296
 297         case 1: NEW_LOOP was placed before ORIG_LOOP:
 298                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 299                 Updating the phis in the successor block can therefore be done
 300                 along with the scanning of the loop header phis, because the
 301                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 302                 phi nodes, organized in the same order.
 303
 304         case 2: NEW_LOOP was placed after ORIG_LOOP:
 305                 The successor block of NEW_LOOP is the original exit block of
 306                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 307                 We postpone updating these phis to a later stage (when
 308                 loop guards are added).
 309    */
 310
 311
 312   /* Scan the phis in the headers of the old and new loops
 313      (they are organized in exactly the same order).  */
 314
 315   for (phi_new = phi_nodes (new_loop->header),
 316        phi_orig = phi_nodes (orig_loop->header);
 317        phi_new && phi_orig;
 318        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 319     {
 320       /* step 1.  */
 321       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 322       add_phi_arg (phi_new, def, new_loop_entry_e);
 323
 324       /* step 2.  */
 325       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 326       if (TREE_CODE (def) != SSA_NAME)
 327         continue;
 328
 329       new_ssa_name = get_current_def (def);
 330       if (!new_ssa_name)
 331         {
 332           /* This only happens if there are no definitions
 333              inside the loop. use the phi_result in this case.  */
 334           new_ssa_name = PHI_RESULT (phi_new);
 335         }
 336
 337       /* An ordinary ssa name defined in the loop.  */
 338       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 339
 340       /* step 3 (case 1).  */
 341       if (!after)
 342         {
 343           gcc_assert (new_loop_exit_e == orig_entry_e);
 344           SET_PHI_ARG_DEF (phi_orig,
 345                            new_loop_exit_e->dest_idx,
 346                            new_ssa_name);
 347         }
 348     }
 349 }
 350
 351
 352 /* Update PHI nodes for a guard of the LOOP.
 353
 354    Input:
 355    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 356         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 357         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 358         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 359         We denote this bb NEW_MERGE_BB because before the guard code was added
 360         it had a single predecessor (the LOOP header), and now it became a merge
 361         point of two paths - the path that ends with the LOOP exit-edge, and
 362         the path that ends with GUARD_EDGE.
 363    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 364         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 365
 366    ===> The CFG before the guard-code was added:
 367         LOOP_header_bb:
 368           loop_body
 369           if (exit_loop) goto update_bb
 370           else           goto LOOP_header_bb
 371         update_bb:
 372
 373    ==> The CFG after the guard-code was added:
 374         guard_bb:
 375           if (LOOP_guard_condition) goto new_merge_bb
 376           else                      goto LOOP_header_bb
 377         LOOP_header_bb:
 378           loop_body
 379           if (exit_loop_condition) goto new_merge_bb
 380           else                     goto LOOP_header_bb
 381         new_merge_bb:
 382           goto update_bb
 383         update_bb:
 384
 385    ==> The CFG after this function:
 386         guard_bb:
 387           if (LOOP_guard_condition) goto new_merge_bb
 388           else                      goto LOOP_header_bb
 389         LOOP_header_bb:
 390           loop_body
 391           if (exit_loop_condition) goto new_exit_bb
 392           else                     goto LOOP_header_bb
 393         new_exit_bb:
 394         new_merge_bb:
 395           goto update_bb
 396         update_bb:
 397
 398    This function:
 399    1. creates and updates the relevant phi nodes to account for the new
 400       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 401       1.1. Create phi nodes at NEW_MERGE_BB.
 402       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 403            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 404    2. preserves loop-closed-ssa-form by creating the required phi nodes
 405       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 406
 407    There are two flavors to this function:
 408
 409    slpeel_update_phi_nodes_for_guard1:
 410      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 411      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 412      for variables that have phis in the loop header.
 413
 414    slpeel_update_phi_nodes_for_guard2:
 415      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 416      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 417      for variables that have phis in the loop exit.
 418
 419    I.E., the overall structure is:
 420
 421         loop1_preheader_bb:
 422                 guard1 (goto loop1/merg1_bb)
 423         loop1
 424         loop1_exit_bb:
 425                 guard2 (goto merge1_bb/merge2_bb)
 426         merge1_bb
 427         loop2
 428         loop2_exit_bb
 429         merge2_bb
 430         next_bb
 431
 432    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 433    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 434    that have phis in loop1->header).
 435
 436    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 437    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 438    that have phis in next_bb). It also adds some of these phis to
 439    loop1_exit_bb.
 440
 441    slpeel_update_phi_nodes_for_guard1 is always called before
 442    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 443    to create correct data-flow and loop-closed-ssa-form.
 444
 445    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 446    that change between iterations of a loop (and therefore have a phi-node
 447    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 448    phis for variables that are used out of the loop (and therefore have
 449    loop-closed exit phis). Some variables may be both updated between
 450    iterations and used after the loop. This is why in loop1_exit_bb we
 451    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 452    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 453
 454    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 455      an original loop. i.e., we have:
 456
 457            orig_loop
 458            guard_bb (goto LOOP/new_merge)
 459            new_loop <-- LOOP
 460            new_exit
 461            new_merge
 462            next_bb
 463
 464      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 465      have:
 466
 467            new_loop
 468            guard_bb (goto LOOP/new_merge)
 469            orig_loop <-- LOOP
 470            new_exit
 471            new_merge
 472            next_bb
 473
 474      The SSA names defined in the original loop have a current
 475      reaching definition that that records the corresponding new
 476      ssa-name used in the new duplicated loop copy.
 477   */
 478
 479 /* Function slpeel_update_phi_nodes_for_guard1
 480
 481    Input:
 482    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 483    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 484             information.
 485
 486    In the context of the overall structure, we have:
 487
 488         loop1_preheader_bb:
 489                 guard1 (goto loop1/merg1_bb)
 490 LOOP->  loop1
 491         loop1_exit_bb:
 492                 guard2 (goto merge1_bb/merge2_bb)
 493         merge1_bb
 494         loop2
 495         loop2_exit_bb
 496         merge2_bb
 497         next_bb
 498
 499    For each name updated between loop iterations (i.e - for each name that has
 500    an entry (loop-header) phi in LOOP) we create a new phi in:
 501    1. merge1_bb (to account for the edge from guard1)
 502    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 503 */
 504
 505 static void
 506 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 507                                     bool is_new_loop, basic_block *new_exit_bb,
 508                                     bitmap *defs)
 509 {
 510   tree orig_phi, new_phi;
 511   tree update_phi, update_phi2;
 512   tree guard_arg, loop_arg;
 513   basic_block new_merge_bb = guard_edge->dest;
 514   edge e = EDGE_SUCC (new_merge_bb, 0);
 515   basic_block update_bb = e->dest;
 516   basic_block orig_bb = loop->header;
 517   edge new_exit_e;
 518   tree current_new_name;
 519   tree name;
 520
 521   /* Create new bb between loop and new_merge_bb.  */
 522   *new_exit_bb = split_edge (loop->single_exit);
 523   add_bb_to_loop (*new_exit_bb, loop->outer);
 524
 525   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 526
 527   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 528        orig_phi && update_phi;
 529        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 530     {
 531       /* Virtual phi; Mark it for renaming. We actually want to call
 532          mar_sym_for_renaming, but since all ssa renaming datastructures
 533          are going to be freed before we get to call ssa_upate, we just
 534          record this name for now in a bitmap, and will mark it for
 535          renaming later.  */
 536       name = PHI_RESULT (orig_phi);
 537       if (!is_gimple_reg (SSA_NAME_VAR (name)))
 538         bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
 539
 540       /** 1. Handle new-merge-point phis  **/
 541
 542       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 543       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 544                                  new_merge_bb);
 545
 546       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 547             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 548       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 549       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 550
 551       add_phi_arg (new_phi, loop_arg, new_exit_e);
 552       add_phi_arg (new_phi, guard_arg, guard_edge);
 553
 554       /* 1.3. Update phi in successor block.  */
 555       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 556                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 557       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 558       update_phi2 = new_phi;
 559
 560
 561       /** 2. Handle loop-closed-ssa-form phis  **/
 562
 563       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 564       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 565                                  *new_exit_bb);
 566
 567       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 568       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 569
 570       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 571       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 572       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 573
 574       /* 2.4. Record the newly created name with set_current_def.
 575          We want to find a name such that
 576                 name = get_current_def (orig_loop_name)
 577          and to set its current definition as follows:
 578                 set_current_def (name, new_phi_name)
 579
 580          If LOOP is a new loop then loop_arg is already the name we're
 581          looking for. If LOOP is the original loop, then loop_arg is
 582          the orig_loop_name and the relevant name is recorded in its
 583          current reaching definition.  */
 584       if (is_new_loop)
 585         current_new_name = loop_arg;
 586       else
 587         {
 588           current_new_name = get_current_def (loop_arg);
 589           /* current_def is not available only if the variable does not
 590              change inside the loop, in which case we also don't care
 591              about recording a current_def for it because we won't be
 592              trying to create loop-exit-phis for it.  */
 593           if (!current_new_name)
 594             continue;
 595         }
 596       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 597
 598       set_current_def (current_new_name, PHI_RESULT (new_phi));
 599       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 600     }
 601
 602   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 603 }
 604
 605
 606 /* Function slpeel_update_phi_nodes_for_guard2
 607
 608    Input:
 609    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 610
 611    In the context of the overall structure, we have:
 612
 613         loop1_preheader_bb:
 614                 guard1 (goto loop1/merg1_bb)
 615         loop1
 616         loop1_exit_bb:
 617                 guard2 (goto merge1_bb/merge2_bb)
 618         merge1_bb
 619 LOOP->  loop2
 620         loop2_exit_bb
 621         merge2_bb
 622         next_bb
 623
 624    For each name used out side the loop (i.e - for each name that has an exit
 625    phi in next_bb) we create a new phi in:
 626    1. merge2_bb (to account for the edge from guard_bb)
 627    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 628    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 629       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 630 */
 631
 632 static void
 633 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 634                                     bool is_new_loop, basic_block *new_exit_bb)
 635 {
 636   tree orig_phi, new_phi;
 637   tree update_phi, update_phi2;
 638   tree guard_arg, loop_arg;
 639   basic_block new_merge_bb = guard_edge->dest;
 640   edge e = EDGE_SUCC (new_merge_bb, 0);
 641   basic_block update_bb = e->dest;
 642   edge new_exit_e;
 643   tree orig_def, orig_def_new_name;
 644   tree new_name, new_name2;
 645   tree arg;
 646
 647   /* Create new bb between loop and new_merge_bb.  */
 648   *new_exit_bb = split_edge (loop->single_exit);
 649   add_bb_to_loop (*new_exit_bb, loop->outer);
 650
 651   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 652
 653   for (update_phi = phi_nodes (update_bb); update_phi;
 654        update_phi = PHI_CHAIN (update_phi))
 655     {
 656       orig_phi = update_phi;
 657       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 658       /* This loop-closed-phi actually doesn't represent a use
 659          out of the loop - the phi arg is a constant.  */
 660       if (TREE_CODE (orig_def) != SSA_NAME)
 661         continue;
 662       orig_def_new_name = get_current_def (orig_def);
 663       arg = NULL_TREE;
 664
 665       /** 1. Handle new-merge-point phis  **/
 666
 667       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 668       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 669                                  new_merge_bb);
 670
 671       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 672             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 673       new_name = orig_def;
 674       new_name2 = NULL_TREE;
 675       if (orig_def_new_name)
 676         {
 677           new_name = orig_def_new_name;
 678           /* Some variables have both loop-entry-phis and loop-exit-phis.
 679              Such variables were given yet newer names by phis placed in
 680              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 681              new_name2 = get_current_def (get_current_def (orig_name)).  */
 682           new_name2 = get_current_def (new_name);
 683         }
 684
 685       if (is_new_loop)
 686         {
 687           guard_arg = orig_def;
 688           loop_arg = new_name;
 689         }
 690       else
 691         {
 692           guard_arg = new_name;
 693           loop_arg = orig_def;
 694         }
 695       if (new_name2)
 696         guard_arg = new_name2;
 697
 698       add_phi_arg (new_phi, loop_arg, new_exit_e);
 699       add_phi_arg (new_phi, guard_arg, guard_edge);
 700
 701       /* 1.3. Update phi in successor block.  */
 702       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 703       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 704       update_phi2 = new_phi;
 705
 706
 707       /** 2. Handle loop-closed-ssa-form phis  **/
 708
 709       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 710       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 711                                  *new_exit_bb);
 712
 713       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 714       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 715
 716       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 717       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 718       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 719
 720
 721       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 722
 723       /* 3.1. Find the relevant names that need an exit-phi in
 724          GUARD_BB, i.e. names for which
 725          slpeel_update_phi_nodes_for_guard1 had not already created a
 726          phi node. This is the case for names that are used outside
 727          the loop (and therefore need an exit phi) but are not updated
 728          across loop iterations (and therefore don't have a
 729          loop-header-phi).
 730
 731          slpeel_update_phi_nodes_for_guard1 is responsible for
 732          creating loop-exit phis in GUARD_BB for names that have a
 733          loop-header-phi.  When such a phi is created we also record
 734          the new name in its current definition.  If this new name
 735          exists, then guard_arg was set to this new name (see 1.2
 736          above).  Therefore, if guard_arg is not this new name, this
 737          is an indication that an exit-phi in GUARD_BB was not yet
 738          created, so we take care of it here.  */
 739       if (guard_arg == new_name2)
 740         continue;
 741       arg = guard_arg;
 742
 743       /* 3.2. Generate new phi node in GUARD_BB:  */
 744       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 745                                  guard_edge->src);
 746
 747       /* 3.3. GUARD_BB has one incoming edge:  */
 748       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 749       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 750
 751       /* 3.4. Update phi in successor of GUARD_BB:  */
 752       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 753                                                                 == guard_arg);
 754       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 755     }
 756
 757   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 758 }
 759
 760
 761 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 762    that starts at zero, increases by one and its limit is NITERS.
 763
 764    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 765
 766 void
 767 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 768 {
 769   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 770   tree orig_cond;
 771   edge exit_edge = loop->single_exit;
 772   block_stmt_iterator loop_cond_bsi;
 773   block_stmt_iterator incr_bsi;
 774   bool insert_after;
 775   tree begin_label = tree_block_label (loop->latch);
 776   tree exit_label = tree_block_label (loop->single_exit->dest);
 777   tree init = build_int_cst (TREE_TYPE (niters), 0);
 778   tree step = build_int_cst (TREE_TYPE (niters), 1);
 779   tree then_label;
 780   tree else_label;
 781   LOC loop_loc;
 782
 783   orig_cond = get_loop_exit_condition (loop);
 784   gcc_assert (orig_cond);
 785   loop_cond_bsi = bsi_for_stmt (orig_cond);
 786
 787   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 788   create_iv (init, step, NULL_TREE, loop,
 789              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 790
 791   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 792     {
 793       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 794       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 795       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 796     }
 797   else /* 'then' edge loops back.  */
 798     {
 799       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 800       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 801       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 802     }
 803
 804   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 805                      then_label, else_label);
 806   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 807
 808   /* Remove old loop exit test:  */
 809   bsi_remove (&loop_cond_bsi, true);
 810
 811   loop_loc = find_loop_location (loop);
 812   if (dump_file && (dump_flags & TDF_DETAILS))
 813     {
 814       if (loop_loc != UNKNOWN_LOC)
 815         fprintf (dump_file, "\nloop at %s:%d: ",
 816                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 817       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 818     }
 819
 820   loop->nb_iterations = niters;
 821 }
 822
 823
 824 /* Given LOOP this function generates a new copy of it and puts it
 825    on E which is either the entry or exit of LOOP.  */
 826
 827 static struct loop *
 828 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 829                                         edge e)
 830 {
 831   struct loop *new_loop;
 832   basic_block *new_bbs, *bbs;
 833   bool at_exit;
 834   bool was_imm_dom;
 835   basic_block exit_dest;
 836   tree phi, phi_arg;
 837
 838   at_exit = (e == loop->single_exit);
 839   if (!at_exit && e != loop_preheader_edge (loop))
 840     return NULL;
 841
 842   bbs = get_loop_body (loop);
 843
 844   /* Check whether duplication is possible.  */
 845   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 846     {
 847       free (bbs);
 848       return NULL;
 849     }
 850
 851   /* Generate new loop structure.  */
 852   new_loop = duplicate_loop (loops, loop, loop->outer);
 853   if (!new_loop)
 854     {
 855       free (bbs);
 856       return NULL;
 857     }
 858
 859   exit_dest = loop->single_exit->dest;
 860   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 861                                           exit_dest) == loop->header ?
 862                  true : false);
 863
 864   new_bbs = XNEWVEC (basic_block, loop->num_nodes);
 865
 866   copy_bbs (bbs, loop->num_nodes, new_bbs,
 867             &loop->single_exit, 1, &new_loop->single_exit, NULL,
 868             e->src);
 869
 870   /* Duplicating phi args at exit bbs as coming
 871      also from exit of duplicated loop.  */
 872   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 873     {
 874       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 875       if (phi_arg)
 876         {
 877           edge new_loop_exit_edge;
 878
 879           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 880             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 881           else
 882             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 883
 884           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 885         }
 886     }
 887
 888   if (at_exit) /* Add the loop copy at exit.  */
 889     {
 890       redirect_edge_and_branch_force (e, new_loop->header);
 891       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 892       if (was_imm_dom)
 893         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 894     }
 895   else /* Add the copy at entry.  */
 896     {
 897       edge new_exit_e;
 898       edge entry_e = loop_preheader_edge (loop);
 899       basic_block preheader = entry_e->src;
 900
 901       if (!flow_bb_inside_loop_p (new_loop,
 902                                   EDGE_SUCC (new_loop->header, 0)->dest))
 903         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 904       else
 905         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 906
 907       redirect_edge_and_branch_force (new_exit_e, loop->header);
 908       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 909                                new_exit_e->src);
 910
 911       /* We have to add phi args to the loop->header here as coming
 912          from new_exit_e edge.  */
 913       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 914         {
 915           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 916           if (phi_arg)
 917             add_phi_arg (phi, phi_arg, new_exit_e);
 918         }
 919
 920       redirect_edge_and_branch_force (entry_e, new_loop->header);
 921       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 922     }
 923
 924   free (new_bbs);
 925   free (bbs);
 926
 927   return new_loop;
 928 }
 929
 930
 931 /* Given the condition statement COND, put it as the last statement
 932    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 933    Assumes that this is the single exit of the guarded loop.
 934    Returns the skip edge.  */
 935
 936 static edge
 937 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 938                         basic_block dom_bb)
 939 {
 940   block_stmt_iterator bsi;
 941   edge new_e, enter_e;
 942   tree cond_stmt, then_label, else_label;
 943
 944   enter_e = EDGE_SUCC (guard_bb, 0);
 945   enter_e->flags &= ~EDGE_FALLTHRU;
 946   enter_e->flags |= EDGE_FALSE_VALUE;
 947   bsi = bsi_last (guard_bb);
 948
 949   then_label = build1 (GOTO_EXPR, void_type_node,
 950                        tree_block_label (exit_bb));
 951   else_label = build1 (GOTO_EXPR, void_type_node,
 952                        tree_block_label (enter_e->dest));
 953   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 954                      then_label, else_label);
 955   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 956   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 957   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 958   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 959   return new_e;
 960 }
 961
 962
 963 /* This function verifies that the following restrictions apply to LOOP:
 964    (1) it is innermost
 965    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 966    (3) it is single entry, single exit
 967    (4) its exit condition is the last stmt in the header
 968    (5) E is the entry/exit edge of LOOP.
 969  */
 970
 971 bool
 972 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 973 {
 974   edge exit_e = loop->single_exit;
 975   edge entry_e = loop_preheader_edge (loop);
 976   tree orig_cond = get_loop_exit_condition (loop);
 977   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 978
 979   if (need_ssa_update_p ())
 980     return false;
 981
 982   if (loop->inner
 983       /* All loops have an outer scope; the only case loop->outer is NULL is for
 984          the function itself.  */
 985       || !loop->outer
 986       || loop->num_nodes != 2
 987       || !empty_block_p (loop->latch)
 988       || !loop->single_exit
 989       /* Verify that new loop exit condition can be trivially modified.  */
 990       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 991       || (e != exit_e && e != entry_e))
 992     return false;
 993
 994   return true;
 995 }
 996
 997 #ifdef ENABLE_CHECKING
 998 void
 999 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1000                                  struct loop *second_loop)
1001 {
1002   basic_block loop1_exit_bb = first_loop->single_exit->dest;
1003   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1004   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1005
1006   /* A guard that controls whether the second_loop is to be executed or skipped
1007      is placed in first_loop->exit.  first_loopt->exit therefore has two
1008      successors - one is the preheader of second_loop, and the other is a bb
1009      after second_loop.
1010    */
1011   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
1012
1013   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
1014         of second_loop.  */
1015
1016   /* The preheader of new_loop is expected to have two predecessors:
1017      first_loop->exit and the block that precedes first_loop.  */
1018
1019   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1020               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1021                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1022                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1023                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1024
1025   /* Verify that the other successor of first_loopt->exit is after the
1026      second_loop.  */
1027   /* TODO */
1028 }
1029 #endif
1030
1031 /* Function slpeel_tree_peel_loop_to_edge.
1032
1033    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1034    that is placed on the entry (exit) edge E of LOOP. After this transformation
1035    we have two loops one after the other - first-loop iterates FIRST_NITERS
1036    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1037
1038    Input:
1039    - LOOP: the loop to be peeled.
1040    - E: the exit or entry edge of LOOP.
1041         If it is the entry edge, we peel the first iterations of LOOP. In this
1042         case first-loop is LOOP, and second-loop is the newly created loop.
1043         If it is the exit edge, we peel the last iterations of LOOP. In this
1044         case, first-loop is the newly created loop, and second-loop is LOOP.
1045    - NITERS: the number of iterations that LOOP iterates.
1046    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1047    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1048         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1049         is false, the caller of this function may want to take care of this
1050         (this can be useful if we don't want new stmts added to first-loop).
1051
1052    Output:
1053    The function returns a pointer to the new loop-copy, or NULL if it failed
1054    to perform the transformation.
1055
1056    The function generates two if-then-else guards: one before the first loop,
1057    and the other before the second loop:
1058    The first guard is:
1059      if (FIRST_NITERS == 0) then skip the first loop,
1060      and go directly to the second loop.
1061    The second guard is:
1062      if (FIRST_NITERS == NITERS) then skip the second loop.
1063
1064    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1065    FORNOW the resulting code will not be in loop-closed-ssa form.
1066 */
1067
1068 struct loop*
1069 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1070                                edge e, tree first_niters,
1071                                tree niters, bool update_first_loop_count)
1072 {
1073   struct loop *new_loop = NULL, *first_loop, *second_loop;
1074   edge skip_e;
1075   tree pre_condition;
1076   bitmap definitions;
1077   basic_block bb_before_second_loop, bb_after_second_loop;
1078   basic_block bb_before_first_loop;
1079   basic_block bb_between_loops;
1080   basic_block new_exit_bb;
1081   edge exit_e = loop->single_exit;
1082   LOC loop_loc;
1083
1084   if (!slpeel_can_duplicate_loop_p (loop, e))
1085     return NULL;
1086
1087   /* We have to initialize cfg_hooks. Then, when calling
1088    cfg_hooks->split_edge, the function tree_split_edge
1089    is actually called and, when calling cfg_hooks->duplicate_block,
1090    the function tree_duplicate_bb is called.  */
1091   tree_register_cfg_hooks ();
1092
1093
1094   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1095         Resulting CFG would be:
1096
1097         first_loop:
1098         do {
1099         } while ...
1100
1101         second_loop:
1102         do {
1103         } while ...
1104
1105         orig_exit_bb:
1106    */
1107
1108   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1109     {
1110       loop_loc = find_loop_location (loop);
1111       if (dump_file && (dump_flags & TDF_DETAILS))
1112         {
1113           if (loop_loc != UNKNOWN_LOC)
1114             fprintf (dump_file, "\n%s:%d: note: ",
1115                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1116           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1117         }
1118       return NULL;
1119     }
1120
1121   if (e == exit_e)
1122     {
1123       /* NEW_LOOP was placed after LOOP.  */
1124       first_loop = loop;
1125       second_loop = new_loop;
1126     }
1127   else
1128     {
1129       /* NEW_LOOP was placed before LOOP.  */
1130       first_loop = new_loop;
1131       second_loop = loop;
1132     }
1133
1134   definitions = ssa_names_to_replace ();
1135   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1136   rename_variables_in_loop (new_loop);
1137
1138
1139   /* 2. Add the guard that controls whether the first loop is executed.
1140         Resulting CFG would be:
1141
1142         bb_before_first_loop:
1143         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1144                                GOTO first-loop
1145
1146         first_loop:
1147         do {
1148         } while ...
1149
1150         bb_before_second_loop:
1151
1152         second_loop:
1153         do {
1154         } while ...
1155
1156         orig_exit_bb:
1157    */
1158
1159   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1160   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1161   bb_before_second_loop = split_edge (first_loop->single_exit);
1162   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1163
1164   pre_condition =
1165     fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1166                  build_int_cst (TREE_TYPE (first_niters), 0));
1167   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1168                                   bb_before_second_loop, bb_before_first_loop);
1169   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1170                                       first_loop == new_loop,
1171                                       &new_exit_bb, &definitions);
1172
1173
1174   /* 3. Add the guard that controls whether the second loop is executed.
1175         Resulting CFG would be:
1176
1177         bb_before_first_loop:
1178         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1179                                GOTO first-loop
1180
1181         first_loop:
1182         do {
1183         } while ...
1184
1185         bb_between_loops:
1186         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1187                                     GOTO bb_before_second_loop
1188
1189         bb_before_second_loop:
1190
1191         second_loop:
1192         do {
1193         } while ...
1194
1195         bb_after_second_loop:
1196
1197         orig_exit_bb:
1198    */
1199
1200   bb_between_loops = new_exit_bb;
1201   bb_after_second_loop = split_edge (second_loop->single_exit);
1202   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1203
1204   pre_condition =
1205         fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
1206   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1207                                   bb_after_second_loop, bb_before_first_loop);
1208   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1209                                      second_loop == new_loop, &new_exit_bb);
1210
1211   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1212    */
1213   if (update_first_loop_count)
1214     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1215
1216   BITMAP_FREE (definitions);
1217   delete_update_ssa ();
1218
1219   return new_loop;
1220 }
1221
1222 /* Function vect_get_loop_location.
1223
1224    Extract the location of the loop in the source code.
1225    If the loop is not well formed for vectorization, an estimated
1226    location is calculated.
1227    Return the loop location if succeed and NULL if not.  */
1228
1229 LOC
1230 find_loop_location (struct loop *loop)
1231 {
1232   tree node = NULL_TREE;
1233   basic_block bb;
1234   block_stmt_iterator si;
1235
1236   if (!loop)
1237     return UNKNOWN_LOC;
1238
1239   node = get_loop_exit_condition (loop);
1240
1241   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1242       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1243     return EXPR_LOC (node);
1244
1245   /* If we got here the loop is probably not "well formed",
1246      try to estimate the loop location */
1247
1248   if (!loop->header)
1249     return UNKNOWN_LOC;
1250
1251   bb = loop->header;
1252
1253   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1254     {
1255       node = bsi_stmt (si);
1256       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1257         return EXPR_LOC (node);
1258     }
1259
1260   return UNKNOWN_LOC;
1261 }
1262
1263
1264 /*************************************************************************
1265   Vectorization Debug Information.
1266  *************************************************************************/
1267
1268 /* Function vect_set_verbosity_level.
1269
1270    Called from toplev.c upon detection of the
1271    -ftree-vectorizer-verbose=N option.  */
1272
1273 void
1274 vect_set_verbosity_level (const char *val)
1275 {
1276    unsigned int vl;
1277
1278    vl = atoi (val);
1279    if (vl < MAX_VERBOSITY_LEVEL)
1280      vect_verbosity_level = vl;
1281    else
1282      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1283 }
1284
1285
1286 /* Function vect_set_dump_settings.
1287
1288    Fix the verbosity level of the vectorizer if the
1289    requested level was not set explicitly using the flag
1290    -ftree-vectorizer-verbose=N.
1291    Decide where to print the debugging information (dump_file/stderr).
1292    If the user defined the verbosity level, but there is no dump file,
1293    print to stderr, otherwise print to the dump file.  */
1294
1295 static void
1296 vect_set_dump_settings (void)
1297 {
1298   vect_dump = dump_file;
1299
1300   /* Check if the verbosity level was defined by the user:  */
1301   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1302     {
1303       /* If there is no dump file, print to stderr.  */
1304       if (!dump_file)
1305         vect_dump = stderr;
1306       return;
1307     }
1308
1309   /* User didn't specify verbosity level:  */
1310   if (dump_file && (dump_flags & TDF_DETAILS))
1311     vect_verbosity_level = REPORT_DETAILS;
1312   else if (dump_file && (dump_flags & TDF_STATS))
1313     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1314   else
1315     vect_verbosity_level = REPORT_NONE;
1316
1317   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1318 }
1319
1320
1321 /* Function debug_loop_details.
1322
1323    For vectorization debug dumps.  */
1324
1325 bool
1326 vect_print_dump_info (enum verbosity_levels vl)
1327 {
1328   if (vl > vect_verbosity_level)
1329     return false;
1330
1331   if (!current_function_decl || !vect_dump)
1332     return false;
1333
1334   if (vect_loop_location == UNKNOWN_LOC)
1335     fprintf (vect_dump, "\n%s:%d: note: ",
1336              DECL_SOURCE_FILE (current_function_decl),
1337              DECL_SOURCE_LINE (current_function_decl));
1338   else
1339     fprintf (vect_dump, "\n%s:%d: note: ",
1340              LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
1341
1342   return true;
1343 }
1344
1345
1346 /*************************************************************************
1347   Vectorization Utilities.
1348  *************************************************************************/
1349
1350 /* Function new_stmt_vec_info.
1351
1352    Create and initialize a new stmt_vec_info struct for STMT.  */
1353
1354 stmt_vec_info
1355 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1356 {
1357   stmt_vec_info res;
1358   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1359
1360   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1361   STMT_VINFO_STMT (res) = stmt;
1362   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1363   STMT_VINFO_RELEVANT (res) = 0;
1364   STMT_VINFO_LIVE_P (res) = false;
1365   STMT_VINFO_VECTYPE (res) = NULL;
1366   STMT_VINFO_VEC_STMT (res) = NULL;
1367   STMT_VINFO_IN_PATTERN_P (res) = false;
1368   STMT_VINFO_RELATED_STMT (res) = NULL;
1369   STMT_VINFO_DATA_REF (res) = NULL;
1370   if (TREE_CODE (stmt) == PHI_NODE)
1371     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1372   else
1373     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1374   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
1375
1376   return res;
1377 }
1378
1379
1380 /* Function new_loop_vec_info.
1381
1382    Create and initialize a new loop_vec_info struct for LOOP, as well as
1383    stmt_vec_info structs for all the stmts in LOOP.  */
1384
1385 loop_vec_info
1386 new_loop_vec_info (struct loop *loop)
1387 {
1388   loop_vec_info res;
1389   basic_block *bbs;
1390   block_stmt_iterator si;
1391   unsigned int i;
1392
1393   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1394
1395   bbs = get_loop_body (loop);
1396
1397   /* Create stmt_info for all stmts in the loop.  */
1398   for (i = 0; i < loop->num_nodes; i++)
1399     {
1400       basic_block bb = bbs[i];
1401       tree phi;
1402
1403       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1404         {
1405           stmt_ann_t ann = get_stmt_ann (phi);
1406           set_stmt_info (ann, new_stmt_vec_info (phi, res));
1407         }
1408
1409       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1410         {
1411           tree stmt = bsi_stmt (si);
1412           stmt_ann_t ann;
1413
1414           ann = stmt_ann (stmt);
1415           set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1416         }
1417     }
1418
1419   LOOP_VINFO_LOOP (res) = loop;
1420   LOOP_VINFO_BBS (res) = bbs;
1421   LOOP_VINFO_EXIT_COND (res) = NULL;
1422   LOOP_VINFO_NITERS (res) = NULL;
1423   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1424   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1425   LOOP_VINFO_VECT_FACTOR (res) = 0;
1426   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
1427   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
1428   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1429   LOOP_VINFO_MAY_MISALIGN_STMTS (res)
1430     = VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS));
1431
1432   return res;
1433 }
1434
1435
1436 /* Function destroy_loop_vec_info.
1437
1438    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1439    stmts in the loop.  */
1440
1441 void
1442 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1443 {
1444   struct loop *loop;
1445   basic_block *bbs;
1446   int nbbs;
1447   block_stmt_iterator si;
1448   int j;
1449
1450   if (!loop_vinfo)
1451     return;
1452
1453   loop = LOOP_VINFO_LOOP (loop_vinfo);
1454
1455   bbs = LOOP_VINFO_BBS (loop_vinfo);
1456   nbbs = loop->num_nodes;
1457
1458   for (j = 0; j < nbbs; j++)
1459     {
1460       basic_block bb = bbs[j];
1461       tree phi;
1462       stmt_vec_info stmt_info;
1463
1464       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1465         {
1466           stmt_ann_t ann = stmt_ann (phi);
1467
1468           stmt_info = vinfo_for_stmt (phi);
1469           free (stmt_info);
1470           set_stmt_info (ann, NULL);
1471         }
1472
1473       for (si = bsi_start (bb); !bsi_end_p (si); )
1474         {
1475           tree stmt = bsi_stmt (si);
1476           stmt_ann_t ann = stmt_ann (stmt);
1477           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1478
1479           if (stmt_info)
1480             {
1481               /* Check if this is a "pattern stmt" (introduced by the
1482                  vectorizer during the pattern recognition pass).  */
1483               bool remove_stmt_p = false;
1484               tree orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1485               if (orig_stmt)
1486                 {
1487                   stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1488                   if (orig_stmt_info
1489                       && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
1490                     remove_stmt_p = true;
1491                 }
1492
1493               /* Free stmt_vec_info.  */
1494               VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1495               free (stmt_info);
1496               set_stmt_info (ann, NULL);
1497
1498               /* Remove dead "pattern stmts".  */
1499               if (remove_stmt_p)
1500                 bsi_remove (&si, true);
1501             }
1502           bsi_next (&si);
1503         }
1504     }
1505
1506   free (LOOP_VINFO_BBS (loop_vinfo));
1507   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1508   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
1509   VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
1510
1511   free (loop_vinfo);
1512 }
1513
1514
1515 /* Function vect_force_dr_alignment_p.
1516
1517    Returns whether the alignment of a DECL can be forced to be aligned
1518    on ALIGNMENT bit boundary.  */
1519
1520 bool
1521 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1522 {
1523   if (TREE_CODE (decl) != VAR_DECL)
1524     return false;
1525
1526   if (DECL_EXTERNAL (decl))
1527     return false;
1528
1529   if (TREE_ASM_WRITTEN (decl))
1530     return false;
1531
1532   if (TREE_STATIC (decl))
1533     return (alignment <= MAX_OFILE_ALIGNMENT);
1534   else
1535     /* This is not 100% correct.  The absolute correct stack alignment
1536        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1537        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1538        However, until someone implements forced stack alignment, SSE
1539        isn't really usable without this.  */
1540     return (alignment <= PREFERRED_STACK_BOUNDARY);
1541 }
1542
1543
1544 /* Function get_vectype_for_scalar_type.
1545
1546    Returns the vector type corresponding to SCALAR_TYPE as supported
1547    by the target.  */
1548
1549 tree
1550 get_vectype_for_scalar_type (tree scalar_type)
1551 {
1552   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1553   int nbytes = GET_MODE_SIZE (inner_mode);
1554   int nunits;
1555   tree vectype;
1556
1557   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1558     return NULL_TREE;
1559
1560   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1561      is expected.  */
1562   nunits = UNITS_PER_SIMD_WORD / nbytes;
1563
1564   vectype = build_vector_type (scalar_type, nunits);
1565   if (vect_print_dump_info (REPORT_DETAILS))
1566     {
1567       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1568       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1569     }
1570
1571   if (!vectype)
1572     return NULL_TREE;
1573
1574   if (vect_print_dump_info (REPORT_DETAILS))
1575     {
1576       fprintf (vect_dump, "vectype: ");
1577       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1578     }
1579
1580   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1581       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1582     {
1583       if (vect_print_dump_info (REPORT_DETAILS))
1584         fprintf (vect_dump, "mode not supported by target.");
1585       return NULL_TREE;
1586     }
1587
1588   return vectype;
1589 }
1590
1591
1592 /* Function vect_supportable_dr_alignment
1593
1594    Return whether the data reference DR is supported with respect to its
1595    alignment.  */
1596
1597 enum dr_alignment_support
1598 vect_supportable_dr_alignment (struct data_reference *dr)
1599 {
1600   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1601   enum machine_mode mode = (int) TYPE_MODE (vectype);
1602
1603   if (aligned_access_p (dr))
1604     return dr_aligned;
1605
1606   /* Possibly unaligned access.  */
1607
1608   if (DR_IS_READ (dr))
1609     {
1610       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1611           && (!targetm.vectorize.builtin_mask_for_load
1612               || targetm.vectorize.builtin_mask_for_load ()))
1613         return dr_unaligned_software_pipeline;
1614
1615       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1616         /* Can't software pipeline the loads, but can at least do them.  */
1617         return dr_unaligned_supported;
1618     }
1619
1620   /* Unsupported.  */
1621   return dr_unaligned_unsupported;
1622 }
1623
1624
1625 /* Function vect_is_simple_use.
1626
1627    Input:
1628    LOOP - the loop that is being vectorized.
1629    OPERAND - operand of a stmt in LOOP.
1630    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1631
1632    Returns whether a stmt with OPERAND can be vectorized.
1633    Supportable operands are constants, loop invariants, and operands that are
1634    defined by the current iteration of the loop. Unsupportable operands are
1635    those that are defined by a previous iteration of the loop (as is the case
1636    in reduction/induction computations).  */
1637
1638 bool
1639 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1640                     tree *def, enum vect_def_type *dt)
1641 {
1642   basic_block bb;
1643   stmt_vec_info stmt_vinfo;
1644   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1645
1646   *def_stmt = NULL_TREE;
1647   *def = NULL_TREE;
1648
1649   if (vect_print_dump_info (REPORT_DETAILS))
1650     {
1651       fprintf (vect_dump, "vect_is_simple_use: operand ");
1652       print_generic_expr (vect_dump, operand, TDF_SLIM);
1653     }
1654
1655   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1656     {
1657       *dt = vect_constant_def;
1658       return true;
1659     }
1660
1661   if (TREE_CODE (operand) != SSA_NAME)
1662     {
1663       if (vect_print_dump_info (REPORT_DETAILS))
1664         fprintf (vect_dump, "not ssa-name.");
1665       return false;
1666     }
1667
1668   *def_stmt = SSA_NAME_DEF_STMT (operand);
1669   if (*def_stmt == NULL_TREE )
1670     {
1671       if (vect_print_dump_info (REPORT_DETAILS))
1672         fprintf (vect_dump, "no def_stmt.");
1673       return false;
1674     }
1675
1676   if (vect_print_dump_info (REPORT_DETAILS))
1677     {
1678       fprintf (vect_dump, "def_stmt: ");
1679       print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1680     }
1681
1682   /* empty stmt is expected only in case of a function argument.
1683      (Otherwise - we expect a phi_node or a modify_expr).  */
1684   if (IS_EMPTY_STMT (*def_stmt))
1685     {
1686       tree arg = TREE_OPERAND (*def_stmt, 0);
1687       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1688         {
1689           *def = operand;
1690           *dt = vect_invariant_def;
1691           return true;
1692         }
1693
1694       if (vect_print_dump_info (REPORT_DETAILS))
1695         fprintf (vect_dump, "Unexpected empty stmt.");
1696       return false;
1697     }
1698
1699   bb = bb_for_stmt (*def_stmt);
1700   if (!flow_bb_inside_loop_p (loop, bb))
1701     *dt = vect_invariant_def;
1702   else
1703     {
1704       stmt_vinfo = vinfo_for_stmt (*def_stmt);
1705       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1706     }
1707
1708   if (*dt == vect_unknown_def_type)
1709     {
1710       if (vect_print_dump_info (REPORT_DETAILS))
1711         fprintf (vect_dump, "Unsupported pattern.");
1712       return false;
1713     }
1714
1715   /* stmts inside the loop that have been identified as performing
1716      a reduction operation cannot have uses in the loop.  */
1717   if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1718     {
1719       if (vect_print_dump_info (REPORT_DETAILS))
1720         fprintf (vect_dump, "reduction used in loop.");
1721       return false;
1722     }
1723
1724   if (vect_print_dump_info (REPORT_DETAILS))
1725     fprintf (vect_dump, "type of def: %d.",*dt);
1726
1727   switch (TREE_CODE (*def_stmt))
1728     {
1729     case PHI_NODE:
1730       *def = PHI_RESULT (*def_stmt);
1731       gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1732                   || *dt == vect_invariant_def);
1733       break;
1734
1735     case MODIFY_EXPR:
1736       *def = TREE_OPERAND (*def_stmt, 0);
1737       gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1738       break;
1739
1740     default:
1741       if (vect_print_dump_info (REPORT_DETAILS))
1742         fprintf (vect_dump, "unsupported defining stmt: ");
1743       return false;
1744     }
1745
1746   if (*dt == vect_induction_def)
1747     {
1748       if (vect_print_dump_info (REPORT_DETAILS))
1749         fprintf (vect_dump, "induction not supported.");
1750       return false;
1751     }
1752
1753   return true;
1754 }
1755
1756
1757 /* Function supportable_widening_operation
1758
1759    Check whether an operation represented by the code CODE is a
1760    widening operation that is supported by the target platform in
1761    vector form (i.e., when operating on arguments of type VECTYPE).
1762
1763    The two kinds of widening operations we currently support are
1764    NOP and WIDEN_MULT. This function checks if these oprations
1765    are supported by the target platform either directly (via vector
1766    tree-codes), or via target builtins.
1767
1768    Output:
1769    - CODE1 and CODE2 are codes of vector operations to be used when
1770    vectorizing the operation, if available.
1771    - DECL1 and DECL2 are decls of target builtin functions to be used
1772    when vectorizing the operation, if available. In this case,
1773    CODE1 and CODE2 are CALL_EXPR.  */
1774
1775 bool
1776 supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
1777                                 tree *decl1, tree *decl2,
1778                                 enum tree_code *code1, enum tree_code *code2)
1779 {
1780   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1781   bool ordered_p;
1782   enum machine_mode vec_mode;
1783   enum insn_code icode1, icode2;
1784   optab optab1, optab2;
1785   tree expr = TREE_OPERAND (stmt, 1);
1786   tree type = TREE_TYPE (expr);
1787   tree wide_vectype = get_vectype_for_scalar_type (type);
1788   enum tree_code c1, c2;
1789
1790   /* The result of a vectorized widening operation usually requires two vectors
1791      (because the widened results do not fit int one vector). The generated
1792      vector results would normally be expected to be generated in the same
1793      order as in the original scalar computation. i.e. if 8 results are
1794      generated in each vector iteration, they are to be organized as follows:
1795         vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
1796
1797      However, in the special case that the result of the widening operation is
1798      used in a reduction copmutation only, the order doesn't matter (because
1799      when vectorizing a reduction we change the order of the computation).
1800      Some targets can take advatage of this and generate more efficient code.
1801      For example, targets like Altivec, that support widen_mult using a sequence
1802      of {mult_even,mult_odd} generate the following vectors:
1803         vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].  */
1804
1805    if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction)
1806      ordered_p = false;
1807    else
1808      ordered_p = true;
1809
1810   if (!ordered_p
1811       && code == WIDEN_MULT_EXPR
1812       && targetm.vectorize.builtin_mul_widen_even
1813       && targetm.vectorize.builtin_mul_widen_even (vectype)
1814       && targetm.vectorize.builtin_mul_widen_odd
1815       && targetm.vectorize.builtin_mul_widen_odd (vectype))
1816     {
1817       if (vect_print_dump_info (REPORT_DETAILS))
1818         fprintf (vect_dump, "Unordered widening operation detected.");
1819
1820       *code1 = *code2 = CALL_EXPR;
1821       *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
1822       *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
1823       return true;
1824     }
1825
1826   switch (code)
1827     {
1828     case WIDEN_MULT_EXPR:
1829       if (BYTES_BIG_ENDIAN)
1830         {
1831           c1 = VEC_WIDEN_MULT_HI_EXPR;
1832           c2 = VEC_WIDEN_MULT_LO_EXPR;
1833         }
1834       else
1835         {
1836           c2 = VEC_WIDEN_MULT_HI_EXPR;
1837           c1 = VEC_WIDEN_MULT_LO_EXPR;
1838         }
1839       break;
1840
1841     case NOP_EXPR:
1842       if (BYTES_BIG_ENDIAN)
1843         {
1844           c1 = VEC_UNPACK_HI_EXPR;
1845           c2 = VEC_UNPACK_LO_EXPR;
1846         }
1847       else
1848         {
1849           c2 = VEC_UNPACK_HI_EXPR;
1850           c1 = VEC_UNPACK_LO_EXPR;
1851         }
1852       break;
1853
1854     default:
1855       gcc_unreachable ();
1856     }
1857
1858   *code1 = c1;
1859   *code2 = c2;
1860   optab1 = optab_for_tree_code (c1, vectype);
1861   optab2 = optab_for_tree_code (c2, vectype);
1862
1863   if (!optab1 || !optab2)
1864     return false;
1865
1866   vec_mode = TYPE_MODE (vectype);
1867   if ((icode1 = optab1->handlers[(int) vec_mode].insn_code) == CODE_FOR_nothing
1868       || insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
1869       || (icode2 = optab2->handlers[(int) vec_mode].insn_code)
1870                                                         == CODE_FOR_nothing
1871       || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
1872     return false;
1873
1874   return true;
1875 }
1876
1877
1878 /* Function reduction_code_for_scalar_code
1879
1880    Input:
1881    CODE - tree_code of a reduction operations.
1882
1883    Output:
1884    REDUC_CODE - the corresponding tree-code to be used to reduce the
1885       vector of partial results into a single scalar result (which
1886       will also reside in a vector).
1887
1888    Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */
1889
1890 bool
1891 reduction_code_for_scalar_code (enum tree_code code,
1892                                 enum tree_code *reduc_code)
1893 {
1894   switch (code)
1895   {
1896   case MAX_EXPR:
1897     *reduc_code = REDUC_MAX_EXPR;
1898     return true;
1899
1900   case MIN_EXPR:
1901     *reduc_code = REDUC_MIN_EXPR;
1902     return true;
1903
1904   case PLUS_EXPR:
1905     *reduc_code = REDUC_PLUS_EXPR;
1906     return true;
1907
1908   default:
1909     return false;
1910   }
1911 }
1912
1913
1914 /* Function vect_is_simple_reduction
1915
1916    Detect a cross-iteration def-use cucle that represents a simple
1917    reduction computation. We look for the following pattern:
1918
1919    loop_header:
1920      a1 = phi < a0, a2 >
1921      a3 = ...
1922      a2 = operation (a3, a1)
1923
1924    such that:
1925    1. operation is commutative and associative and it is safe to
1926       change the order of the computation.
1927    2. no uses for a2 in the loop (a2 is used out of the loop)
1928    3. no uses of a1 in the loop besides the reduction operation.
1929
1930    Condition 1 is tested here.
1931    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
1932
1933 tree
1934 vect_is_simple_reduction (struct loop *loop, tree phi)
1935 {
1936   edge latch_e = loop_latch_edge (loop);
1937   tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1938   tree def_stmt, def1, def2;
1939   enum tree_code code;
1940   int op_type;
1941   tree operation, op1, op2;
1942   tree type;
1943
1944   if (TREE_CODE (loop_arg) != SSA_NAME)
1945     {
1946       if (vect_print_dump_info (REPORT_DETAILS))
1947         {
1948           fprintf (vect_dump, "reduction: not ssa_name: ");
1949           print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1950         }
1951       return NULL_TREE;
1952     }
1953
1954   def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1955   if (!def_stmt)
1956     {
1957       if (vect_print_dump_info (REPORT_DETAILS))
1958         fprintf (vect_dump, "reduction: no def_stmt.");
1959       return NULL_TREE;
1960     }
1961
1962   if (TREE_CODE (def_stmt) != MODIFY_EXPR)
1963     {
1964       if (vect_print_dump_info (REPORT_DETAILS))
1965         {
1966           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1967         }
1968       return NULL_TREE;
1969     }
1970
1971   operation = TREE_OPERAND (def_stmt, 1);
1972   code = TREE_CODE (operation);
1973   if (!commutative_tree_code (code) || !associative_tree_code (code))
1974     {
1975       if (vect_print_dump_info (REPORT_DETAILS))
1976         {
1977           fprintf (vect_dump, "reduction: not commutative/associative: ");
1978           print_generic_expr (vect_dump, operation, TDF_SLIM);
1979         }
1980       return NULL_TREE;
1981     }
1982
1983   op_type = TREE_CODE_LENGTH (code);
1984   if (op_type != binary_op)
1985     {
1986       if (vect_print_dump_info (REPORT_DETAILS))
1987         {
1988           fprintf (vect_dump, "reduction: not binary operation: ");
1989           print_generic_expr (vect_dump, operation, TDF_SLIM);
1990         }
1991       return NULL_TREE;
1992     }
1993
1994   op1 = TREE_OPERAND (operation, 0);
1995   op2 = TREE_OPERAND (operation, 1);
1996   if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1997     {
1998       if (vect_print_dump_info (REPORT_DETAILS))
1999         {
2000           fprintf (vect_dump, "reduction: uses not ssa_names: ");
2001           print_generic_expr (vect_dump, operation, TDF_SLIM);
2002         }
2003       return NULL_TREE;
2004     }
2005
2006   /* Check that it's ok to change the order of the computation.  */
2007   type = TREE_TYPE (operation);
2008   if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
2009       || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
2010     {
2011       if (vect_print_dump_info (REPORT_DETAILS))
2012         {
2013           fprintf (vect_dump, "reduction: multiple types: operation type: ");
2014           print_generic_expr (vect_dump, type, TDF_SLIM);
2015           fprintf (vect_dump, ", operands types: ");
2016           print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2017           fprintf (vect_dump, ",");
2018           print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2019         }
2020       return NULL_TREE;
2021     }
2022
2023   /* CHECKME: check for !flag_finite_math_only too?  */
2024   if (SCALAR_FLOAT_TYPE_P (type) && !flag_unsafe_math_optimizations)
2025     {
2026       /* Changing the order of operations changes the semantics.  */
2027       if (vect_print_dump_info (REPORT_DETAILS))
2028         {
2029           fprintf (vect_dump, "reduction: unsafe fp math optimization: ");
2030           print_generic_expr (vect_dump, operation, TDF_SLIM);
2031         }
2032       return NULL_TREE;
2033     }
2034   else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
2035     {
2036       /* Changing the order of operations changes the semantics.  */
2037       if (vect_print_dump_info (REPORT_DETAILS))
2038         {
2039           fprintf (vect_dump, "reduction: unsafe int math optimization: ");
2040           print_generic_expr (vect_dump, operation, TDF_SLIM);
2041         }
2042       return NULL_TREE;
2043     }
2044
2045   /* reduction is safe. we're dealing with one of the following:
2046      1) integer arithmetic and no trapv
2047      2) floating point arithmetic, and special flags permit this optimization.
2048    */
2049   def1 = SSA_NAME_DEF_STMT (op1);
2050   def2 = SSA_NAME_DEF_STMT (op2);
2051   if (!def1 || !def2)
2052     {
2053       if (vect_print_dump_info (REPORT_DETAILS))
2054         {
2055           fprintf (vect_dump, "reduction: no defs for operands: ");
2056           print_generic_expr (vect_dump, operation, TDF_SLIM);
2057         }
2058       return NULL_TREE;
2059     }
2060
2061   if (TREE_CODE (def1) == MODIFY_EXPR
2062       && flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
2063       && def2 == phi)
2064     {
2065       if (vect_print_dump_info (REPORT_DETAILS))
2066         {
2067           fprintf (vect_dump, "detected reduction:");
2068           print_generic_expr (vect_dump, operation, TDF_SLIM);
2069         }
2070       return def_stmt;
2071     }
2072   else if (TREE_CODE (def2) == MODIFY_EXPR
2073       && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
2074       && def1 == phi)
2075     {
2076       /* Swap operands (just for simplicity - so that the rest of the code
2077          can assume that the reduction variable is always the last (second)
2078          argument).  */
2079       if (vect_print_dump_info (REPORT_DETAILS))
2080         {
2081           fprintf (vect_dump, "detected reduction: need to swap operands:");
2082           print_generic_expr (vect_dump, operation, TDF_SLIM);
2083         }
2084       swap_tree_operands (def_stmt, &TREE_OPERAND (operation, 0),
2085                                     &TREE_OPERAND (operation, 1));
2086       return def_stmt;
2087     }
2088   else
2089     {
2090       if (vect_print_dump_info (REPORT_DETAILS))
2091         {
2092           fprintf (vect_dump, "reduction: unknown pattern.");
2093           print_generic_expr (vect_dump, operation, TDF_SLIM);
2094         }
2095       return NULL_TREE;
2096     }
2097 }
2098
2099
2100 /* Function vect_is_simple_iv_evolution.
2101
2102    FORNOW: A simple evolution of an induction variables in the loop is
2103    considered a polynomial evolution with constant step.  */
2104
2105 bool
2106 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2107                              tree * step)
2108 {
2109   tree init_expr;
2110   tree step_expr;
2111
2112   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2113
2114   /* When there is no evolution in this loop, the evolution function
2115      is not "simple".  */
2116   if (evolution_part == NULL_TREE)
2117     return false;
2118
2119   /* When the evolution is a polynomial of degree >= 2
2120      the evolution function is not "simple".  */
2121   if (tree_is_chrec (evolution_part))
2122     return false;
2123
2124   step_expr = evolution_part;
2125   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2126                                                            loop_nb));
2127
2128   if (vect_print_dump_info (REPORT_DETAILS))
2129     {
2130       fprintf (vect_dump, "step: ");
2131       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2132       fprintf (vect_dump, ",  init: ");
2133       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
2134     }
2135
2136   *init = init_expr;
2137   *step = step_expr;
2138
2139   if (TREE_CODE (step_expr) != INTEGER_CST)
2140     {
2141       if (vect_print_dump_info (REPORT_DETAILS))
2142         fprintf (vect_dump, "step unknown.");
2143       return false;
2144     }
2145
2146   return true;
2147 }
2148
2149
2150 /* Function vectorize_loops.
2151
2152    Entry Point to loop vectorization phase.  */
2153
2154 unsigned
2155 vectorize_loops (struct loops *loops)
2156 {
2157   unsigned int i;
2158   unsigned int num_vectorized_loops = 0;
2159
2160   /* Fix the verbosity level if not defined explicitly by the user.  */
2161   vect_set_dump_settings ();
2162
2163   /* Allocate the bitmap that records which virtual variables that
2164      need to be renamed.  */
2165   vect_vnames_to_rename = BITMAP_ALLOC (NULL);
2166
2167   /*  ----------- Analyze loops. -----------  */
2168
2169   /* If some loop was duplicated, it gets bigger number
2170      than all previously defined loops. This fact allows us to run
2171      only over initial loops skipping newly generated ones.  */
2172   vect_loops_num = loops->num;
2173   for (i = 1; i < vect_loops_num; i++)
2174     {
2175       loop_vec_info loop_vinfo;
2176       struct loop *loop = loops->parray[i];
2177
2178       if (!loop)
2179         continue;
2180
2181       vect_loop_location = find_loop_location (loop);
2182       loop_vinfo = vect_analyze_loop (loop);
2183       loop->aux = loop_vinfo;
2184
2185       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2186         continue;
2187
2188       vect_transform_loop (loop_vinfo, loops);
2189       num_vectorized_loops++;
2190     }
2191   vect_loop_location = UNKNOWN_LOC;
2192
2193   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2194     fprintf (vect_dump, "vectorized %u loops in function.\n",
2195              num_vectorized_loops);
2196
2197   /*  ----------- Finalize. -----------  */
2198
2199   BITMAP_FREE (vect_vnames_to_rename);
2200
2201   for (i = 1; i < vect_loops_num; i++)
2202     {
2203       struct loop *loop = loops->parray[i];
2204       loop_vec_info loop_vinfo;
2205
2206       if (!loop)
2207         continue;
2208       loop_vinfo = loop->aux;
2209       destroy_loop_vec_info (loop_vinfo);
2210       loop->aux = NULL;
2211     }
2212
2213   return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
2214 }