gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "ggc.h"
 128 #include "tree.h"
 129 #include "target.h"
 130 #include "rtl.h"
 131 #include "basic-block.h"
 132 #include "diagnostic.h"
 133 #include "tree-flow.h"
 134 #include "tree-dump.h"
 135 #include "timevar.h"
 136 #include "cfgloop.h"
 137 #include "cfglayout.h"
 138 #include "expr.h"
 139 #include "optabs.h"
 140 #include "toplev.h"
 141 #include "tree-chrec.h"
 142 #include "tree-data-ref.h"
 143 #include "tree-scalar-evolution.h"
 144 #include "input.h"
 145 #include "tree-vectorizer.h"
 146 #include "tree-pass.h"
 147
 148 /*************************************************************************
 149   Simple Loop Peeling Utilities
 150  *************************************************************************/
 151 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 152   (struct loop *, struct loops *, edge);
 153 static void slpeel_update_phis_for_duplicate_loop
 154   (struct loop *, struct loop *, bool after);
 155 static void slpeel_update_phi_nodes_for_guard1
 156   (edge, struct loop *, bool, basic_block *, bitmap *);
 157 static void slpeel_update_phi_nodes_for_guard2
 158   (edge, struct loop *, bool, basic_block *);
 159 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 160
 161 static void rename_use_op (use_operand_p);
 162 static void rename_variables_in_bb (basic_block);
 163 static void rename_variables_in_loop (struct loop *);
 164
 165 /*************************************************************************
 166   General Vectorization Utilities
 167  *************************************************************************/
 168 static void vect_set_dump_settings (void);
 169
 170 /* vect_dump will be set to stderr or dump_file if exist.  */
 171 FILE *vect_dump;
 172
 173 /* vect_verbosity_level set to an invalid value
 174    to mark that it's uninitialized.  */
 175 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 176
 177 /* Number of loops, at the beginning of vectorization.  */
 178 unsigned int vect_loops_num;
 179 \f
 180 /*************************************************************************
 181   Simple Loop Peeling Utilities
 182
 183   Utilities to support loop peeling for vectorization purposes.
 184  *************************************************************************/
 185
 186
 187 /* Renames the use *OP_P.  */
 188
 189 static void
 190 rename_use_op (use_operand_p op_p)
 191 {
 192   tree new_name;
 193
 194   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 195     return;
 196
 197   new_name = get_current_def (USE_FROM_PTR (op_p));
 198
 199   /* Something defined outside of the loop.  */
 200   if (!new_name)
 201     return;
 202
 203   /* An ordinary ssa name defined in the loop.  */
 204
 205   SET_USE (op_p, new_name);
 206 }
 207
 208
 209 /* Renames the variables in basic block BB.  */
 210
 211 static void
 212 rename_variables_in_bb (basic_block bb)
 213 {
 214   tree phi;
 215   block_stmt_iterator bsi;
 216   tree stmt;
 217   use_operand_p use_p;
 218   ssa_op_iter iter;
 219   edge e;
 220   edge_iterator ei;
 221   struct loop *loop = bb->loop_father;
 222
 223   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 224     {
 225       stmt = bsi_stmt (bsi);
 226       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 227                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 228         rename_use_op (use_p);
 229     }
 230
 231   FOR_EACH_EDGE (e, ei, bb->succs)
 232     {
 233       if (!flow_bb_inside_loop_p (loop, e->dest))
 234         continue;
 235       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 236         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 237     }
 238 }
 239
 240
 241 /* Renames variables in new generated LOOP.  */
 242
 243 static void
 244 rename_variables_in_loop (struct loop *loop)
 245 {
 246   unsigned i;
 247   basic_block *bbs;
 248
 249   bbs = get_loop_body (loop);
 250
 251   for (i = 0; i < loop->num_nodes; i++)
 252     rename_variables_in_bb (bbs[i]);
 253
 254   free (bbs);
 255 }
 256
 257
 258 /* Update the PHI nodes of NEW_LOOP.
 259
 260    NEW_LOOP is a duplicate of ORIG_LOOP.
 261    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 262    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 263    executes before it.  */
 264
 265 static void
 266 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 267                                        struct loop *new_loop, bool after)
 268 {
 269   tree new_ssa_name;
 270   tree phi_new, phi_orig;
 271   tree def;
 272   edge orig_loop_latch = loop_latch_edge (orig_loop);
 273   edge orig_entry_e = loop_preheader_edge (orig_loop);
 274   edge new_loop_exit_e = new_loop->single_exit;
 275   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 276   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 277
 278   /*
 279      step 1. For each loop-header-phi:
 280              Add the first phi argument for the phi in NEW_LOOP
 281             (the one associated with the entry of NEW_LOOP)
 282
 283      step 2. For each loop-header-phi:
 284              Add the second phi argument for the phi in NEW_LOOP
 285             (the one associated with the latch of NEW_LOOP)
 286
 287      step 3. Update the phis in the successor block of NEW_LOOP.
 288
 289         case 1: NEW_LOOP was placed before ORIG_LOOP:
 290                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 291                 Updating the phis in the successor block can therefore be done
 292                 along with the scanning of the loop header phis, because the
 293                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 294                 phi nodes, organized in the same order.
 295
 296         case 2: NEW_LOOP was placed after ORIG_LOOP:
 297                 The successor block of NEW_LOOP is the original exit block of
 298                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 299                 We postpone updating these phis to a later stage (when
 300                 loop guards are added).
 301    */
 302
 303
 304   /* Scan the phis in the headers of the old and new loops
 305      (they are organized in exactly the same order).  */
 306
 307   for (phi_new = phi_nodes (new_loop->header),
 308        phi_orig = phi_nodes (orig_loop->header);
 309        phi_new && phi_orig;
 310        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 311     {
 312       /* step 1.  */
 313       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 314       add_phi_arg (phi_new, def, new_loop_entry_e);
 315
 316       /* step 2.  */
 317       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 318       if (TREE_CODE (def) != SSA_NAME)
 319         continue;
 320
 321       new_ssa_name = get_current_def (def);
 322       if (!new_ssa_name)
 323         {
 324           /* This only happens if there are no definitions
 325              inside the loop. use the phi_result in this case.  */
 326           new_ssa_name = PHI_RESULT (phi_new);
 327         }
 328
 329       /* An ordinary ssa name defined in the loop.  */
 330       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 331
 332       /* step 3 (case 1).  */
 333       if (!after)
 334         {
 335           gcc_assert (new_loop_exit_e == orig_entry_e);
 336           SET_PHI_ARG_DEF (phi_orig,
 337                            new_loop_exit_e->dest_idx,
 338                            new_ssa_name);
 339         }
 340     }
 341 }
 342
 343
 344 /* Update PHI nodes for a guard of the LOOP.
 345
 346    Input:
 347    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 348         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 349         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 350         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 351         We denote this bb NEW_MERGE_BB because before the guard code was added
 352         it had a single predecessor (the LOOP header), and now it became a merge
 353         point of two paths - the path that ends with the LOOP exit-edge, and
 354         the path that ends with GUARD_EDGE.
 355    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 356         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 357
 358    ===> The CFG before the guard-code was added:
 359         LOOP_header_bb:
 360           loop_body
 361           if (exit_loop) goto update_bb
 362           else           goto LOOP_header_bb
 363         update_bb:
 364
 365    ==> The CFG after the guard-code was added:
 366         guard_bb:
 367           if (LOOP_guard_condition) goto new_merge_bb
 368           else                      goto LOOP_header_bb
 369         LOOP_header_bb:
 370           loop_body
 371           if (exit_loop_condition) goto new_merge_bb
 372           else                     goto LOOP_header_bb
 373         new_merge_bb:
 374           goto update_bb
 375         update_bb:
 376
 377    ==> The CFG after this function:
 378         guard_bb:
 379           if (LOOP_guard_condition) goto new_merge_bb
 380           else                      goto LOOP_header_bb
 381         LOOP_header_bb:
 382           loop_body
 383           if (exit_loop_condition) goto new_exit_bb
 384           else                     goto LOOP_header_bb
 385         new_exit_bb:
 386         new_merge_bb:
 387           goto update_bb
 388         update_bb:
 389
 390    This function:
 391    1. creates and updates the relevant phi nodes to account for the new
 392       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 393       1.1. Create phi nodes at NEW_MERGE_BB.
 394       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 395            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 396    2. preserves loop-closed-ssa-form by creating the required phi nodes
 397       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 398
 399    There are two flavors to this function:
 400
 401    slpeel_update_phi_nodes_for_guard1:
 402      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 403      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 404      for variables that have phis in the loop header.
 405
 406    slpeel_update_phi_nodes_for_guard2:
 407      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 408      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 409      for variables that have phis in the loop exit.
 410
 411    I.E., the overall structure is:
 412
 413         loop1_preheader_bb:
 414                 guard1 (goto loop1/merg1_bb)
 415         loop1
 416         loop1_exit_bb:
 417                 guard2 (goto merge1_bb/merge2_bb)
 418         merge1_bb
 419         loop2
 420         loop2_exit_bb
 421         merge2_bb
 422         next_bb
 423
 424    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 425    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 426    that have phis in loop1->header).
 427
 428    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 429    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 430    that have phis in next_bb). It also adds some of these phis to
 431    loop1_exit_bb.
 432
 433    slpeel_update_phi_nodes_for_guard1 is always called before
 434    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 435    to create correct data-flow and loop-closed-ssa-form.
 436
 437    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 438    that change between iterations of a loop (and therefore have a phi-node
 439    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 440    phis for variables that are used out of the loop (and therefore have
 441    loop-closed exit phis). Some variables may be both updated between
 442    iterations and used after the loop. This is why in loop1_exit_bb we
 443    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 444    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 445
 446    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 447      an original loop. i.e., we have:
 448
 449            orig_loop
 450            guard_bb (goto LOOP/new_merge)
 451            new_loop <-- LOOP
 452            new_exit
 453            new_merge
 454            next_bb
 455
 456      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 457      have:
 458
 459            new_loop
 460            guard_bb (goto LOOP/new_merge)
 461            orig_loop <-- LOOP
 462            new_exit
 463            new_merge
 464            next_bb
 465
 466      The SSA names defined in the original loop have a current
 467      reaching definition that that records the corresponding new
 468      ssa-name used in the new duplicated loop copy.
 469   */
 470
 471 /* Function slpeel_update_phi_nodes_for_guard1
 472
 473    Input:
 474    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 475    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 476             information.
 477
 478    In the context of the overall structure, we have:
 479
 480         loop1_preheader_bb:
 481                 guard1 (goto loop1/merg1_bb)
 482 LOOP->  loop1
 483         loop1_exit_bb:
 484                 guard2 (goto merge1_bb/merge2_bb)
 485         merge1_bb
 486         loop2
 487         loop2_exit_bb
 488         merge2_bb
 489         next_bb
 490
 491    For each name updated between loop iterations (i.e - for each name that has
 492    an entry (loop-header) phi in LOOP) we create a new phi in:
 493    1. merge1_bb (to account for the edge from guard1)
 494    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 495 */
 496
 497 static void
 498 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 499                                     bool is_new_loop, basic_block *new_exit_bb,
 500                                     bitmap *defs)
 501 {
 502   tree orig_phi, new_phi;
 503   tree update_phi, update_phi2;
 504   tree guard_arg, loop_arg;
 505   basic_block new_merge_bb = guard_edge->dest;
 506   edge e = EDGE_SUCC (new_merge_bb, 0);
 507   basic_block update_bb = e->dest;
 508   basic_block orig_bb = loop->header;
 509   edge new_exit_e;
 510   tree current_new_name;
 511
 512   /* Create new bb between loop and new_merge_bb.  */
 513   *new_exit_bb = split_edge (loop->single_exit);
 514   add_bb_to_loop (*new_exit_bb, loop->outer);
 515
 516   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 517
 518   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 519        orig_phi && update_phi;
 520        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 521     {
 522       /** 1. Handle new-merge-point phis  **/
 523
 524       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 525       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 526                                  new_merge_bb);
 527
 528       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 529             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 530       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 531       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 532
 533       add_phi_arg (new_phi, loop_arg, new_exit_e);
 534       add_phi_arg (new_phi, guard_arg, guard_edge);
 535
 536       /* 1.3. Update phi in successor block.  */
 537       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 538                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 539       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 540       update_phi2 = new_phi;
 541
 542
 543       /** 2. Handle loop-closed-ssa-form phis  **/
 544
 545       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 546       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 547                                  *new_exit_bb);
 548
 549       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 550       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 551
 552       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 553       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 554       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 555
 556       /* 2.4. Record the newly created name with set_current_def.
 557          We want to find a name such that
 558                 name = get_current_def (orig_loop_name)
 559          and to set its current definition as follows:
 560                 set_current_def (name, new_phi_name)
 561
 562          If LOOP is a new loop then loop_arg is already the name we're
 563          looking for. If LOOP is the original loop, then loop_arg is
 564          the orig_loop_name and the relevant name is recorded in its
 565          current reaching definition.  */
 566       if (is_new_loop)
 567         current_new_name = loop_arg;
 568       else
 569         {
 570           current_new_name = get_current_def (loop_arg);
 571           /* current_def is not available only if the variable does not
 572              change inside the loop, in which case we also don't care
 573              about recording a current_def for it because we won't be
 574              trying to create loop-exit-phis for it.  */
 575           if (!current_new_name)
 576             continue;
 577         }
 578 #ifdef ENABLE_CHECKING
 579       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 580 #endif
 581
 582       set_current_def (current_new_name, PHI_RESULT (new_phi));
 583       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 584     }
 585
 586   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 587 }
 588
 589
 590 /* Function slpeel_update_phi_nodes_for_guard2
 591
 592    Input:
 593    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 594
 595    In the context of the overall structure, we have:
 596
 597         loop1_preheader_bb:
 598                 guard1 (goto loop1/merg1_bb)
 599         loop1
 600         loop1_exit_bb:
 601                 guard2 (goto merge1_bb/merge2_bb)
 602         merge1_bb
 603 LOOP->  loop2
 604         loop2_exit_bb
 605         merge2_bb
 606         next_bb
 607
 608    For each name used out side the loop (i.e - for each name that has an exit
 609    phi in next_bb) we create a new phi in:
 610    1. merge2_bb (to account for the edge from guard_bb)
 611    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 612    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 613       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 614 */
 615
 616 static void
 617 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 618                                     bool is_new_loop, basic_block *new_exit_bb)
 619 {
 620   tree orig_phi, new_phi;
 621   tree update_phi, update_phi2;
 622   tree guard_arg, loop_arg;
 623   basic_block new_merge_bb = guard_edge->dest;
 624   edge e = EDGE_SUCC (new_merge_bb, 0);
 625   basic_block update_bb = e->dest;
 626   edge new_exit_e;
 627   tree orig_def, orig_def_new_name;
 628   tree new_name, new_name2;
 629   tree arg;
 630
 631   /* Create new bb between loop and new_merge_bb.  */
 632   *new_exit_bb = split_edge (loop->single_exit);
 633   add_bb_to_loop (*new_exit_bb, loop->outer);
 634
 635   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 636
 637   for (update_phi = phi_nodes (update_bb); update_phi;
 638        update_phi = PHI_CHAIN (update_phi))
 639     {
 640       orig_phi = update_phi;
 641       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 642       orig_def_new_name = get_current_def (orig_def);
 643       arg = NULL_TREE;
 644
 645       /** 1. Handle new-merge-point phis  **/
 646
 647       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 648       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 649                                  new_merge_bb);
 650
 651       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 652             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 653       new_name = orig_def;
 654       new_name2 = NULL_TREE;
 655       if (orig_def_new_name)
 656         {
 657           new_name = orig_def_new_name;
 658           /* Some variables have both loop-entry-phis and loop-exit-phis.
 659              Such variables were given yet newer names by phis placed in
 660              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 661              new_name2 = get_current_def (get_current_def (orig_name)).  */
 662           new_name2 = get_current_def (new_name);
 663         }
 664
 665       if (is_new_loop)
 666         {
 667           guard_arg = orig_def;
 668           loop_arg = new_name;
 669         }
 670       else
 671         {
 672           guard_arg = new_name;
 673           loop_arg = orig_def;
 674         }
 675       if (new_name2)
 676         guard_arg = new_name2;
 677
 678       add_phi_arg (new_phi, loop_arg, new_exit_e);
 679       add_phi_arg (new_phi, guard_arg, guard_edge);
 680
 681       /* 1.3. Update phi in successor block.  */
 682       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 683       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 684       update_phi2 = new_phi;
 685
 686
 687       /** 2. Handle loop-closed-ssa-form phis  **/
 688
 689       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 690       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 691                                  *new_exit_bb);
 692
 693       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 694       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 695
 696       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 697       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 698       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 699
 700
 701       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 702
 703       /* 3.1. Find the relevant names that need an exit-phi in
 704          GUARD_BB, i.e. names for which
 705          slpeel_update_phi_nodes_for_guard1 had not already created a
 706          phi node. This is the case for names that are used outside
 707          the loop (and therefore need an exit phi) but are not updated
 708          across loop iterations (and therefore don't have a
 709          loop-header-phi).
 710
 711          slpeel_update_phi_nodes_for_guard1 is responsible for
 712          creating loop-exit phis in GUARD_BB for names that have a
 713          loop-header-phi.  When such a phi is created we also record
 714          the new name in its current definition.  If this new name
 715          exists, then guard_arg was set to this new name (see 1.2
 716          above).  Therefore, if guard_arg is not this new name, this
 717          is an indication that an exit-phi in GUARD_BB was not yet
 718          created, so we take care of it here.  */
 719       if (guard_arg == new_name2)
 720         continue;
 721       arg = guard_arg;
 722
 723       /* 3.2. Generate new phi node in GUARD_BB:  */
 724       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 725                                  guard_edge->src);
 726
 727       /* 3.3. GUARD_BB has one incoming edge:  */
 728       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 729       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 730
 731       /* 3.4. Update phi in successor of GUARD_BB:  */
 732       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 733                                                                 == guard_arg);
 734       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 735     }
 736
 737   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 738 }
 739
 740
 741 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 742    that starts at zero, increases by one and its limit is NITERS.
 743
 744    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 745
 746 void
 747 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 748 {
 749   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 750   tree orig_cond;
 751   edge exit_edge = loop->single_exit;
 752   block_stmt_iterator loop_cond_bsi;
 753   block_stmt_iterator incr_bsi;
 754   bool insert_after;
 755   tree begin_label = tree_block_label (loop->latch);
 756   tree exit_label = tree_block_label (loop->single_exit->dest);
 757   tree init = build_int_cst (TREE_TYPE (niters), 0);
 758   tree step = build_int_cst (TREE_TYPE (niters), 1);
 759   tree then_label;
 760   tree else_label;
 761   LOC loop_loc;
 762
 763   orig_cond = get_loop_exit_condition (loop);
 764 #ifdef ENABLE_CHECKING
 765   gcc_assert (orig_cond);
 766 #endif
 767   loop_cond_bsi = bsi_for_stmt (orig_cond);
 768
 769   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 770   create_iv (init, step, NULL_TREE, loop,
 771              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 772
 773   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 774     {
 775       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 776       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 777       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 778     }
 779   else /* 'then' edge loops back.  */
 780     {
 781       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 782       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 783       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 784     }
 785
 786   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 787                      then_label, else_label);
 788   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 789
 790   /* Remove old loop exit test:  */
 791   bsi_remove (&loop_cond_bsi);
 792
 793   loop_loc = find_loop_location (loop);
 794   if (dump_file && (dump_flags & TDF_DETAILS))
 795     {
 796       if (loop_loc != UNKNOWN_LOC)
 797         fprintf (dump_file, "\nloop at %s:%d: ",
 798                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 799       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 800     }
 801
 802   loop->nb_iterations = niters;
 803 }
 804
 805
 806 /* Given LOOP this function generates a new copy of it and puts it
 807    on E which is either the entry or exit of LOOP.  */
 808
 809 static struct loop *
 810 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 811                                         edge e)
 812 {
 813   struct loop *new_loop;
 814   basic_block *new_bbs, *bbs;
 815   bool at_exit;
 816   bool was_imm_dom;
 817   basic_block exit_dest;
 818   tree phi, phi_arg;
 819
 820   at_exit = (e == loop->single_exit);
 821   if (!at_exit && e != loop_preheader_edge (loop))
 822     return NULL;
 823
 824   bbs = get_loop_body (loop);
 825
 826   /* Check whether duplication is possible.  */
 827   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 828     {
 829       free (bbs);
 830       return NULL;
 831     }
 832
 833   /* Generate new loop structure.  */
 834   new_loop = duplicate_loop (loops, loop, loop->outer);
 835   if (!new_loop)
 836     {
 837       free (bbs);
 838       return NULL;
 839     }
 840
 841   exit_dest = loop->single_exit->dest;
 842   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 843                                           exit_dest) == loop->header ?
 844                  true : false);
 845
 846   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 847
 848   copy_bbs (bbs, loop->num_nodes, new_bbs,
 849             &loop->single_exit, 1, &new_loop->single_exit, NULL);
 850
 851   /* Duplicating phi args at exit bbs as coming
 852      also from exit of duplicated loop.  */
 853   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 854     {
 855       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 856       if (phi_arg)
 857         {
 858           edge new_loop_exit_edge;
 859
 860           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 861             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 862           else
 863             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 864
 865           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 866         }
 867     }
 868
 869   if (at_exit) /* Add the loop copy at exit.  */
 870     {
 871       redirect_edge_and_branch_force (e, new_loop->header);
 872       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 873       if (was_imm_dom)
 874         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 875     }
 876   else /* Add the copy at entry.  */
 877     {
 878       edge new_exit_e;
 879       edge entry_e = loop_preheader_edge (loop);
 880       basic_block preheader = entry_e->src;
 881
 882       if (!flow_bb_inside_loop_p (new_loop,
 883                                   EDGE_SUCC (new_loop->header, 0)->dest))
 884         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 885       else
 886         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 887
 888       redirect_edge_and_branch_force (new_exit_e, loop->header);
 889       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 890                                new_exit_e->src);
 891
 892       /* We have to add phi args to the loop->header here as coming
 893          from new_exit_e edge.  */
 894       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 895         {
 896           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 897           if (phi_arg)
 898             add_phi_arg (phi, phi_arg, new_exit_e);
 899         }
 900
 901       redirect_edge_and_branch_force (entry_e, new_loop->header);
 902       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 903     }
 904
 905   free (new_bbs);
 906   free (bbs);
 907
 908   return new_loop;
 909 }
 910
 911
 912 /* Given the condition statement COND, put it as the last statement
 913    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 914    Assumes that this is the single exit of the guarded loop.
 915    Returns the skip edge.  */
 916
 917 static edge
 918 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 919                         basic_block dom_bb)
 920 {
 921   block_stmt_iterator bsi;
 922   edge new_e, enter_e;
 923   tree cond_stmt, then_label, else_label;
 924
 925   enter_e = EDGE_SUCC (guard_bb, 0);
 926   enter_e->flags &= ~EDGE_FALLTHRU;
 927   enter_e->flags |= EDGE_FALSE_VALUE;
 928   bsi = bsi_last (guard_bb);
 929
 930   then_label = build1 (GOTO_EXPR, void_type_node,
 931                        tree_block_label (exit_bb));
 932   else_label = build1 (GOTO_EXPR, void_type_node,
 933                        tree_block_label (enter_e->dest));
 934   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 935                      then_label, else_label);
 936   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 937   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 938   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 939   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 940   return new_e;
 941 }
 942
 943
 944 /* This function verifies that the following restrictions apply to LOOP:
 945    (1) it is innermost
 946    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 947    (3) it is single entry, single exit
 948    (4) its exit condition is the last stmt in the header
 949    (5) E is the entry/exit edge of LOOP.
 950  */
 951
 952 bool
 953 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 954 {
 955   edge exit_e = loop->single_exit;
 956   edge entry_e = loop_preheader_edge (loop);
 957   tree orig_cond = get_loop_exit_condition (loop);
 958   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 959
 960   if (need_ssa_update_p ())
 961     return false;
 962
 963   if (loop->inner
 964       /* All loops have an outer scope; the only case loop->outer is NULL is for
 965          the function itself.  */
 966       || !loop->outer
 967       || loop->num_nodes != 2
 968       || !empty_block_p (loop->latch)
 969       || !loop->single_exit
 970       /* Verify that new loop exit condition can be trivially modified.  */
 971       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 972       || (e != exit_e && e != entry_e))
 973     return false;
 974
 975   return true;
 976 }
 977
 978 #ifdef ENABLE_CHECKING
 979 void
 980 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 981                                  struct loop *second_loop)
 982 {
 983   basic_block loop1_exit_bb = first_loop->single_exit->dest;
 984   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 985   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 986
 987   /* A guard that controls whether the second_loop is to be executed or skipped
 988      is placed in first_loop->exit.  first_loopt->exit therefore has two
 989      successors - one is the preheader of second_loop, and the other is a bb
 990      after second_loop.
 991    */
 992   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 993
 994   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 995         of second_loop.  */
 996
 997   /* The preheader of new_loop is expected to have two predecessors:
 998      first_loop->exit and the block that precedes first_loop.  */
 999
1000   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1001               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1002                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1003                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
1004                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1005
1006   /* Verify that the other successor of first_loopt->exit is after the
1007      second_loop.  */
1008   /* TODO */
1009 }
1010 #endif
1011
1012 /* Function slpeel_tree_peel_loop_to_edge.
1013
1014    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1015    that is placed on the entry (exit) edge E of LOOP. After this transformation
1016    we have two loops one after the other - first-loop iterates FIRST_NITERS
1017    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1018
1019    Input:
1020    - LOOP: the loop to be peeled.
1021    - E: the exit or entry edge of LOOP.
1022         If it is the entry edge, we peel the first iterations of LOOP. In this
1023         case first-loop is LOOP, and second-loop is the newly created loop.
1024         If it is the exit edge, we peel the last iterations of LOOP. In this
1025         case, first-loop is the newly created loop, and second-loop is LOOP.
1026    - NITERS: the number of iterations that LOOP iterates.
1027    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1028    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1029         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1030         is false, the caller of this function may want to take care of this
1031         (this can be useful if we don't want new stmts added to first-loop).
1032
1033    Output:
1034    The function returns a pointer to the new loop-copy, or NULL if it failed
1035    to perform the transformation.
1036
1037    The function generates two if-then-else guards: one before the first loop,
1038    and the other before the second loop:
1039    The first guard is:
1040      if (FIRST_NITERS == 0) then skip the first loop,
1041      and go directly to the second loop.
1042    The second guard is:
1043      if (FIRST_NITERS == NITERS) then skip the second loop.
1044
1045    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1046    FORNOW the resulting code will not be in loop-closed-ssa form.
1047 */
1048
1049 struct loop*
1050 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1051                                edge e, tree first_niters,
1052                                tree niters, bool update_first_loop_count)
1053 {
1054   struct loop *new_loop = NULL, *first_loop, *second_loop;
1055   edge skip_e;
1056   tree pre_condition;
1057   bitmap definitions;
1058   basic_block bb_before_second_loop, bb_after_second_loop;
1059   basic_block bb_before_first_loop;
1060   basic_block bb_between_loops;
1061   basic_block new_exit_bb;
1062   edge exit_e = loop->single_exit;
1063   LOC loop_loc;
1064
1065   if (!slpeel_can_duplicate_loop_p (loop, e))
1066     return NULL;
1067
1068   /* We have to initialize cfg_hooks. Then, when calling
1069    cfg_hooks->split_edge, the function tree_split_edge
1070    is actually called and, when calling cfg_hooks->duplicate_block,
1071    the function tree_duplicate_bb is called.  */
1072   tree_register_cfg_hooks ();
1073
1074
1075   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1076         Resulting CFG would be:
1077
1078         first_loop:
1079         do {
1080         } while ...
1081
1082         second_loop:
1083         do {
1084         } while ...
1085
1086         orig_exit_bb:
1087    */
1088
1089   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1090     {
1091       loop_loc = find_loop_location (loop);
1092       if (dump_file && (dump_flags & TDF_DETAILS))
1093         {
1094           if (loop_loc != UNKNOWN_LOC)
1095             fprintf (dump_file, "\n%s:%d: note: ",
1096                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1097           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1098         }
1099       return NULL;
1100     }
1101
1102   if (e == exit_e)
1103     {
1104       /* NEW_LOOP was placed after LOOP.  */
1105       first_loop = loop;
1106       second_loop = new_loop;
1107     }
1108   else
1109     {
1110       /* NEW_LOOP was placed before LOOP.  */
1111       first_loop = new_loop;
1112       second_loop = loop;
1113     }
1114
1115   definitions = ssa_names_to_replace ();
1116   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1117   rename_variables_in_loop (new_loop);
1118
1119
1120   /* 2. Add the guard that controls whether the first loop is executed.
1121         Resulting CFG would be:
1122
1123         bb_before_first_loop:
1124         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1125                                GOTO first-loop
1126
1127         first_loop:
1128         do {
1129         } while ...
1130
1131         bb_before_second_loop:
1132
1133         second_loop:
1134         do {
1135         } while ...
1136
1137         orig_exit_bb:
1138    */
1139
1140   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1141   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1142   bb_before_second_loop = split_edge (first_loop->single_exit);
1143   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1144
1145   pre_condition =
1146     fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
1147   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1148                                   bb_before_second_loop, bb_before_first_loop);
1149   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1150                                       first_loop == new_loop,
1151                                       &new_exit_bb, &definitions);
1152
1153
1154   /* 3. Add the guard that controls whether the second loop is executed.
1155         Resulting CFG would be:
1156
1157         bb_before_first_loop:
1158         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1159                                GOTO first-loop
1160
1161         first_loop:
1162         do {
1163         } while ...
1164
1165         bb_between_loops:
1166         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1167                                     GOTO bb_before_second_loop
1168
1169         bb_before_second_loop:
1170
1171         second_loop:
1172         do {
1173         } while ...
1174
1175         bb_after_second_loop:
1176
1177         orig_exit_bb:
1178    */
1179
1180   bb_between_loops = new_exit_bb;
1181   bb_after_second_loop = split_edge (second_loop->single_exit);
1182   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1183
1184   pre_condition =
1185         fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
1186   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1187                                   bb_after_second_loop, bb_before_first_loop);
1188   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1189                                      second_loop == new_loop, &new_exit_bb);
1190
1191   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1192    */
1193   if (update_first_loop_count)
1194     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1195
1196   BITMAP_FREE (definitions);
1197   delete_update_ssa ();
1198
1199   return new_loop;
1200 }
1201
1202 /* Function vect_get_loop_location.
1203
1204    Extract the location of the loop in the source code.
1205    If the loop is not well formed for vectorization, an estimated
1206    location is calculated.
1207    Return the loop location if succeed and NULL if not.  */
1208
1209 LOC
1210 find_loop_location (struct loop *loop)
1211 {
1212   tree node = NULL_TREE;
1213   basic_block bb;
1214   block_stmt_iterator si;
1215
1216   if (!loop)
1217     return UNKNOWN_LOC;
1218
1219   node = get_loop_exit_condition (loop);
1220
1221   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1222       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1223     return EXPR_LOC (node);
1224
1225   /* If we got here the loop is probably not "well formed",
1226      try to estimate the loop location */
1227
1228   if (!loop->header)
1229     return UNKNOWN_LOC;
1230
1231   bb = loop->header;
1232
1233   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1234     {
1235       node = bsi_stmt (si);
1236       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1237         return EXPR_LOC (node);
1238     }
1239
1240   return UNKNOWN_LOC;
1241 }
1242
1243
1244 /*************************************************************************
1245   Vectorization Debug Information.
1246  *************************************************************************/
1247
1248 /* Function vect_set_verbosity_level.
1249
1250    Called from toplev.c upon detection of the
1251    -ftree-vectorizer-verbose=N option.  */
1252
1253 void
1254 vect_set_verbosity_level (const char *val)
1255 {
1256    unsigned int vl;
1257
1258    vl = atoi (val);
1259    if (vl < MAX_VERBOSITY_LEVEL)
1260      vect_verbosity_level = vl;
1261    else
1262      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1263 }
1264
1265
1266 /* Function vect_set_dump_settings.
1267
1268    Fix the verbosity level of the vectorizer if the
1269    requested level was not set explicitly using the flag
1270    -ftree-vectorizer-verbose=N.
1271    Decide where to print the debugging information (dump_file/stderr).
1272    If the user defined the verbosity level, but there is no dump file,
1273    print to stderr, otherwise print to the dump file.  */
1274
1275 static void
1276 vect_set_dump_settings (void)
1277 {
1278   vect_dump = dump_file;
1279
1280   /* Check if the verbosity level was defined by the user:  */
1281   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1282     {
1283       /* If there is no dump file, print to stderr.  */
1284       if (!dump_file)
1285         vect_dump = stderr;
1286       return;
1287     }
1288
1289   /* User didn't specify verbosity level:  */
1290   if (dump_file && (dump_flags & TDF_DETAILS))
1291     vect_verbosity_level = REPORT_DETAILS;
1292   else if (dump_file && (dump_flags & TDF_STATS))
1293     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1294   else
1295     vect_verbosity_level = REPORT_NONE;
1296
1297   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1298 }
1299
1300
1301 /* Function debug_loop_details.
1302
1303    For vectorization debug dumps.  */
1304
1305 bool
1306 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1307 {
1308   if (vl > vect_verbosity_level)
1309     return false;
1310
1311   if (loc == UNKNOWN_LOC)
1312     fprintf (vect_dump, "\n%s:%d: note: ",
1313                  DECL_SOURCE_FILE (current_function_decl),
1314                  DECL_SOURCE_LINE (current_function_decl));
1315   else
1316     fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1317
1318
1319   return true;
1320 }
1321
1322
1323 /*************************************************************************
1324   Vectorization Utilities.
1325  *************************************************************************/
1326
1327 /* Function new_stmt_vec_info.
1328
1329    Create and initialize a new stmt_vec_info struct for STMT.  */
1330
1331 stmt_vec_info
1332 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1333 {
1334   stmt_vec_info res;
1335   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1336
1337   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1338   STMT_VINFO_STMT (res) = stmt;
1339   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1340   STMT_VINFO_RELEVANT_P (res) = 0;
1341   STMT_VINFO_LIVE_P (res) = 0;
1342   STMT_VINFO_VECTYPE (res) = NULL;
1343   STMT_VINFO_VEC_STMT (res) = NULL;
1344   STMT_VINFO_DATA_REF (res) = NULL;
1345   if (TREE_CODE (stmt) == PHI_NODE)
1346     STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1347   else
1348     STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
1349   STMT_VINFO_MEMTAG (res) = NULL;
1350   STMT_VINFO_PTR_INFO (res) = NULL;
1351   STMT_VINFO_SUBVARS (res) = NULL;
1352   STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1353   STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1354   STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1355   STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1356   STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1357
1358   return res;
1359 }
1360
1361
1362 /* Function new_loop_vec_info.
1363
1364    Create and initialize a new loop_vec_info struct for LOOP, as well as
1365    stmt_vec_info structs for all the stmts in LOOP.  */
1366
1367 loop_vec_info
1368 new_loop_vec_info (struct loop *loop)
1369 {
1370   loop_vec_info res;
1371   basic_block *bbs;
1372   block_stmt_iterator si;
1373   unsigned int i;
1374
1375   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1376
1377   bbs = get_loop_body (loop);
1378
1379   /* Create stmt_info for all stmts in the loop.  */
1380   for (i = 0; i < loop->num_nodes; i++)
1381     {
1382       basic_block bb = bbs[i];
1383       tree phi;
1384
1385       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1386         {
1387           tree_ann_t ann = get_tree_ann (phi);
1388           set_stmt_info (ann, new_stmt_vec_info (phi, res));
1389         }
1390
1391       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1392         {
1393           tree stmt = bsi_stmt (si);
1394           stmt_ann_t ann;
1395
1396           ann = stmt_ann (stmt);
1397           set_stmt_info ((tree_ann_t)ann, new_stmt_vec_info (stmt, res));
1398         }
1399     }
1400
1401   LOOP_VINFO_LOOP (res) = loop;
1402   LOOP_VINFO_BBS (res) = bbs;
1403   LOOP_VINFO_EXIT_COND (res) = NULL;
1404   LOOP_VINFO_NITERS (res) = NULL;
1405   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1406   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1407   LOOP_VINFO_VECT_FACTOR (res) = 0;
1408   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1409                            "loop_write_datarefs");
1410   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1411                            "loop_read_datarefs");
1412   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1413   LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1414
1415   return res;
1416 }
1417
1418
1419 /* Function destroy_loop_vec_info.
1420
1421    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1422    stmts in the loop.  */
1423
1424 void
1425 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1426 {
1427   struct loop *loop;
1428   basic_block *bbs;
1429   int nbbs;
1430   block_stmt_iterator si;
1431   int j;
1432
1433   if (!loop_vinfo)
1434     return;
1435
1436   loop = LOOP_VINFO_LOOP (loop_vinfo);
1437
1438   bbs = LOOP_VINFO_BBS (loop_vinfo);
1439   nbbs = loop->num_nodes;
1440
1441   for (j = 0; j < nbbs; j++)
1442     {
1443       basic_block bb = bbs[j];
1444       tree phi;
1445       stmt_vec_info stmt_info;
1446
1447       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1448         {
1449           tree_ann_t ann = get_tree_ann (phi);
1450
1451           stmt_info = vinfo_for_stmt (phi);
1452           free (stmt_info);
1453           set_stmt_info (ann, NULL);
1454         }
1455
1456       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1457         {
1458           tree stmt = bsi_stmt (si);
1459           stmt_ann_t ann = stmt_ann (stmt);
1460
1461           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1462           free (stmt_info);
1463           set_stmt_info ((tree_ann_t)ann, NULL);
1464         }
1465     }
1466
1467   free (LOOP_VINFO_BBS (loop_vinfo));
1468   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1469   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1470
1471   free (loop_vinfo);
1472 }
1473
1474
1475 /* Function vect_strip_conversions
1476
1477    Strip conversions that don't narrow the mode.  */
1478
1479 tree
1480 vect_strip_conversion (tree expr)
1481 {
1482   tree to, ti, oprnd0;
1483
1484   while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1485     {
1486       to = TREE_TYPE (expr);
1487       oprnd0 = TREE_OPERAND (expr, 0);
1488       ti = TREE_TYPE (oprnd0);
1489
1490       if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1491         return NULL_TREE;
1492       if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1493         return NULL_TREE;
1494
1495       expr = oprnd0;
1496     }
1497   return expr;
1498 }
1499
1500
1501 /* Function vect_force_dr_alignment_p.
1502
1503    Returns whether the alignment of a DECL can be forced to be aligned
1504    on ALIGNMENT bit boundary.  */
1505
1506 bool
1507 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1508 {
1509   if (TREE_CODE (decl) != VAR_DECL)
1510     return false;
1511
1512   if (DECL_EXTERNAL (decl))
1513     return false;
1514
1515   if (TREE_ASM_WRITTEN (decl))
1516     return false;
1517
1518   if (TREE_STATIC (decl))
1519     return (alignment <= MAX_OFILE_ALIGNMENT);
1520   else
1521     /* This is not 100% correct.  The absolute correct stack alignment
1522        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1523        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1524        However, until someone implements forced stack alignment, SSE
1525        isn't really usable without this.  */
1526     return (alignment <= PREFERRED_STACK_BOUNDARY);
1527 }
1528
1529
1530 /* Function get_vectype_for_scalar_type.
1531
1532    Returns the vector type corresponding to SCALAR_TYPE as supported
1533    by the target.  */
1534
1535 tree
1536 get_vectype_for_scalar_type (tree scalar_type)
1537 {
1538   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1539   int nbytes = GET_MODE_SIZE (inner_mode);
1540   int nunits;
1541   tree vectype;
1542
1543   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1544     return NULL_TREE;
1545
1546   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1547      is expected.  */
1548   nunits = UNITS_PER_SIMD_WORD / nbytes;
1549
1550   vectype = build_vector_type (scalar_type, nunits);
1551   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1552     {
1553       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1554       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1555     }
1556
1557   if (!vectype)
1558     return NULL_TREE;
1559
1560   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1561     {
1562       fprintf (vect_dump, "vectype: ");
1563       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1564     }
1565
1566   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1567       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1568     {
1569       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1570         fprintf (vect_dump, "mode not supported by target.");
1571       return NULL_TREE;
1572     }
1573
1574   return vectype;
1575 }
1576
1577
1578 /* Function vect_supportable_dr_alignment
1579
1580    Return whether the data reference DR is supported with respect to its
1581    alignment.  */
1582
1583 enum dr_alignment_support
1584 vect_supportable_dr_alignment (struct data_reference *dr)
1585 {
1586   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1587   enum machine_mode mode = (int) TYPE_MODE (vectype);
1588
1589   if (aligned_access_p (dr))
1590     return dr_aligned;
1591
1592   /* Possibly unaligned access.  */
1593
1594   if (DR_IS_READ (dr))
1595     {
1596       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1597           && (!targetm.vectorize.builtin_mask_for_load
1598               || targetm.vectorize.builtin_mask_for_load ()))
1599         return dr_unaligned_software_pipeline;
1600
1601       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1602         /* Can't software pipeline the loads, but can at least do them.  */
1603         return dr_unaligned_supported;
1604     }
1605
1606   /* Unsupported.  */
1607   return dr_unaligned_unsupported;
1608 }
1609
1610
1611 /* Function vect_is_simple_use.
1612
1613    Input:
1614    LOOP - the loop that is being vectorized.
1615    OPERAND - operand of a stmt in LOOP.
1616    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1617
1618    Returns whether a stmt with OPERAND can be vectorized.
1619    Supportable operands are constants, loop invariants, and operands that are
1620    defined by the current iteration of the loop. Unsupportable operands are
1621    those that are defined by a previous iteration of the loop (as is the case
1622    in reduction/induction computations).  */
1623
1624 bool
1625 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
1626                     tree *def, enum vect_def_type *dt)
1627 {
1628   basic_block bb;
1629   stmt_vec_info stmt_vinfo;
1630   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1631
1632   *def_stmt = NULL_TREE;
1633   *def = NULL_TREE;
1634
1635   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1636     {
1637       fprintf (vect_dump, "vect_is_simple_use: operand ");
1638       print_generic_expr (vect_dump, operand, TDF_SLIM);
1639     }
1640
1641   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1642     {
1643       *dt = vect_constant_def;
1644       return true;
1645     }
1646
1647   if (TREE_CODE (operand) != SSA_NAME)
1648     {
1649       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1650         fprintf (vect_dump, "not ssa-name.");
1651       return false;
1652     }
1653
1654   *def_stmt = SSA_NAME_DEF_STMT (operand);
1655   if (*def_stmt == NULL_TREE )
1656     {
1657       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1658         fprintf (vect_dump, "no def_stmt.");
1659       return false;
1660     }
1661
1662   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1663     {
1664       fprintf (vect_dump, "def_stmt: ");
1665       print_generic_expr (vect_dump, *def_stmt, TDF_SLIM);
1666     }
1667
1668   /* empty stmt is expected only in case of a function argument.
1669      (Otherwise - we expect a phi_node or a modify_expr).  */
1670   if (IS_EMPTY_STMT (*def_stmt))
1671     {
1672       tree arg = TREE_OPERAND (*def_stmt, 0);
1673       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1674         {
1675           *def = operand;
1676           *dt = vect_invariant_def;
1677           return true;
1678         }
1679
1680       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1681         fprintf (vect_dump, "Unexpected empty stmt.");
1682       return false;
1683     }
1684
1685   bb = bb_for_stmt (*def_stmt);
1686   if (!flow_bb_inside_loop_p (loop, bb))
1687     *dt = vect_invariant_def;
1688   else
1689     {
1690       stmt_vinfo = vinfo_for_stmt (*def_stmt);
1691       *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
1692     }
1693
1694   if (*dt == vect_unknown_def_type)
1695     {
1696       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1697         fprintf (vect_dump, "Unsupported pattern.");
1698       return false;
1699     }
1700
1701   /* stmts inside the loop that have been identified as performing
1702      a reduction operation cannot have uses in the loop.  */
1703   if (*dt == vect_reduction_def && TREE_CODE (*def_stmt) != PHI_NODE)
1704     {
1705       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1706         fprintf (vect_dump, "reduction used in loop.");
1707       return false;
1708     }
1709
1710   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1711     fprintf (vect_dump, "type of def: %d.",*dt);
1712
1713   switch (TREE_CODE (*def_stmt))
1714     {
1715     case PHI_NODE:
1716       *def = PHI_RESULT (*def_stmt);
1717       gcc_assert (*dt == vect_induction_def || *dt == vect_reduction_def
1718                   || *dt == vect_invariant_def);
1719       break;
1720
1721     case MODIFY_EXPR:
1722       *def = TREE_OPERAND (*def_stmt, 0);
1723       gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
1724       break;
1725
1726     default:
1727       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1728         fprintf (vect_dump, "unsupported defining stmt: ");
1729       return false;
1730     }
1731
1732   if (*dt == vect_induction_def)
1733     {
1734       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1735         fprintf (vect_dump, "induction not supported.");
1736       return false;
1737     }
1738
1739   return true;
1740 }
1741
1742
1743 /* Function vect_is_simple_reduction
1744
1745    TODO:
1746    Detect a cross-iteration def-use cucle that represents a simple
1747    reduction computation. We look for the followng pattern:
1748
1749    loop_header:
1750      a1 = phi < a0, a2 >
1751      a3 = ...
1752      a2 = operation (a3, a1)
1753
1754    such that:
1755    1. operation is...
1756    2. no uses for a2 in the loop (elsewhere)  */
1757
1758 tree
1759 vect_is_simple_reduction (struct loop *loop ATTRIBUTE_UNUSED,
1760                           tree phi ATTRIBUTE_UNUSED)
1761 {
1762   /* FORNOW */
1763   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1764     fprintf (vect_dump, "reduction: unknown pattern.");
1765
1766   return NULL_TREE;
1767 }
1768
1769
1770 /* Function vect_is_simple_iv_evolution.
1771
1772    FORNOW: A simple evolution of an induction variables in the loop is
1773    considered a polynomial evolution with constant step.  */
1774
1775 bool
1776 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1777                              tree * step)
1778 {
1779   tree init_expr;
1780   tree step_expr;
1781
1782   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1783
1784   /* When there is no evolution in this loop, the evolution function
1785      is not "simple".  */
1786   if (evolution_part == NULL_TREE)
1787     return false;
1788
1789   /* When the evolution is a polynomial of degree >= 2
1790      the evolution function is not "simple".  */
1791   if (tree_is_chrec (evolution_part))
1792     return false;
1793
1794   step_expr = evolution_part;
1795   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1796                                                            loop_nb));
1797
1798   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1799     {
1800       fprintf (vect_dump, "step: ");
1801       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1802       fprintf (vect_dump, ",  init: ");
1803       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1804     }
1805
1806   *init = init_expr;
1807   *step = step_expr;
1808
1809   if (TREE_CODE (step_expr) != INTEGER_CST)
1810     {
1811       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1812         fprintf (vect_dump, "step unknown.");
1813       return false;
1814     }
1815
1816   return true;
1817 }
1818
1819
1820 /* Function vectorize_loops.
1821
1822    Entry Point to loop vectorization phase.  */
1823
1824 void
1825 vectorize_loops (struct loops *loops)
1826 {
1827   unsigned int i;
1828   unsigned int num_vectorized_loops = 0;
1829
1830   /* Fix the verbosity level if not defined explicitly by the user.  */
1831   vect_set_dump_settings ();
1832
1833   /*  ----------- Analyze loops. -----------  */
1834
1835   /* If some loop was duplicated, it gets bigger number
1836      than all previously defined loops. This fact allows us to run
1837      only over initial loops skipping newly generated ones.  */
1838   vect_loops_num = loops->num;
1839   for (i = 1; i < vect_loops_num; i++)
1840     {
1841       loop_vec_info loop_vinfo;
1842       struct loop *loop = loops->parray[i];
1843
1844       if (!loop)
1845         continue;
1846
1847       loop_vinfo = vect_analyze_loop (loop);
1848       loop->aux = loop_vinfo;
1849
1850       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1851         continue;
1852
1853       vect_transform_loop (loop_vinfo, loops);
1854       num_vectorized_loops++;
1855     }
1856
1857   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1858     fprintf (vect_dump, "vectorized %u loops in function.\n",
1859              num_vectorized_loops);
1860
1861   /*  ----------- Finalize. -----------  */
1862
1863   for (i = 1; i < vect_loops_num; i++)
1864     {
1865       struct loop *loop = loops->parray[i];
1866       loop_vec_info loop_vinfo;
1867
1868       if (!loop)
1869         continue;
1870       loop_vinfo = loop->aux;
1871       destroy_loop_vec_info (loop_vinfo);
1872       loop->aux = NULL;
1873     }
1874 }