gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "errors.h"
 128 #include "ggc.h"
 129 #include "tree.h"
 130 #include "target.h"
 131 #include "rtl.h"
 132 #include "basic-block.h"
 133 #include "diagnostic.h"
 134 #include "tree-flow.h"
 135 #include "tree-dump.h"
 136 #include "timevar.h"
 137 #include "cfgloop.h"
 138 #include "cfglayout.h"
 139 #include "expr.h"
 140 #include "optabs.h"
 141 #include "toplev.h"
 142 #include "tree-chrec.h"
 143 #include "tree-data-ref.h"
 144 #include "tree-scalar-evolution.h"
 145 #include "input.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148
 149 /*************************************************************************
 150   Simple Loop Peeling Utilities
 151  *************************************************************************/
 152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 153   (struct loop *, struct loops *, edge);
 154 static void slpeel_update_phis_for_duplicate_loop
 155   (struct loop *, struct loop *, bool after);
 156 static void slpeel_update_phi_nodes_for_guard1
 157   (edge, struct loop *, bool, basic_block *, bitmap *);
 158 static void slpeel_update_phi_nodes_for_guard2
 159   (edge, struct loop *, bool, basic_block *);
 160 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 161
 162 static void rename_use_op (use_operand_p);
 163 static void rename_variables_in_bb (basic_block);
 164 static void rename_variables_in_loop (struct loop *);
 165
 166 /*************************************************************************
 167   General Vectorization Utilities
 168  *************************************************************************/
 169 static void vect_set_dump_settings (void);
 170
 171 /* vect_dump will be set to stderr or dump_file if exist.  */
 172 FILE *vect_dump;
 173
 174 /* vect_verbosity_level set to an invalid value
 175    to mark that it's uninitialized.  */
 176 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 177
 178 /* Number of loops, at the beginning of vectorization.  */
 179 unsigned int vect_loops_num;
 180 \f
 181 /*************************************************************************
 182   Simple Loop Peeling Utilities
 183
 184   Utilities to support loop peeling for vectorization purposes.
 185  *************************************************************************/
 186
 187
 188 /* Renames the use *OP_P.  */
 189
 190 static void
 191 rename_use_op (use_operand_p op_p)
 192 {
 193   tree new_name;
 194
 195   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 196     return;
 197
 198   new_name = get_current_def (USE_FROM_PTR (op_p));
 199
 200   /* Something defined outside of the loop.  */
 201   if (!new_name)
 202     return;
 203
 204   /* An ordinary ssa name defined in the loop.  */
 205
 206   SET_USE (op_p, new_name);
 207 }
 208
 209
 210 /* Renames the variables in basic block BB.  */
 211
 212 static void
 213 rename_variables_in_bb (basic_block bb)
 214 {
 215   tree phi;
 216   block_stmt_iterator bsi;
 217   tree stmt;
 218   use_operand_p use_p;
 219   ssa_op_iter iter;
 220   edge e;
 221   edge_iterator ei;
 222   struct loop *loop = bb->loop_father;
 223
 224   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 225     {
 226       stmt = bsi_stmt (bsi);
 227       FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
 228                                  (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
 229         rename_use_op (use_p);
 230     }
 231
 232   FOR_EACH_EDGE (e, ei, bb->succs)
 233     {
 234       if (!flow_bb_inside_loop_p (loop, e->dest))
 235         continue;
 236       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 237         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 238     }
 239 }
 240
 241
 242 /* Renames variables in new generated LOOP.  */
 243
 244 static void
 245 rename_variables_in_loop (struct loop *loop)
 246 {
 247   unsigned i;
 248   basic_block *bbs;
 249
 250   bbs = get_loop_body (loop);
 251
 252   for (i = 0; i < loop->num_nodes; i++)
 253     rename_variables_in_bb (bbs[i]);
 254
 255   free (bbs);
 256 }
 257
 258
 259 /* Update the PHI nodes of NEW_LOOP.
 260
 261    NEW_LOOP is a duplicate of ORIG_LOOP.
 262    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 263    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 264    executes before it.  */
 265
 266 static void
 267 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 268                                        struct loop *new_loop, bool after)
 269 {
 270   tree new_ssa_name;
 271   tree phi_new, phi_orig;
 272   tree def;
 273   edge orig_loop_latch = loop_latch_edge (orig_loop);
 274   edge orig_entry_e = loop_preheader_edge (orig_loop);
 275   edge new_loop_exit_e = new_loop->single_exit;
 276   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 277   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 278
 279   /*
 280      step 1. For each loop-header-phi:
 281              Add the first phi argument for the phi in NEW_LOOP
 282             (the one associated with the entry of NEW_LOOP)
 283
 284      step 2. For each loop-header-phi:
 285              Add the second phi argument for the phi in NEW_LOOP
 286             (the one associated with the latch of NEW_LOOP)
 287
 288      step 3. Update the phis in the successor block of NEW_LOOP.
 289
 290         case 1: NEW_LOOP was placed before ORIG_LOOP:
 291                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 292                 Updating the phis in the successor block can therefore be done
 293                 along with the scanning of the loop header phis, because the
 294                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 295                 phi nodes, organized in the same order.
 296
 297         case 2: NEW_LOOP was placed after ORIG_LOOP:
 298                 The successor block of NEW_LOOP is the original exit block of
 299                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 300                 We postpone updating these phis to a later stage (when
 301                 loop guards are added).
 302    */
 303
 304
 305   /* Scan the phis in the headers of the old and new loops
 306      (they are organized in exactly the same order).  */
 307
 308   for (phi_new = phi_nodes (new_loop->header),
 309        phi_orig = phi_nodes (orig_loop->header);
 310        phi_new && phi_orig;
 311        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 312     {
 313       /* step 1.  */
 314       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 315       add_phi_arg (phi_new, def, new_loop_entry_e);
 316
 317       /* step 2.  */
 318       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 319       if (TREE_CODE (def) != SSA_NAME)
 320         continue;
 321
 322       new_ssa_name = get_current_def (def);
 323       if (!new_ssa_name)
 324         /* Something defined outside of the loop.  */
 325         continue;
 326
 327       /* An ordinary ssa name defined in the loop.  */
 328       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 329
 330       /* step 3 (case 1).  */
 331       if (!after)
 332         {
 333           gcc_assert (new_loop_exit_e == orig_entry_e);
 334           SET_PHI_ARG_DEF (phi_orig,
 335                            new_loop_exit_e->dest_idx,
 336                            new_ssa_name);
 337         }
 338     }
 339 }
 340
 341
 342 /* Update PHI nodes for a guard of the LOOP.
 343
 344    Input:
 345    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 346         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 347         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 348         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 349         We denote this bb NEW_MERGE_BB because before the guard code was added
 350         it had a single predecessor (the LOOP header), and now it became a merge
 351         point of two paths - the path that ends with the LOOP exit-edge, and
 352         the path that ends with GUARD_EDGE.
 353    - NEW_EXIT_BB: New basic block that is added by this function between LOOP
 354         and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
 355
 356    ===> The CFG before the guard-code was added:
 357         LOOP_header_bb:
 358           loop_body
 359           if (exit_loop) goto update_bb
 360           else           goto LOOP_header_bb
 361         update_bb:
 362
 363    ==> The CFG after the guard-code was added:
 364         guard_bb:
 365           if (LOOP_guard_condition) goto new_merge_bb
 366           else                      goto LOOP_header_bb
 367         LOOP_header_bb:
 368           loop_body
 369           if (exit_loop_condition) goto new_merge_bb
 370           else                     goto LOOP_header_bb
 371         new_merge_bb:
 372           goto update_bb
 373         update_bb:
 374
 375    ==> The CFG after this function:
 376         guard_bb:
 377           if (LOOP_guard_condition) goto new_merge_bb
 378           else                      goto LOOP_header_bb
 379         LOOP_header_bb:
 380           loop_body
 381           if (exit_loop_condition) goto new_exit_bb
 382           else                     goto LOOP_header_bb
 383         new_exit_bb:
 384         new_merge_bb:
 385           goto update_bb
 386         update_bb:
 387
 388    This function:
 389    1. creates and updates the relevant phi nodes to account for the new
 390       incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
 391       1.1. Create phi nodes at NEW_MERGE_BB.
 392       1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 393            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 394    2. preserves loop-closed-ssa-form by creating the required phi nodes
 395       at the exit of LOOP (i.e, in NEW_EXIT_BB).
 396
 397    There are two flavors to this function:
 398
 399    slpeel_update_phi_nodes_for_guard1:
 400      Here the guard controls whether we enter or skip LOOP, where LOOP is a
 401      prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
 402      for variables that have phis in the loop header.
 403
 404    slpeel_update_phi_nodes_for_guard2:
 405      Here the guard controls whether we enter or skip LOOP, where LOOP is an
 406      epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
 407      for variables that have phis in the loop exit.
 408
 409    I.E., the overall structure is:
 410
 411         loop1_preheader_bb:
 412                 guard1 (goto loop1/merg1_bb)
 413         loop1
 414         loop1_exit_bb:
 415                 guard2 (goto merge1_bb/merge2_bb)
 416         merge1_bb
 417         loop2
 418         loop2_exit_bb
 419         merge2_bb
 420         next_bb
 421
 422    slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
 423    loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
 424    that have phis in loop1->header).
 425
 426    slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
 427    loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
 428    that have phis in next_bb). It also adds some of these phis to
 429    loop1_exit_bb.
 430
 431    slpeel_update_phi_nodes_for_guard1 is always called before
 432    slpeel_update_phi_nodes_for_guard2. They are both needed in order
 433    to create correct data-flow and loop-closed-ssa-form.
 434
 435    Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
 436    that change between iterations of a loop (and therefore have a phi-node
 437    at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
 438    phis for variables that are used out of the loop (and therefore have
 439    loop-closed exit phis). Some variables may be both updated between
 440    iterations and used after the loop. This is why in loop1_exit_bb we
 441    may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
 442    and exit phis (created by slpeel_update_phi_nodes_for_guard2).
 443
 444    - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
 445      an original loop. i.e., we have:
 446
 447            orig_loop
 448            guard_bb (goto LOOP/new_merge)
 449            new_loop <-- LOOP
 450            new_exit
 451            new_merge
 452            next_bb
 453
 454      If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
 455      have:
 456
 457            new_loop
 458            guard_bb (goto LOOP/new_merge)
 459            orig_loop <-- LOOP
 460            new_exit
 461            new_merge
 462            next_bb
 463
 464      The SSA names defined in the original loop have a current
 465      reaching definition that that records the corresponding new
 466      ssa-name used in the new duplicated loop copy.
 467   */
 468
 469 /* Function slpeel_update_phi_nodes_for_guard1
 470
 471    Input:
 472    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 473    - DEFS - a bitmap of ssa names to mark new names for which we recorded
 474             information.
 475
 476    In the context of the overall structure, we have:
 477
 478         loop1_preheader_bb:
 479                 guard1 (goto loop1/merg1_bb)
 480 LOOP->  loop1
 481         loop1_exit_bb:
 482                 guard2 (goto merge1_bb/merge2_bb)
 483         merge1_bb
 484         loop2
 485         loop2_exit_bb
 486         merge2_bb
 487         next_bb
 488
 489    For each name updated between loop iterations (i.e - for each name that has
 490    an entry (loop-header) phi in LOOP) we create a new phi in:
 491    1. merge1_bb (to account for the edge from guard1)
 492    2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 493 */
 494
 495 static void
 496 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
 497                                     bool is_new_loop, basic_block *new_exit_bb,
 498                                     bitmap *defs)
 499 {
 500   tree orig_phi, new_phi;
 501   tree update_phi, update_phi2;
 502   tree guard_arg, loop_arg;
 503   basic_block new_merge_bb = guard_edge->dest;
 504   edge e = EDGE_SUCC (new_merge_bb, 0);
 505   basic_block update_bb = e->dest;
 506   basic_block orig_bb = loop->header;
 507   edge new_exit_e;
 508   tree current_new_name;
 509
 510   /* Create new bb between loop and new_merge_bb.  */
 511   *new_exit_bb = split_edge (loop->single_exit);
 512   add_bb_to_loop (*new_exit_bb, loop->outer);
 513
 514   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 515
 516   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 517        orig_phi && update_phi;
 518        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 519     {
 520       /** 1. Handle new-merge-point phis  **/
 521
 522       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 523       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 524                                  new_merge_bb);
 525
 526       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 527             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 528       loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
 529       guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
 530
 531       add_phi_arg (new_phi, loop_arg, new_exit_e);
 532       add_phi_arg (new_phi, guard_arg, guard_edge);
 533
 534       /* 1.3. Update phi in successor block.  */
 535       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 536                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 537       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 538       update_phi2 = new_phi;
 539
 540
 541       /** 2. Handle loop-closed-ssa-form phis  **/
 542
 543       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 544       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 545                                  *new_exit_bb);
 546
 547       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 548       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 549
 550       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 551       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 552       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 553
 554       /* 2.4. Record the newly created name with set_current_def.
 555          We want to find a name such that
 556                 name = get_current_def (orig_loop_name)
 557          and to set its current definition as follows:
 558                 set_current_def (name, new_phi_name)
 559
 560          If LOOP is a new loop then loop_arg is already the name we're
 561          looking for. If LOOP is the original loop, then loop_arg is
 562          the orig_loop_name and the relevant name is recorded in its
 563          current reaching definition.  */
 564       if (is_new_loop)
 565         current_new_name = loop_arg;
 566       else
 567         {
 568           current_new_name = get_current_def (loop_arg);
 569           gcc_assert (current_new_name);
 570         }
 571 #ifdef ENABLE_CHECKING
 572       gcc_assert (get_current_def (current_new_name) == NULL_TREE);
 573 #endif
 574
 575       set_current_def (current_new_name, PHI_RESULT (new_phi));
 576       bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
 577     }
 578
 579   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 580 }
 581
 582
 583 /* Function slpeel_update_phi_nodes_for_guard2
 584
 585    Input:
 586    - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
 587
 588    In the context of the overall structure, we have:
 589
 590         loop1_preheader_bb:
 591                 guard1 (goto loop1/merg1_bb)
 592         loop1
 593         loop1_exit_bb:
 594                 guard2 (goto merge1_bb/merge2_bb)
 595         merge1_bb
 596 LOOP->  loop2
 597         loop2_exit_bb
 598         merge2_bb
 599         next_bb
 600
 601    For each name used out side the loop (i.e - for each name that has an exit
 602    phi in next_bb) we create a new phi in:
 603    1. merge2_bb (to account for the edge from guard_bb)
 604    2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
 605    3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
 606       if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
 607 */
 608
 609 static void
 610 slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
 611                                     bool is_new_loop, basic_block *new_exit_bb)
 612 {
 613   tree orig_phi, new_phi;
 614   tree update_phi, update_phi2;
 615   tree guard_arg, loop_arg;
 616   basic_block new_merge_bb = guard_edge->dest;
 617   edge e = EDGE_SUCC (new_merge_bb, 0);
 618   basic_block update_bb = e->dest;
 619   edge new_exit_e;
 620   tree orig_def, orig_def_new_name;
 621   tree new_name, new_name2;
 622   tree arg;
 623
 624   /* Create new bb between loop and new_merge_bb.  */
 625   *new_exit_bb = split_edge (loop->single_exit);
 626   add_bb_to_loop (*new_exit_bb, loop->outer);
 627
 628   new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
 629
 630   for (update_phi = phi_nodes (update_bb); update_phi;
 631        update_phi = PHI_CHAIN (update_phi))
 632     {
 633       orig_phi = update_phi;
 634       orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 635       orig_def_new_name = get_current_def (orig_def);
 636       arg = NULL_TREE;
 637
 638       /** 1. Handle new-merge-point phis  **/
 639
 640       /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
 641       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 642                                  new_merge_bb);
 643
 644       /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 645             of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
 646       new_name = orig_def;
 647       new_name2 = NULL_TREE;
 648       if (orig_def_new_name)
 649         {
 650           new_name = orig_def_new_name;
 651           /* Some variables have both loop-entry-phis and loop-exit-phis.
 652              Such variables were given yet newer names by phis placed in
 653              guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
 654              new_name2 = get_current_def (get_current_def (orig_name)).  */
 655           new_name2 = get_current_def (new_name);
 656         }
 657
 658       if (is_new_loop)
 659         {
 660           guard_arg = orig_def;
 661           loop_arg = new_name;
 662         }
 663       else
 664         {
 665           guard_arg = new_name;
 666           loop_arg = orig_def;
 667         }
 668       if (new_name2)
 669         guard_arg = new_name2;
 670
 671       add_phi_arg (new_phi, loop_arg, new_exit_e);
 672       add_phi_arg (new_phi, guard_arg, guard_edge);
 673
 674       /* 1.3. Update phi in successor block.  */
 675       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
 676       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 677       update_phi2 = new_phi;
 678
 679
 680       /** 2. Handle loop-closed-ssa-form phis  **/
 681
 682       /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
 683       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 684                                  *new_exit_bb);
 685
 686       /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
 687       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 688
 689       /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
 690       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
 691       SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
 692
 693
 694       /** 3. Handle loop-closed-ssa-form phis for first loop  **/
 695
 696       /* 3.1. Find the relevant names that need an exit-phi in
 697          GUARD_BB, i.e. names for which
 698          slpeel_update_phi_nodes_for_guard1 had not already created a
 699          phi node. This is the case for names that are used outside
 700          the loop (and therefore need an exit phi) but are not updated
 701          across loop iterations (and therefore don't have a
 702          loop-header-phi).
 703
 704          slpeel_update_phi_nodes_for_guard1 is responsible for
 705          creating loop-exit phis in GUARD_BB for names that have a
 706          loop-header-phi.  When such a phi is created we also record
 707          the new name in its current definition.  If this new name
 708          exists, then guard_arg was set to this new name (see 1.2
 709          above).  Therefore, if guard_arg is not this new name, this
 710          is an indication that an exit-phi in GUARD_BB was not yet
 711          created, so we take care of it here.  */
 712       if (guard_arg == new_name2)
 713         continue;
 714       arg = guard_arg;
 715
 716       /* 3.2. Generate new phi node in GUARD_BB:  */
 717       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 718                                  guard_edge->src);
 719
 720       /* 3.3. GUARD_BB has one incoming edge:  */
 721       gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
 722       add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
 723
 724       /* 3.4. Update phi in successor of GUARD_BB:  */
 725       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
 726                                                                 == guard_arg);
 727       SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
 728     }
 729
 730   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 731 }
 732
 733
 734 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 735    that starts at zero, increases by one and its limit is NITERS.
 736
 737    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 738
 739 void
 740 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 741 {
 742   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 743   tree orig_cond;
 744   edge exit_edge = loop->single_exit;
 745   block_stmt_iterator loop_cond_bsi;
 746   block_stmt_iterator incr_bsi;
 747   bool insert_after;
 748   tree begin_label = tree_block_label (loop->latch);
 749   tree exit_label = tree_block_label (loop->single_exit->dest);
 750   tree init = build_int_cst (TREE_TYPE (niters), 0);
 751   tree step = build_int_cst (TREE_TYPE (niters), 1);
 752   tree then_label;
 753   tree else_label;
 754   LOC loop_loc;
 755
 756   orig_cond = get_loop_exit_condition (loop);
 757 #ifdef ENABLE_CHECKING
 758   gcc_assert (orig_cond);
 759 #endif
 760   loop_cond_bsi = bsi_for_stmt (orig_cond);
 761
 762   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 763   create_iv (init, step, NULL_TREE, loop,
 764              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 765
 766   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 767     {
 768       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 769       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 770       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 771     }
 772   else /* 'then' edge loops back.  */
 773     {
 774       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 775       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 776       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 777     }
 778
 779   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 780                      then_label, else_label);
 781   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 782
 783   /* Remove old loop exit test:  */
 784   bsi_remove (&loop_cond_bsi);
 785
 786   loop_loc = find_loop_location (loop);
 787   if (dump_file && (dump_flags & TDF_DETAILS))
 788     {
 789       if (loop_loc != UNKNOWN_LOC)
 790         fprintf (dump_file, "\nloop at %s:%d: ",
 791                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 792       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 793     }
 794
 795   loop->nb_iterations = niters;
 796 }
 797
 798
 799 /* Given LOOP this function generates a new copy of it and puts it
 800    on E which is either the entry or exit of LOOP.  */
 801
 802 static struct loop *
 803 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 804                                         edge e)
 805 {
 806   struct loop *new_loop;
 807   basic_block *new_bbs, *bbs;
 808   bool at_exit;
 809   bool was_imm_dom;
 810   basic_block exit_dest;
 811   tree phi, phi_arg;
 812
 813   at_exit = (e == loop->single_exit);
 814   if (!at_exit && e != loop_preheader_edge (loop))
 815     return NULL;
 816
 817   bbs = get_loop_body (loop);
 818
 819   /* Check whether duplication is possible.  */
 820   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 821     {
 822       free (bbs);
 823       return NULL;
 824     }
 825
 826   /* Generate new loop structure.  */
 827   new_loop = duplicate_loop (loops, loop, loop->outer);
 828   if (!new_loop)
 829     {
 830       free (bbs);
 831       return NULL;
 832     }
 833
 834   exit_dest = loop->single_exit->dest;
 835   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 836                                           exit_dest) == loop->header ?
 837                  true : false);
 838
 839   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 840
 841   copy_bbs (bbs, loop->num_nodes, new_bbs,
 842             &loop->single_exit, 1, &new_loop->single_exit, NULL);
 843
 844   /* Duplicating phi args at exit bbs as coming
 845      also from exit of duplicated loop.  */
 846   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 847     {
 848       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 849       if (phi_arg)
 850         {
 851           edge new_loop_exit_edge;
 852
 853           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 854             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 855           else
 856             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 857
 858           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 859         }
 860     }
 861
 862   if (at_exit) /* Add the loop copy at exit.  */
 863     {
 864       redirect_edge_and_branch_force (e, new_loop->header);
 865       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 866       if (was_imm_dom)
 867         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 868     }
 869   else /* Add the copy at entry.  */
 870     {
 871       edge new_exit_e;
 872       edge entry_e = loop_preheader_edge (loop);
 873       basic_block preheader = entry_e->src;
 874
 875       if (!flow_bb_inside_loop_p (new_loop,
 876                                   EDGE_SUCC (new_loop->header, 0)->dest))
 877         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 878       else
 879         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 880
 881       redirect_edge_and_branch_force (new_exit_e, loop->header);
 882       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 883                                new_exit_e->src);
 884
 885       /* We have to add phi args to the loop->header here as coming
 886          from new_exit_e edge.  */
 887       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 888         {
 889           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 890           if (phi_arg)
 891             add_phi_arg (phi, phi_arg, new_exit_e);
 892         }
 893
 894       redirect_edge_and_branch_force (entry_e, new_loop->header);
 895       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 896     }
 897
 898   free (new_bbs);
 899   free (bbs);
 900
 901   return new_loop;
 902 }
 903
 904
 905 /* Given the condition statement COND, put it as the last statement
 906    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 907    Assumes that this is the single exit of the guarded loop.
 908    Returns the skip edge.  */
 909
 910 static edge
 911 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 912                         basic_block dom_bb)
 913 {
 914   block_stmt_iterator bsi;
 915   edge new_e, enter_e;
 916   tree cond_stmt, then_label, else_label;
 917
 918   enter_e = EDGE_SUCC (guard_bb, 0);
 919   enter_e->flags &= ~EDGE_FALLTHRU;
 920   enter_e->flags |= EDGE_FALSE_VALUE;
 921   bsi = bsi_last (guard_bb);
 922
 923   then_label = build1 (GOTO_EXPR, void_type_node,
 924                        tree_block_label (exit_bb));
 925   else_label = build1 (GOTO_EXPR, void_type_node,
 926                        tree_block_label (enter_e->dest));
 927   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 928                      then_label, else_label);
 929   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 930   /* Add new edge to connect guard block to the merge/loop-exit block.  */
 931   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 932   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 933   return new_e;
 934 }
 935
 936
 937 /* This function verifies that the following restrictions apply to LOOP:
 938    (1) it is innermost
 939    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 940    (3) it is single entry, single exit
 941    (4) its exit condition is the last stmt in the header
 942    (5) E is the entry/exit edge of LOOP.
 943  */
 944
 945 bool
 946 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 947 {
 948   edge exit_e = loop->single_exit;
 949   edge entry_e = loop_preheader_edge (loop);
 950   tree orig_cond = get_loop_exit_condition (loop);
 951   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 952
 953   if (need_ssa_update_p ())
 954     return false;
 955
 956   if (loop->inner
 957       /* All loops have an outer scope; the only case loop->outer is NULL is for
 958          the function itself.  */
 959       || !loop->outer
 960       || loop->num_nodes != 2
 961       || !empty_block_p (loop->latch)
 962       || !loop->single_exit
 963       /* Verify that new loop exit condition can be trivially modified.  */
 964       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 965       || (e != exit_e && e != entry_e))
 966     return false;
 967
 968   return true;
 969 }
 970
 971 #ifdef ENABLE_CHECKING
 972 void
 973 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 974                                  struct loop *second_loop)
 975 {
 976   basic_block loop1_exit_bb = first_loop->single_exit->dest;
 977   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 978   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 979
 980   /* A guard that controls whether the second_loop is to be executed or skipped
 981      is placed in first_loop->exit.  first_loopt->exit therefore has two
 982      successors - one is the preheader of second_loop, and the other is a bb
 983      after second_loop.
 984    */
 985   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 986
 987   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 988         of second_loop.  */
 989
 990   /* The preheader of new_loop is expected to have two predessors:
 991      first_loop->exit and the block that precedes first_loop.  */
 992
 993   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 994               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 995                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 996                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 997                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 998
 999   /* Verify that the other successor of first_loopt->exit is after the
1000      second_loop.  */
1001   /* TODO */
1002 }
1003 #endif
1004
1005 /* Function slpeel_tree_peel_loop_to_edge.
1006
1007    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1008    that is placed on the entry (exit) edge E of LOOP. After this transformation
1009    we have two loops one after the other - first-loop iterates FIRST_NITERS
1010    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
1011
1012    Input:
1013    - LOOP: the loop to be peeled.
1014    - E: the exit or entry edge of LOOP.
1015         If it is the entry edge, we peel the first iterations of LOOP. In this
1016         case first-loop is LOOP, and second-loop is the newly created loop.
1017         If it is the exit edge, we peel the last iterations of LOOP. In this
1018         case, first-loop is the newly created loop, and second-loop is LOOP.
1019    - NITERS: the number of iterations that LOOP iterates.
1020    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1021    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
1022         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
1023         is false, the caller of this function may want to take care of this
1024         (this can be useful if we don't want new stmts added to first-loop).
1025
1026    Output:
1027    The function returns a pointer to the new loop-copy, or NULL if it failed
1028    to perform the transformation.
1029
1030    The function generates two if-then-else guards: one before the first loop,
1031    and the other before the second loop:
1032    The first guard is:
1033      if (FIRST_NITERS == 0) then skip the first loop,
1034      and go directly to the second loop.
1035    The second guard is:
1036      if (FIRST_NITERS == NITERS) then skip the second loop.
1037
1038    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1039    FORNOW the resulting code will not be in loop-closed-ssa form.
1040 */
1041
1042 struct loop*
1043 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
1044                                edge e, tree first_niters,
1045                                tree niters, bool update_first_loop_count)
1046 {
1047   struct loop *new_loop = NULL, *first_loop, *second_loop;
1048   edge skip_e;
1049   tree pre_condition;
1050   bitmap definitions;
1051   basic_block bb_before_second_loop, bb_after_second_loop;
1052   basic_block bb_before_first_loop;
1053   basic_block bb_between_loops;
1054   basic_block new_exit_bb;
1055   edge exit_e = loop->single_exit;
1056   LOC loop_loc;
1057
1058   if (!slpeel_can_duplicate_loop_p (loop, e))
1059     return NULL;
1060
1061   /* We have to initialize cfg_hooks. Then, when calling
1062    cfg_hooks->split_edge, the function tree_split_edge
1063    is actually called and, when calling cfg_hooks->duplicate_block,
1064    the function tree_duplicate_bb is called.  */
1065   tree_register_cfg_hooks ();
1066
1067
1068   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1069         Resulting CFG would be:
1070
1071         first_loop:
1072         do {
1073         } while ...
1074
1075         second_loop:
1076         do {
1077         } while ...
1078
1079         orig_exit_bb:
1080    */
1081
1082   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
1083     {
1084       loop_loc = find_loop_location (loop);
1085       if (dump_file && (dump_flags & TDF_DETAILS))
1086         {
1087           if (loop_loc != UNKNOWN_LOC)
1088             fprintf (dump_file, "\n%s:%d: note: ",
1089                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1090           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1091         }
1092       return NULL;
1093     }
1094
1095   if (e == exit_e)
1096     {
1097       /* NEW_LOOP was placed after LOOP.  */
1098       first_loop = loop;
1099       second_loop = new_loop;
1100     }
1101   else
1102     {
1103       /* NEW_LOOP was placed before LOOP.  */
1104       first_loop = new_loop;
1105       second_loop = loop;
1106     }
1107
1108   definitions = ssa_names_to_replace ();
1109   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1110   rename_variables_in_loop (new_loop);
1111
1112
1113   /* 2. Add the guard that controls whether the first loop is executed.
1114         Resulting CFG would be:
1115
1116         bb_before_first_loop:
1117         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1118                                GOTO first-loop
1119
1120         first_loop:
1121         do {
1122         } while ...
1123
1124         bb_before_second_loop:
1125
1126         second_loop:
1127         do {
1128         } while ...
1129
1130         orig_exit_bb:
1131    */
1132
1133   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
1134   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
1135   bb_before_second_loop = split_edge (first_loop->single_exit);
1136   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
1137
1138   pre_condition =
1139     fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
1140   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1141                                   bb_before_second_loop, bb_before_first_loop);
1142   slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1143                                       first_loop == new_loop,
1144                                       &new_exit_bb, &definitions);
1145
1146
1147   /* 3. Add the guard that controls whether the second loop is executed.
1148         Resulting CFG would be:
1149
1150         bb_before_first_loop:
1151         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1152                                GOTO first-loop
1153
1154         first_loop:
1155         do {
1156         } while ...
1157
1158         bb_between_loops:
1159         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1160                                     GOTO bb_before_second_loop
1161
1162         bb_before_second_loop:
1163
1164         second_loop:
1165         do {
1166         } while ...
1167
1168         bb_after_second_loop:
1169
1170         orig_exit_bb:
1171    */
1172
1173   bb_between_loops = new_exit_bb;
1174   bb_after_second_loop = split_edge (second_loop->single_exit);
1175   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1176
1177   pre_condition =
1178         fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
1179   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1180                                   bb_after_second_loop, bb_before_first_loop);
1181   slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1182                                      second_loop == new_loop, &new_exit_bb);
1183
1184   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1185    */
1186   if (update_first_loop_count)
1187     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1188
1189   BITMAP_FREE (definitions);
1190   delete_update_ssa ();
1191
1192   return new_loop;
1193 }
1194
1195 /* Function vect_get_loop_location.
1196
1197    Extract the location of the loop in the source code.
1198    If the loop is not well formed for vectorization, an estimated
1199    location is calculated.
1200    Return the loop location if succeed and NULL if not.  */
1201
1202 LOC
1203 find_loop_location (struct loop *loop)
1204 {
1205   tree node = NULL_TREE;
1206   basic_block bb;
1207   block_stmt_iterator si;
1208
1209   if (!loop)
1210     return UNKNOWN_LOC;
1211
1212   node = get_loop_exit_condition (loop);
1213
1214   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1215       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1216     return EXPR_LOC (node);
1217
1218   /* If we got here the loop is probably not "well formed",
1219      try to estimate the loop location */
1220
1221   if (!loop->header)
1222     return UNKNOWN_LOC;
1223
1224   bb = loop->header;
1225
1226   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1227     {
1228       node = bsi_stmt (si);
1229       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1230         return EXPR_LOC (node);
1231     }
1232
1233   return UNKNOWN_LOC;
1234 }
1235
1236
1237 /*************************************************************************
1238   Vectorization Debug Information.
1239  *************************************************************************/
1240
1241 /* Function vect_set_verbosity_level.
1242
1243    Called from toplev.c upon detection of the
1244    -ftree-vectorizer-verbose=N option.  */
1245
1246 void
1247 vect_set_verbosity_level (const char *val)
1248 {
1249    unsigned int vl;
1250
1251    vl = atoi (val);
1252    if (vl < MAX_VERBOSITY_LEVEL)
1253      vect_verbosity_level = vl;
1254    else
1255      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1256 }
1257
1258
1259 /* Function vect_set_dump_settings.
1260
1261    Fix the verbosity level of the vectorizer if the
1262    requested level was not set explicitly using the flag
1263    -ftree-vectorizer-verbose=N.
1264    Decide where to print the debugging information (dump_file/stderr).
1265    If the user defined the verbosity level, but there is no dump file,
1266    print to stderr, otherwise print to the dump file.  */
1267
1268 static void
1269 vect_set_dump_settings (void)
1270 {
1271   vect_dump = dump_file;
1272
1273   /* Check if the verbosity level was defined by the user:  */
1274   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1275     {
1276       /* If there is no dump file, print to stderr.  */
1277       if (!dump_file)
1278         vect_dump = stderr;
1279       return;
1280     }
1281
1282   /* User didn't specify verbosity level:  */
1283   if (dump_file && (dump_flags & TDF_DETAILS))
1284     vect_verbosity_level = REPORT_DETAILS;
1285   else if (dump_file && (dump_flags & TDF_STATS))
1286     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1287   else
1288     vect_verbosity_level = REPORT_NONE;
1289
1290   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1291 }
1292
1293
1294 /* Function debug_loop_details.
1295
1296    For vectorization debug dumps.  */
1297
1298 bool
1299 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1300 {
1301   if (vl > vect_verbosity_level)
1302     return false;
1303
1304   if (loc == UNKNOWN_LOC)
1305     fprintf (vect_dump, "\n%s:%d: note: ",
1306                  DECL_SOURCE_FILE (current_function_decl),
1307                  DECL_SOURCE_LINE (current_function_decl));
1308   else
1309     fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1310
1311
1312   return true;
1313 }
1314
1315
1316 /*************************************************************************
1317   Vectorization Utilities.
1318  *************************************************************************/
1319
1320 /* Function new_stmt_vec_info.
1321
1322    Create and initialize a new stmt_vec_info struct for STMT.  */
1323
1324 stmt_vec_info
1325 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1326 {
1327   stmt_vec_info res;
1328   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1329
1330   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1331   STMT_VINFO_STMT (res) = stmt;
1332   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1333   STMT_VINFO_RELEVANT_P (res) = 0;
1334   STMT_VINFO_VECTYPE (res) = NULL;
1335   STMT_VINFO_VEC_STMT (res) = NULL;
1336   STMT_VINFO_DATA_REF (res) = NULL;
1337   STMT_VINFO_MEMTAG (res) = NULL;
1338   STMT_VINFO_PTR_INFO (res) = NULL;
1339   STMT_VINFO_SUBVARS (res) = NULL;
1340   STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1341   STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1342   STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1343   STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1344   STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1345
1346   return res;
1347 }
1348
1349
1350 /* Function new_loop_vec_info.
1351
1352    Create and initialize a new loop_vec_info struct for LOOP, as well as
1353    stmt_vec_info structs for all the stmts in LOOP.  */
1354
1355 loop_vec_info
1356 new_loop_vec_info (struct loop *loop)
1357 {
1358   loop_vec_info res;
1359   basic_block *bbs;
1360   block_stmt_iterator si;
1361   unsigned int i;
1362
1363   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1364
1365   bbs = get_loop_body (loop);
1366
1367   /* Create stmt_info for all stmts in the loop.  */
1368   for (i = 0; i < loop->num_nodes; i++)
1369     {
1370       basic_block bb = bbs[i];
1371       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1372         {
1373           tree stmt = bsi_stmt (si);
1374           stmt_ann_t ann;
1375
1376           ann = stmt_ann (stmt);
1377           set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1378         }
1379     }
1380
1381   LOOP_VINFO_LOOP (res) = loop;
1382   LOOP_VINFO_BBS (res) = bbs;
1383   LOOP_VINFO_EXIT_COND (res) = NULL;
1384   LOOP_VINFO_NITERS (res) = NULL;
1385   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1386   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1387   LOOP_VINFO_VECT_FACTOR (res) = 0;
1388   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1389                            "loop_write_datarefs");
1390   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1391                            "loop_read_datarefs");
1392   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1393   LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1394
1395   return res;
1396 }
1397
1398
1399 /* Function destroy_loop_vec_info.
1400
1401    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1402    stmts in the loop.  */
1403
1404 void
1405 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1406 {
1407   struct loop *loop;
1408   basic_block *bbs;
1409   int nbbs;
1410   block_stmt_iterator si;
1411   int j;
1412
1413   if (!loop_vinfo)
1414     return;
1415
1416   loop = LOOP_VINFO_LOOP (loop_vinfo);
1417
1418   bbs = LOOP_VINFO_BBS (loop_vinfo);
1419   nbbs = loop->num_nodes;
1420
1421   for (j = 0; j < nbbs; j++)
1422     {
1423       basic_block bb = bbs[j];
1424       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1425         {
1426           tree stmt = bsi_stmt (si);
1427           stmt_ann_t ann = stmt_ann (stmt);
1428           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1429           free (stmt_info);
1430           set_stmt_info (ann, NULL);
1431         }
1432     }
1433
1434   free (LOOP_VINFO_BBS (loop_vinfo));
1435   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1436   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1437
1438   free (loop_vinfo);
1439 }
1440
1441
1442 /* Function vect_strip_conversions
1443
1444    Strip conversions that don't narrow the mode.  */
1445
1446 tree
1447 vect_strip_conversion (tree expr)
1448 {
1449   tree to, ti, oprnd0;
1450
1451   while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1452     {
1453       to = TREE_TYPE (expr);
1454       oprnd0 = TREE_OPERAND (expr, 0);
1455       ti = TREE_TYPE (oprnd0);
1456
1457       if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1458         return NULL_TREE;
1459       if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1460         return NULL_TREE;
1461
1462       expr = oprnd0;
1463     }
1464   return expr;
1465 }
1466
1467
1468 /* Function vect_force_dr_alignment_p.
1469
1470    Returns whether the alignment of a DECL can be forced to be aligned
1471    on ALIGNMENT bit boundary.  */
1472
1473 bool
1474 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1475 {
1476   if (TREE_CODE (decl) != VAR_DECL)
1477     return false;
1478
1479   if (DECL_EXTERNAL (decl))
1480     return false;
1481
1482   if (TREE_ASM_WRITTEN (decl))
1483     return false;
1484
1485   if (TREE_STATIC (decl))
1486     return (alignment <= MAX_OFILE_ALIGNMENT);
1487   else
1488     /* This is not 100% correct.  The absolute correct stack alignment
1489        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1490        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1491        However, until someone implements forced stack alignment, SSE
1492        isn't really usable without this.  */
1493     return (alignment <= PREFERRED_STACK_BOUNDARY);
1494 }
1495
1496
1497 /* Function get_vectype_for_scalar_type.
1498
1499    Returns the vector type corresponding to SCALAR_TYPE as supported
1500    by the target.  */
1501
1502 tree
1503 get_vectype_for_scalar_type (tree scalar_type)
1504 {
1505   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1506   int nbytes = GET_MODE_SIZE (inner_mode);
1507   int nunits;
1508   tree vectype;
1509
1510   if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
1511     return NULL_TREE;
1512
1513   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1514      is expected.  */
1515   nunits = UNITS_PER_SIMD_WORD / nbytes;
1516
1517   vectype = build_vector_type (scalar_type, nunits);
1518   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1519     {
1520       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1521       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1522     }
1523
1524   if (!vectype)
1525     return NULL_TREE;
1526
1527   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1528     {
1529       fprintf (vect_dump, "vectype: ");
1530       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1531     }
1532
1533   if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1534       && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
1535     {
1536       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1537         fprintf (vect_dump, "mode not supported by target.");
1538       return NULL_TREE;
1539     }
1540
1541   return vectype;
1542 }
1543
1544
1545 /* Function vect_supportable_dr_alignment
1546
1547    Return whether the data reference DR is supported with respect to its
1548    alignment.  */
1549
1550 enum dr_alignment_support
1551 vect_supportable_dr_alignment (struct data_reference *dr)
1552 {
1553   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1554   enum machine_mode mode = (int) TYPE_MODE (vectype);
1555
1556   if (aligned_access_p (dr))
1557     return dr_aligned;
1558
1559   /* Possibly unaligned access.  */
1560
1561   if (DR_IS_READ (dr))
1562     {
1563       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1564           && (!targetm.vectorize.builtin_mask_for_load
1565               || targetm.vectorize.builtin_mask_for_load ()))
1566         return dr_unaligned_software_pipeline;
1567
1568       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1569         /* Can't software pipeline the loads, but can at least do them.  */
1570         return dr_unaligned_supported;
1571     }
1572
1573   /* Unsupported.  */
1574   return dr_unaligned_unsupported;
1575 }
1576
1577
1578 /* Function vect_is_simple_use.
1579
1580    Input:
1581    LOOP - the loop that is being vectorized.
1582    OPERAND - operand of a stmt in LOOP.
1583    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1584
1585    Returns whether a stmt with OPERAND can be vectorized.
1586    Supportable operands are constants, loop invariants, and operands that are
1587    defined by the current iteration of the loop. Unsupportable operands are
1588    those that are defined by a previous iteration of the loop (as is the case
1589    in reduction/induction computations).  */
1590
1591 bool
1592 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
1593 {
1594   tree def_stmt;
1595   basic_block bb;
1596   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1597
1598   if (def)
1599     *def = NULL_TREE;
1600
1601   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1602     return true;
1603
1604   if (TREE_CODE (operand) != SSA_NAME)
1605     return false;
1606
1607   def_stmt = SSA_NAME_DEF_STMT (operand);
1608   if (def_stmt == NULL_TREE )
1609     {
1610       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1611         fprintf (vect_dump, "no def_stmt.");
1612       return false;
1613     }
1614
1615   /* empty stmt is expected only in case of a function argument.
1616      (Otherwise - we expect a phi_node or a modify_expr).  */
1617   if (IS_EMPTY_STMT (def_stmt))
1618     {
1619       tree arg = TREE_OPERAND (def_stmt, 0);
1620       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1621         return true;
1622       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1623         {
1624           fprintf (vect_dump, "Unexpected empty stmt: ");
1625           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1626         }
1627       return false;
1628     }
1629
1630   /* phi_node inside the loop indicates an induction/reduction pattern.
1631      This is not supported yet.  */
1632   bb = bb_for_stmt (def_stmt);
1633   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1634     {
1635       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1636         fprintf (vect_dump, "reduction/induction - unsupported.");
1637       return false; /* FORNOW: not supported yet.  */
1638     }
1639
1640   /* Expecting a modify_expr or a phi_node.  */
1641   if (TREE_CODE (def_stmt) == MODIFY_EXPR
1642       || TREE_CODE (def_stmt) == PHI_NODE)
1643     {
1644       if (def)
1645         *def = def_stmt;
1646       return true;
1647     }
1648
1649   return false;
1650 }
1651
1652
1653 /* Function vect_is_simple_iv_evolution.
1654
1655    FORNOW: A simple evolution of an induction variables in the loop is
1656    considered a polynomial evolution with constant step.  */
1657
1658 bool
1659 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1660                              tree * step)
1661 {
1662   tree init_expr;
1663   tree step_expr;
1664
1665   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1666
1667   /* When there is no evolution in this loop, the evolution function
1668      is not "simple".  */
1669   if (evolution_part == NULL_TREE)
1670     return false;
1671
1672   /* When the evolution is a polynomial of degree >= 2
1673      the evolution function is not "simple".  */
1674   if (tree_is_chrec (evolution_part))
1675     return false;
1676
1677   step_expr = evolution_part;
1678   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1679                                                            loop_nb));
1680
1681   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1682     {
1683       fprintf (vect_dump, "step: ");
1684       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1685       fprintf (vect_dump, ",  init: ");
1686       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1687     }
1688
1689   *init = init_expr;
1690   *step = step_expr;
1691
1692   if (TREE_CODE (step_expr) != INTEGER_CST)
1693     {
1694       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1695         fprintf (vect_dump, "step unknown.");
1696       return false;
1697     }
1698
1699   return true;
1700 }
1701
1702
1703 /* Function vectorize_loops.
1704
1705    Entry Point to loop vectorization phase.  */
1706
1707 void
1708 vectorize_loops (struct loops *loops)
1709 {
1710   unsigned int i;
1711   unsigned int num_vectorized_loops = 0;
1712
1713   /* Fix the verbosity level if not defined explicitly by the user.  */
1714   vect_set_dump_settings ();
1715
1716   /*  ----------- Analyze loops. -----------  */
1717
1718   /* If some loop was duplicated, it gets bigger number
1719      than all previously defined loops. This fact allows us to run
1720      only over initial loops skipping newly generated ones.  */
1721   vect_loops_num = loops->num;
1722   for (i = 1; i < vect_loops_num; i++)
1723     {
1724       loop_vec_info loop_vinfo;
1725       struct loop *loop = loops->parray[i];
1726
1727       if (!loop)
1728         continue;
1729
1730       loop_vinfo = vect_analyze_loop (loop);
1731       loop->aux = loop_vinfo;
1732
1733       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1734         continue;
1735
1736       vect_transform_loop (loop_vinfo, loops);
1737       num_vectorized_loops++;
1738     }
1739
1740   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1741     fprintf (vect_dump, "vectorized %u loops in function.\n",
1742              num_vectorized_loops);
1743
1744   /*  ----------- Finalize. -----------  */
1745
1746   for (i = 1; i < vect_loops_num; i++)
1747     {
1748       struct loop *loop = loops->parray[i];
1749       loop_vec_info loop_vinfo;
1750
1751       if (!loop)
1752         continue;
1753       loop_vinfo = loop->aux;
1754       destroy_loop_vec_info (loop_vinfo);
1755       loop->aux = NULL;
1756     }
1757 }