gcc/tree-vectorizer.c

   1 /* Loop Vectorization
   2    Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   3    Contributed by Dorit Naishlos <dorit@il.ibm.com>
   4
   5 This file is part of GCC.
   6
   7 GCC is free software; you can redistribute it and/or modify it under
   8 the terms of the GNU General Public License as published by the Free
   9 Software Foundation; either version 2, or (at your option) any later
  10 version.
  11
  12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GCC; see the file COPYING.  If not, write to the Free
  19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  20 02111-1307, USA.  */
  21
  22 /* Loop Vectorization Pass.
  23
  24    This pass tries to vectorize loops. This first implementation focuses on
  25    simple inner-most loops, with no conditional control flow, and a set of
  26    simple operations which vector form can be expressed using existing
  27    tree codes (PLUS, MULT etc).
  28
  29    For example, the vectorizer transforms the following simple loop:
  30
  31         short a[N]; short b[N]; short c[N]; int i;
  32
  33         for (i=0; i<N; i++){
  34           a[i] = b[i] + c[i];
  35         }
  36
  37    as if it was manually vectorized by rewriting the source code into:
  38
  39         typedef int __attribute__((mode(V8HI))) v8hi;
  40         short a[N];  short b[N]; short c[N];   int i;
  41         v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
  42         v8hi va, vb, vc;
  43
  44         for (i=0; i<N/8; i++){
  45           vb = pb[i];
  46           vc = pc[i];
  47           va = vb + vc;
  48           pa[i] = va;
  49         }
  50
  51         The main entry to this pass is vectorize_loops(), in which
  52    the vectorizer applies a set of analyses on a given set of loops,
  53    followed by the actual vectorization transformation for the loops that
  54    had successfully passed the analysis phase.
  55
  56         Throughout this pass we make a distinction between two types of
  57    data: scalars (which are represented by SSA_NAMES), and memory references
  58    ("data-refs"). These two types of data require different handling both
  59    during analysis and transformation. The types of data-refs that the
  60    vectorizer currently supports are ARRAY_REFS which base is an array DECL
  61    (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
  62    accesses are required to have a  simple (consecutive) access pattern.
  63
  64    Analysis phase:
  65    ===============
  66         The driver for the analysis phase is vect_analyze_loop_nest().
  67    It applies a set of analyses, some of which rely on the scalar evolution
  68    analyzer (scev) developed by Sebastian Pop.
  69
  70         During the analysis phase the vectorizer records some information
  71    per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
  72    loop, as well as general information about the loop as a whole, which is
  73    recorded in a "loop_vec_info" struct attached to each loop.
  74
  75    Transformation phase:
  76    =====================
  77         The loop transformation phase scans all the stmts in the loop, and
  78    creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
  79    the loop that needs to be vectorized. It insert the vector code sequence
  80    just before the scalar stmt S, and records a pointer to the vector code
  81    in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
  82    attached to S). This pointer will be used for the vectorization of following
  83    stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
  84    otherwise, we rely on dead code elimination for removing it.
  85
  86         For example, say stmt S1 was vectorized into stmt VS1:
  87
  88    VS1: vb = px[i];
  89    S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  90    S2:  a = b;
  91
  92    To vectorize stmt S2, the vectorizer first finds the stmt that defines
  93    the operand 'b' (S1), and gets the relevant vector def 'vb' from the
  94    vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
  95    resulting sequence would be:
  96
  97    VS1: vb = px[i];
  98    S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
  99    VS2: va = vb;
 100    S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
 101
 102         Operands that are not SSA_NAMEs, are data-refs that appear in
 103    load/store operations (like 'x[i]' in S1), and are handled differently.
 104
 105    Target modeling:
 106    =================
 107         Currently the only target specific information that is used is the
 108    size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
 109    support different sizes of vectors, for now will need to specify one value
 110    for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
 111
 112         Since we only vectorize operations which vector form can be
 113    expressed using existing tree codes, to verify that an operation is
 114    supported, the vectorizer checks the relevant optab at the relevant
 115    machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
 116    the value found is CODE_FOR_nothing, then there's no target support, and
 117    we can't vectorize the stmt.
 118
 119    For additional information on this project see:
 120    http://gcc.gnu.org/projects/tree-ssa/vectorization.html
 121 */
 122
 123 #include "config.h"
 124 #include "system.h"
 125 #include "coretypes.h"
 126 #include "tm.h"
 127 #include "errors.h"
 128 #include "ggc.h"
 129 #include "tree.h"
 130 #include "target.h"
 131 #include "rtl.h"
 132 #include "basic-block.h"
 133 #include "diagnostic.h"
 134 #include "tree-flow.h"
 135 #include "tree-dump.h"
 136 #include "timevar.h"
 137 #include "cfgloop.h"
 138 #include "cfglayout.h"
 139 #include "expr.h"
 140 #include "optabs.h"
 141 #include "toplev.h"
 142 #include "tree-chrec.h"
 143 #include "tree-data-ref.h"
 144 #include "tree-scalar-evolution.h"
 145 #include "input.h"
 146 #include "tree-vectorizer.h"
 147 #include "tree-pass.h"
 148
 149 /*************************************************************************
 150   Simple Loop Peeling Utilities
 151  *************************************************************************/
 152 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
 153   (struct loop *, struct loops *, edge);
 154 static void slpeel_update_phis_for_duplicate_loop
 155   (struct loop *, struct loop *, bool after);
 156 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
 157 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
 158
 159 static void allocate_new_names (bitmap);
 160 static void rename_use_op (use_operand_p);
 161 static void rename_def_op (def_operand_p, tree);
 162 static void rename_variables_in_bb (basic_block);
 163 static void free_new_names (bitmap);
 164 static void rename_variables_in_loop (struct loop *);
 165
 166 /*************************************************************************
 167   General Vectorization Utilities
 168  *************************************************************************/
 169 static void vect_set_dump_settings (void);
 170 static bool need_imm_uses_for (tree);
 171
 172 /* vect_dump will be set to stderr or dump_file if exist.  */
 173 FILE *vect_dump;
 174
 175 /* vect_verbosity_level set to an invalid value
 176    to mark that it's uninitialized.  */
 177 enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
 178
 179
 180 \f
 181 /*************************************************************************
 182   Simple Loop Peeling Utilities
 183
 184   Utilities to support loop peeling for vectorization purposes.
 185  *************************************************************************/
 186
 187
 188 /* For each definition in DEFINITIONS this function allocates
 189    new ssa name.  */
 190
 191 static void
 192 allocate_new_names (bitmap definitions)
 193 {
 194   unsigned ver;
 195   bitmap_iterator bi;
 196
 197   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 198     {
 199       tree def = ssa_name (ver);
 200       tree *new_name_ptr = xmalloc (sizeof (tree));
 201
 202       bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
 203
 204       *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
 205       SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
 206
 207       SSA_NAME_AUX (def) = new_name_ptr;
 208     }
 209 }
 210
 211
 212 /* Renames the use *OP_P.  */
 213
 214 static void
 215 rename_use_op (use_operand_p op_p)
 216 {
 217   tree *new_name_ptr;
 218
 219   if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
 220     return;
 221
 222   new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
 223
 224   /* Something defined outside of the loop.  */
 225   if (!new_name_ptr)
 226     return;
 227
 228   /* An ordinary ssa name defined in the loop.  */
 229
 230   SET_USE (op_p, *new_name_ptr);
 231 }
 232
 233
 234 /* Renames the def *OP_P in statement STMT.  */
 235
 236 static void
 237 rename_def_op (def_operand_p op_p, tree stmt)
 238 {
 239   tree *new_name_ptr;
 240
 241   if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
 242     return;
 243
 244   new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
 245
 246   /* Something defined outside of the loop.  */
 247   if (!new_name_ptr)
 248     return;
 249
 250   /* An ordinary ssa name defined in the loop.  */
 251
 252   SET_DEF (op_p, *new_name_ptr);
 253   SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
 254 }
 255
 256
 257 /* Renames the variables in basic block BB.  */
 258
 259 static void
 260 rename_variables_in_bb (basic_block bb)
 261 {
 262   tree phi;
 263   block_stmt_iterator bsi;
 264   tree stmt;
 265   stmt_ann_t ann;
 266   use_optype uses;
 267   vuse_optype vuses;
 268   def_optype defs;
 269   v_may_def_optype v_may_defs;
 270   v_must_def_optype v_must_defs;
 271   unsigned i;
 272   edge e;
 273   edge_iterator ei;
 274   struct loop *loop = bb->loop_father;
 275
 276   for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
 277     rename_def_op (PHI_RESULT_PTR (phi), phi);
 278
 279   for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
 280     {
 281       stmt = bsi_stmt (bsi);
 282       get_stmt_operands (stmt);
 283       ann = stmt_ann (stmt);
 284
 285       uses = USE_OPS (ann);
 286       for (i = 0; i < NUM_USES (uses); i++)
 287         rename_use_op (USE_OP_PTR (uses, i));
 288
 289       defs = DEF_OPS (ann);
 290       for (i = 0; i < NUM_DEFS (defs); i++)
 291         rename_def_op (DEF_OP_PTR (defs, i), stmt);
 292
 293       vuses = VUSE_OPS (ann);
 294       for (i = 0; i < NUM_VUSES (vuses); i++)
 295         rename_use_op (VUSE_OP_PTR (vuses, i));
 296
 297       v_may_defs = V_MAY_DEF_OPS (ann);
 298       for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
 299         {
 300           rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
 301           rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
 302         }
 303
 304       v_must_defs = V_MUST_DEF_OPS (ann);
 305       for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
 306         {
 307           rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
 308           rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
 309         }
 310     }
 311
 312   FOR_EACH_EDGE (e, ei, bb->succs)
 313     {
 314       if (!flow_bb_inside_loop_p (loop, e->dest))
 315         continue;
 316       for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
 317         rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
 318     }
 319 }
 320
 321
 322 /* Releases the structures holding the new ssa names.  */
 323
 324 static void
 325 free_new_names (bitmap definitions)
 326 {
 327   unsigned ver;
 328   bitmap_iterator bi;
 329
 330   EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
 331     {
 332       tree def = ssa_name (ver);
 333
 334       if (SSA_NAME_AUX (def))
 335         {
 336           free (SSA_NAME_AUX (def));
 337           SSA_NAME_AUX (def) = NULL;
 338         }
 339     }
 340 }
 341
 342
 343 /* Renames variables in new generated LOOP.  */
 344
 345 static void
 346 rename_variables_in_loop (struct loop *loop)
 347 {
 348   unsigned i;
 349   basic_block *bbs;
 350
 351   bbs = get_loop_body (loop);
 352
 353   for (i = 0; i < loop->num_nodes; i++)
 354     rename_variables_in_bb (bbs[i]);
 355
 356   free (bbs);
 357 }
 358
 359
 360 /* Update the PHI nodes of NEW_LOOP.
 361
 362    NEW_LOOP is a duplicate of ORIG_LOOP.
 363    AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
 364    AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
 365    executes before it.  */
 366
 367 static void
 368 slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
 369                                        struct loop *new_loop, bool after)
 370 {
 371   tree *new_name_ptr, new_ssa_name;
 372   tree phi_new, phi_orig;
 373   tree def;
 374   edge orig_loop_latch = loop_latch_edge (orig_loop);
 375   edge orig_entry_e = loop_preheader_edge (orig_loop);
 376   edge new_loop_exit_e = new_loop->single_exit;
 377   edge new_loop_entry_e = loop_preheader_edge (new_loop);
 378   edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
 379
 380   /*
 381      step 1. For each loop-header-phi:
 382              Add the first phi argument for the phi in NEW_LOOP
 383             (the one associated with the entry of NEW_LOOP)
 384
 385      step 2. For each loop-header-phi:
 386              Add the second phi argument for the phi in NEW_LOOP
 387             (the one associated with the latch of NEW_LOOP)
 388
 389      step 3. Update the phis in the successor block of NEW_LOOP.
 390
 391         case 1: NEW_LOOP was placed before ORIG_LOOP:
 392                 The successor block of NEW_LOOP is the header of ORIG_LOOP.
 393                 Updating the phis in the successor block can therefore be done
 394                 along with the scanning of the loop header phis, because the
 395                 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
 396                 phi nodes, organized in the same order.
 397
 398         case 2: NEW_LOOP was placed after ORIG_LOOP:
 399                 The successor block of NEW_LOOP is the original exit block of
 400                 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
 401                 We postpone updating these phis to a later stage (when
 402                 loop guards are added).
 403    */
 404
 405
 406   /* Scan the phis in the headers of the old and new loops
 407      (they are organized in exactly the same order).  */
 408
 409   for (phi_new = phi_nodes (new_loop->header),
 410        phi_orig = phi_nodes (orig_loop->header);
 411        phi_new && phi_orig;
 412        phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
 413     {
 414       /* step 1.  */
 415       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
 416       add_phi_arg (phi_new, def, new_loop_entry_e);
 417
 418       /* step 2.  */
 419       def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
 420       if (TREE_CODE (def) != SSA_NAME)
 421         continue;
 422
 423       new_name_ptr = SSA_NAME_AUX (def);
 424       if (!new_name_ptr)
 425         /* Something defined outside of the loop.  */
 426         continue;
 427
 428       /* An ordinary ssa name defined in the loop.  */
 429       new_ssa_name = *new_name_ptr;
 430       add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
 431
 432       /* step 3 (case 1).  */
 433       if (!after)
 434         {
 435           gcc_assert (new_loop_exit_e == orig_entry_e);
 436           SET_PHI_ARG_DEF (phi_orig,
 437                            new_loop_exit_e->dest_idx,
 438                            new_ssa_name);
 439         }
 440     }
 441 }
 442
 443
 444 /* Update PHI nodes for a guard of the LOOP.
 445
 446    Input:
 447    - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
 448         controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
 449         originates from the guard-bb, skips LOOP and reaches the (unique) exit
 450         bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
 451         We denote this bb NEW_MERGE_BB because it had a single predecessor (the
 452         LOOP header) before the guard code was added, and now it became a merge
 453         point of two paths - the path that ends with the LOOP exit-edge, and
 454         the path that ends with GUARD_EDGE.
 455
 456         This function creates and updates the relevant phi nodes to account for
 457         the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
 458         1. Create phi nodes at NEW_MERGE_BB.
 459         2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
 460            UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
 461            was added:
 462
 463         ===> The CFG before the guard-code was added:
 464         LOOP_header_bb:
 465           if (exit_loop) goto update_bb : LOOP_header_bb
 466         update_bb:
 467
 468         ==> The CFG after the guard-code was added:
 469         guard_bb:
 470           if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
 471         LOOP_header_bb:
 472           if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
 473         new_merge_bb:
 474           goto update_bb
 475         update_bb:
 476
 477    - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
 478         UPDATE_BB are loop entry phis, like the phis in the LOOP header,
 479         organized in the same order.
 480         If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
 481         loop exit phis.
 482
 483    - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
 484         "original" loop).  FALSE if LOOP is an original loop (not a newly
 485         created copy).  The SSA_NAME_AUX fields of the defs in the original
 486         loop are the corresponding new ssa-names used in the new duplicated
 487         loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
 488         nodes in UPDATE_BB takes the original ssa-name, and which takes the
 489         new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
 490         the LOOP-exit-edge takes the new-name, and the phi-arg that is
 491         associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
 492         FALSE, it's the other way around.
 493   */
 494
 495 static void
 496 slpeel_update_phi_nodes_for_guard (edge guard_edge,
 497                                    struct loop *loop,
 498                                    bool entry_phis,
 499                                    bool is_new_loop)
 500 {
 501   tree orig_phi, new_phi, update_phi;
 502   tree guard_arg, loop_arg;
 503   basic_block new_merge_bb = guard_edge->dest;
 504   edge e = single_succ_edge (new_merge_bb);
 505   basic_block update_bb = e->dest;
 506   basic_block orig_bb = (entry_phis ? loop->header : update_bb);
 507
 508   for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
 509        orig_phi && update_phi;
 510        orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
 511     {
 512       /* 1. Generate new phi node in NEW_MERGE_BB:  */
 513       new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
 514                                  new_merge_bb);
 515
 516       /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
 517             of LOOP. Set the two phi args in NEW_PHI for these edges:  */
 518       if (entry_phis)
 519         {
 520           loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
 521                                             loop_latch_edge (loop));
 522           guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
 523                                              loop_preheader_edge (loop));
 524         }
 525       else /* exit phis */
 526         {
 527           tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
 528           tree *new_name_ptr = SSA_NAME_AUX (orig_def);
 529           tree new_name;
 530
 531           if (new_name_ptr)
 532             new_name = *new_name_ptr;
 533           else
 534             /* Something defined outside of the loop  */
 535             new_name = orig_def;
 536
 537           if (is_new_loop)
 538             {
 539               guard_arg = orig_def;
 540               loop_arg = new_name;
 541             }
 542           else
 543             {
 544               guard_arg = new_name;
 545               loop_arg = orig_def;
 546             }
 547         }
 548       add_phi_arg (new_phi, loop_arg, loop->single_exit);
 549       add_phi_arg (new_phi, guard_arg, guard_edge);
 550
 551       /* 3. Update phi in successor block.  */
 552       gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
 553                   || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
 554       SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
 555     }
 556
 557   set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
 558 }
 559
 560
 561 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
 562    that starts at zero, increases by one and its limit is NITERS.
 563
 564    Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
 565
 566 void
 567 slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 568 {
 569   tree indx_before_incr, indx_after_incr, cond_stmt, cond;
 570   tree orig_cond;
 571   edge exit_edge = loop->single_exit;
 572   block_stmt_iterator loop_cond_bsi;
 573   block_stmt_iterator incr_bsi;
 574   bool insert_after;
 575   tree begin_label = tree_block_label (loop->latch);
 576   tree exit_label = tree_block_label (loop->single_exit->dest);
 577   tree init = build_int_cst (TREE_TYPE (niters), 0);
 578   tree step = build_int_cst (TREE_TYPE (niters), 1);
 579   tree then_label;
 580   tree else_label;
 581   LOC loop_loc;
 582
 583   orig_cond = get_loop_exit_condition (loop);
 584 #ifdef ENABLE_CHECKING
 585   gcc_assert (orig_cond);
 586 #endif
 587   loop_cond_bsi = bsi_for_stmt (orig_cond);
 588
 589   standard_iv_increment_position (loop, &incr_bsi, &insert_after);
 590   create_iv (init, step, NULL_TREE, loop,
 591              &incr_bsi, insert_after, &indx_before_incr, &indx_after_incr);
 592
 593   if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
 594     {
 595       cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
 596       then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 597       else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 598     }
 599   else /* 'then' edge loops back.  */
 600     {
 601       cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
 602       then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
 603       else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
 604     }
 605
 606   cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
 607                      then_label, else_label);
 608   bsi_insert_before (&loop_cond_bsi, cond_stmt, BSI_SAME_STMT);
 609
 610   /* Remove old loop exit test:  */
 611   bsi_remove (&loop_cond_bsi);
 612
 613   loop_loc = find_loop_location (loop);
 614   if (dump_file && (dump_flags & TDF_DETAILS))
 615     {
 616       if (loop_loc != UNKNOWN_LOC)
 617         fprintf (dump_file, "\nloop at %s:%d: ",
 618                  LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 619       print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
 620     }
 621
 622   loop->nb_iterations = niters;
 623 }
 624
 625
 626 /* Given LOOP this function generates a new copy of it and puts it
 627    on E which is either the entry or exit of LOOP.  */
 628
 629 static struct loop *
 630 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
 631                                         edge e)
 632 {
 633   struct loop *new_loop;
 634   basic_block *new_bbs, *bbs;
 635   bool at_exit;
 636   bool was_imm_dom;
 637   basic_block exit_dest;
 638   tree phi, phi_arg;
 639
 640   at_exit = (e == loop->single_exit);
 641   if (!at_exit && e != loop_preheader_edge (loop))
 642     return NULL;
 643
 644   bbs = get_loop_body (loop);
 645
 646   /* Check whether duplication is possible.  */
 647   if (!can_copy_bbs_p (bbs, loop->num_nodes))
 648     {
 649       free (bbs);
 650       return NULL;
 651     }
 652
 653   /* Generate new loop structure.  */
 654   new_loop = duplicate_loop (loops, loop, loop->outer);
 655   if (!new_loop)
 656     {
 657       free (bbs);
 658       return NULL;
 659     }
 660
 661   exit_dest = loop->single_exit->dest;
 662   was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
 663                                           exit_dest) == loop->header ?
 664                  true : false);
 665
 666   new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
 667
 668   copy_bbs (bbs, loop->num_nodes, new_bbs,
 669             &loop->single_exit, 1, &new_loop->single_exit, NULL);
 670
 671   /* Duplicating phi args at exit bbs as coming
 672      also from exit of duplicated loop.  */
 673   for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
 674     {
 675       phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->single_exit);
 676       if (phi_arg)
 677         {
 678           edge new_loop_exit_edge;
 679
 680           if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
 681             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
 682           else
 683             new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
 684
 685           add_phi_arg (phi, phi_arg, new_loop_exit_edge);
 686         }
 687     }
 688
 689   if (at_exit) /* Add the loop copy at exit.  */
 690     {
 691       redirect_edge_and_branch_force (e, new_loop->header);
 692       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
 693       if (was_imm_dom)
 694         set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
 695     }
 696   else /* Add the copy at entry.  */
 697     {
 698       edge new_exit_e;
 699       edge entry_e = loop_preheader_edge (loop);
 700       basic_block preheader = entry_e->src;
 701
 702       if (!flow_bb_inside_loop_p (new_loop,
 703                                   EDGE_SUCC (new_loop->header, 0)->dest))
 704         new_exit_e = EDGE_SUCC (new_loop->header, 0);
 705       else
 706         new_exit_e = EDGE_SUCC (new_loop->header, 1);
 707
 708       redirect_edge_and_branch_force (new_exit_e, loop->header);
 709       set_immediate_dominator (CDI_DOMINATORS, loop->header,
 710                                new_exit_e->src);
 711
 712       /* We have to add phi args to the loop->header here as coming
 713          from new_exit_e edge.  */
 714       for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
 715         {
 716           phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
 717           if (phi_arg)
 718             add_phi_arg (phi, phi_arg, new_exit_e);
 719         }
 720
 721       redirect_edge_and_branch_force (entry_e, new_loop->header);
 722       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
 723     }
 724
 725   free (new_bbs);
 726   free (bbs);
 727
 728   return new_loop;
 729 }
 730
 731
 732 /* Given the condition statement COND, put it as the last statement
 733    of GUARD_BB; EXIT_BB is the basic block to skip the loop;
 734    Assumes that this is the single exit of the guarded loop.
 735    Returns the skip edge.  */
 736
 737 static edge
 738 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
 739                         basic_block dom_bb)
 740 {
 741   block_stmt_iterator bsi;
 742   edge new_e, enter_e;
 743   tree cond_stmt, then_label, else_label;
 744
 745   enter_e = single_succ_edge (guard_bb);
 746   enter_e->flags &= ~EDGE_FALLTHRU;
 747   enter_e->flags |= EDGE_FALSE_VALUE;
 748   bsi = bsi_last (guard_bb);
 749
 750   then_label = build1 (GOTO_EXPR, void_type_node,
 751                        tree_block_label (exit_bb));
 752   else_label = build1 (GOTO_EXPR, void_type_node,
 753                        tree_block_label (enter_e->dest));
 754   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
 755                      then_label, else_label);
 756   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
 757   /* Add new edge to connect entry block to the second loop.  */
 758   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
 759   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
 760   return new_e;
 761 }
 762
 763
 764 /* This function verifies that the following restrictions apply to LOOP:
 765    (1) it is innermost
 766    (2) it consists of exactly 2 basic blocks - header, and an empty latch.
 767    (3) it is single entry, single exit
 768    (4) its exit condition is the last stmt in the header
 769    (5) E is the entry/exit edge of LOOP.
 770  */
 771
 772 bool
 773 slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
 774 {
 775   edge exit_e = loop->single_exit;
 776   edge entry_e = loop_preheader_edge (loop);
 777   tree orig_cond = get_loop_exit_condition (loop);
 778   block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
 779
 780   if (any_marked_for_rewrite_p ())
 781     return false;
 782
 783   if (loop->inner
 784       /* All loops have an outer scope; the only case loop->outer is NULL is for
 785          the function itself.  */
 786       || !loop->outer
 787       || loop->num_nodes != 2
 788       || !empty_block_p (loop->latch)
 789       || !loop->single_exit
 790       /* Verify that new loop exit condition can be trivially modified.  */
 791       || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
 792       || (e != exit_e && e != entry_e))
 793     return false;
 794
 795   return true;
 796 }
 797
 798 #ifdef ENABLE_CHECKING
 799 void
 800 slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 801                                  struct loop *second_loop)
 802 {
 803   basic_block loop1_exit_bb = first_loop->single_exit->dest;
 804   basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
 805   basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
 806
 807   /* A guard that controls whether the second_loop is to be executed or skipped
 808      is placed in first_loop->exit.  first_loopt->exit therefore has two
 809      successors - one is the preheader of second_loop, and the other is a bb
 810      after second_loop.
 811    */
 812   gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
 813
 814   /* 1. Verify that one of the successors of first_loopt->exit is the preheader
 815         of second_loop.  */
 816
 817   /* The preheader of new_loop is expected to have two predessors:
 818      first_loop->exit and the block that precedes first_loop.  */
 819
 820   gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
 821               && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
 822                    && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
 823                || (EDGE_PRED (loop2_entry_bb, 1)->src ==  loop1_exit_bb
 824                    && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
 825
 826   /* Verify that the other successor of first_loopt->exit is after the
 827      second_loop.  */
 828   /* TODO */
 829 }
 830 #endif
 831
 832 /* Function slpeel_tree_peel_loop_to_edge.
 833
 834    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
 835    that is placed on the entry (exit) edge E of LOOP. After this transformation
 836    we have two loops one after the other - first-loop iterates FIRST_NITERS
 837    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
 838
 839    Input:
 840    - LOOP: the loop to be peeled.
 841    - E: the exit or entry edge of LOOP.
 842         If it is the entry edge, we peel the first iterations of LOOP. In this
 843         case first-loop is LOOP, and second-loop is the newly created loop.
 844         If it is the exit edge, we peel the last iterations of LOOP. In this
 845         case, first-loop is the newly created loop, and second-loop is LOOP.
 846    - NITERS: the number of iterations that LOOP iterates.
 847    - FIRST_NITERS: the number of iterations that the first-loop should iterate.
 848    - UPDATE_FIRST_LOOP_COUNT:  specified whether this function is responsible
 849         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
 850         is false, the caller of this function may want to take care of this
 851         (this can be useful if we don't want new stmts added to first-loop).
 852
 853    Output:
 854    The function returns a pointer to the new loop-copy, or NULL if it failed
 855    to perform the transformation.
 856
 857    The function generates two if-then-else guards: one before the first loop,
 858    and the other before the second loop:
 859    The first guard is:
 860      if (FIRST_NITERS == 0) then skip the first loop,
 861      and go directly to the second loop.
 862    The second guard is:
 863      if (FIRST_NITERS == NITERS) then skip the second loop.
 864
 865    FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
 866    FORNOW the resulting code will not be in loop-closed-ssa form.
 867 */
 868
 869 struct loop*
 870 slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
 871                                edge e, tree first_niters,
 872                                tree niters, bool update_first_loop_count)
 873 {
 874   struct loop *new_loop = NULL, *first_loop, *second_loop;
 875   edge skip_e;
 876   tree pre_condition;
 877   bitmap definitions;
 878   basic_block bb_before_second_loop, bb_after_second_loop;
 879   basic_block bb_before_first_loop;
 880   basic_block bb_between_loops;
 881   edge exit_e = loop->single_exit;
 882   LOC loop_loc;
 883
 884   if (!slpeel_can_duplicate_loop_p (loop, e))
 885     return NULL;
 886
 887   /* We have to initialize cfg_hooks. Then, when calling
 888    cfg_hooks->split_edge, the function tree_split_edge
 889    is actually called and, when calling cfg_hooks->duplicate_block,
 890    the function tree_duplicate_bb is called.  */
 891   tree_register_cfg_hooks ();
 892
 893
 894   /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
 895         Resulting CFG would be:
 896
 897         first_loop:
 898         do {
 899         } while ...
 900
 901         second_loop:
 902         do {
 903         } while ...
 904
 905         orig_exit_bb:
 906    */
 907
 908   if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
 909     {
 910       loop_loc = find_loop_location (loop);
 911       if (dump_file && (dump_flags & TDF_DETAILS))
 912         {
 913           if (loop_loc != UNKNOWN_LOC)
 914             fprintf (dump_file, "\n%s:%d: note: ",
 915                      LOC_FILE (loop_loc), LOC_LINE (loop_loc));
 916           fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
 917         }
 918       return NULL;
 919     }
 920
 921   if (e == exit_e)
 922     {
 923       /* NEW_LOOP was placed after LOOP.  */
 924       first_loop = loop;
 925       second_loop = new_loop;
 926     }
 927   else
 928     {
 929       /* NEW_LOOP was placed before LOOP.  */
 930       first_loop = new_loop;
 931       second_loop = loop;
 932     }
 933
 934   definitions = marked_ssa_names ();
 935   allocate_new_names (definitions);
 936   slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
 937   rename_variables_in_loop (new_loop);
 938
 939
 940   /* 2. Add the guard that controls whether the first loop is executed.
 941         Resulting CFG would be:
 942
 943         bb_before_first_loop:
 944         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
 945                                GOTO first-loop
 946
 947         first_loop:
 948         do {
 949         } while ...
 950
 951         bb_before_second_loop:
 952
 953         second_loop:
 954         do {
 955         } while ...
 956
 957         orig_exit_bb:
 958    */
 959
 960   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
 961   add_bb_to_loop (bb_before_first_loop, first_loop->outer);
 962   bb_before_second_loop = split_edge (first_loop->single_exit);
 963   add_bb_to_loop (bb_before_second_loop, first_loop->outer);
 964
 965   pre_condition =
 966     fold (build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node));
 967   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
 968                                   bb_before_second_loop, bb_before_first_loop);
 969   slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
 970                                      first_loop == new_loop);
 971
 972
 973   /* 3. Add the guard that controls whether the second loop is executed.
 974         Resulting CFG would be:
 975
 976         bb_before_first_loop:
 977         if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
 978                                GOTO first-loop
 979
 980         first_loop:
 981         do {
 982         } while ...
 983
 984         bb_between_loops:
 985         if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
 986                                     GOTO bb_before_second_loop
 987
 988         bb_before_second_loop:
 989
 990         second_loop:
 991         do {
 992         } while ...
 993
 994         bb_after_second_loop:
 995
 996         orig_exit_bb:
 997    */
 998
 999   bb_between_loops = split_edge (first_loop->single_exit);
1000   add_bb_to_loop (bb_between_loops, first_loop->outer);
1001   bb_after_second_loop = split_edge (second_loop->single_exit);
1002   add_bb_to_loop (bb_after_second_loop, second_loop->outer);
1003
1004   pre_condition =
1005         fold (build2 (EQ_EXPR, boolean_type_node, first_niters, niters));
1006   skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1007                                   bb_after_second_loop, bb_before_first_loop);
1008   slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
1009                                      second_loop == new_loop);
1010
1011   /* Flow loop scan does not update loop->single_exit field.  */
1012   first_loop->single_exit = first_loop->single_exit;
1013   second_loop->single_exit = second_loop->single_exit;
1014
1015   /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1016    */
1017   if (update_first_loop_count)
1018     slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
1019
1020   free_new_names (definitions);
1021   BITMAP_FREE (definitions);
1022   unmark_all_for_rewrite ();
1023
1024   return new_loop;
1025 }
1026
1027 /* Function vect_get_loop_location.
1028
1029    Extract the location of the loop in the source code.
1030    If the loop is not well formed for vectorization, an estimated
1031    location is calculated.
1032    Return the loop location if succeed and NULL if not.  */
1033
1034 LOC
1035 find_loop_location (struct loop *loop)
1036 {
1037   tree node = NULL_TREE;
1038   basic_block bb;
1039   block_stmt_iterator si;
1040
1041   if (!loop)
1042     return UNKNOWN_LOC;
1043
1044   node = get_loop_exit_condition (loop);
1045
1046   if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
1047       && EXPR_FILENAME (node) && EXPR_LINENO (node))
1048     return EXPR_LOC (node);
1049
1050   /* If we got here the loop is probably not "well formed",
1051      try to estimate the loop location */
1052
1053   if (!loop->header)
1054     return UNKNOWN_LOC;
1055
1056   bb = loop->header;
1057
1058   for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1059     {
1060       node = bsi_stmt (si);
1061       if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
1062         return EXPR_LOC (node);
1063     }
1064
1065   return UNKNOWN_LOC;
1066 }
1067
1068
1069 /*************************************************************************
1070   Vectorization Debug Information.
1071  *************************************************************************/
1072
1073 /* Function vect_set_verbosity_level.
1074
1075    Called from toplev.c upon detection of the
1076    -ftree-vectorizer-verbose=N option.  */
1077
1078 void
1079 vect_set_verbosity_level (const char *val)
1080 {
1081    unsigned int vl;
1082
1083    vl = atoi (val);
1084    if (vl < MAX_VERBOSITY_LEVEL)
1085      vect_verbosity_level = vl;
1086    else
1087      vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1088 }
1089
1090
1091 /* Function vect_set_dump_settings.
1092
1093    Fix the verbosity level of the vectorizer if the
1094    requested level was not set explicitly using the flag
1095    -ftree-vectorizer-verbose=N.
1096    Decide where to print the debugging information (dump_file/stderr).
1097    If the user defined the verbosity level, but there is no dump file,
1098    print to stderr, otherwise print to the dump file.  */
1099
1100 static void
1101 vect_set_dump_settings (void)
1102 {
1103   vect_dump = dump_file;
1104
1105   /* Check if the verbosity level was defined by the user:  */
1106   if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1107     {
1108       /* If there is no dump file, print to stderr.  */
1109       if (!dump_file)
1110         vect_dump = stderr;
1111       return;
1112     }
1113
1114   /* User didn't specify verbosity level:  */
1115   if (dump_file && (dump_flags & TDF_DETAILS))
1116     vect_verbosity_level = REPORT_DETAILS;
1117   else if (dump_file && (dump_flags & TDF_STATS))
1118     vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1119   else
1120     vect_verbosity_level = REPORT_NONE;
1121
1122   gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
1123 }
1124
1125
1126 /* Function debug_loop_details.
1127
1128    For vectorization debug dumps.  */
1129
1130 bool
1131 vect_print_dump_info (enum verbosity_levels vl, LOC loc)
1132 {
1133   if (vl > vect_verbosity_level)
1134     return false;
1135
1136   if (loc == UNKNOWN_LOC)
1137     fprintf (vect_dump, "\n%s:%d: note: ",
1138                  DECL_SOURCE_FILE (current_function_decl),
1139                  DECL_SOURCE_LINE (current_function_decl));
1140   else
1141     fprintf (vect_dump, "\n%s:%d: note: ", LOC_FILE (loc), LOC_LINE (loc));
1142
1143
1144   return true;
1145 }
1146
1147
1148 /*************************************************************************
1149   Vectorization Utilities.
1150  *************************************************************************/
1151
1152 /* Function new_stmt_vec_info.
1153
1154    Create and initialize a new stmt_vec_info struct for STMT.  */
1155
1156 stmt_vec_info
1157 new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
1158 {
1159   stmt_vec_info res;
1160   res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1161
1162   STMT_VINFO_TYPE (res) = undef_vec_info_type;
1163   STMT_VINFO_STMT (res) = stmt;
1164   STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
1165   STMT_VINFO_RELEVANT_P (res) = 0;
1166   STMT_VINFO_VECTYPE (res) = NULL;
1167   STMT_VINFO_VEC_STMT (res) = NULL;
1168   STMT_VINFO_DATA_REF (res) = NULL;
1169   STMT_VINFO_MEMTAG (res) = NULL;
1170   STMT_VINFO_SUBVARS (res) = NULL;
1171   STMT_VINFO_VECT_DR_BASE_ADDRESS (res) = NULL;
1172   STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
1173   STMT_VINFO_VECT_STEP (res) = NULL_TREE;
1174   STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
1175   STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;
1176
1177   return res;
1178 }
1179
1180
1181 /* Function new_loop_vec_info.
1182
1183    Create and initialize a new loop_vec_info struct for LOOP, as well as
1184    stmt_vec_info structs for all the stmts in LOOP.  */
1185
1186 loop_vec_info
1187 new_loop_vec_info (struct loop *loop)
1188 {
1189   loop_vec_info res;
1190   basic_block *bbs;
1191   block_stmt_iterator si;
1192   unsigned int i;
1193
1194   res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1195
1196   bbs = get_loop_body (loop);
1197
1198   /* Create stmt_info for all stmts in the loop.  */
1199   for (i = 0; i < loop->num_nodes; i++)
1200     {
1201       basic_block bb = bbs[i];
1202       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1203         {
1204           tree stmt = bsi_stmt (si);
1205           stmt_ann_t ann;
1206
1207           get_stmt_operands (stmt);
1208           ann = stmt_ann (stmt);
1209           set_stmt_info (ann, new_stmt_vec_info (stmt, res));
1210         }
1211     }
1212
1213   LOOP_VINFO_LOOP (res) = loop;
1214   LOOP_VINFO_BBS (res) = bbs;
1215   LOOP_VINFO_EXIT_COND (res) = NULL;
1216   LOOP_VINFO_NITERS (res) = NULL;
1217   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1218   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
1219   LOOP_VINFO_VECT_FACTOR (res) = 0;
1220   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1221                            "loop_write_datarefs");
1222   VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1223                            "loop_read_datarefs");
1224   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1225   LOOP_VINFO_LOC (res) = UNKNOWN_LOC;
1226
1227   return res;
1228 }
1229
1230
1231 /* Function destroy_loop_vec_info.
1232
1233    Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1234    stmts in the loop.  */
1235
1236 void
1237 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1238 {
1239   struct loop *loop;
1240   basic_block *bbs;
1241   int nbbs;
1242   block_stmt_iterator si;
1243   int j;
1244
1245   if (!loop_vinfo)
1246     return;
1247
1248   loop = LOOP_VINFO_LOOP (loop_vinfo);
1249
1250   bbs = LOOP_VINFO_BBS (loop_vinfo);
1251   nbbs = loop->num_nodes;
1252
1253   for (j = 0; j < nbbs; j++)
1254     {
1255       basic_block bb = bbs[j];
1256       for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1257         {
1258           tree stmt = bsi_stmt (si);
1259           stmt_ann_t ann = stmt_ann (stmt);
1260           stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1261           free (stmt_info);
1262           set_stmt_info (ann, NULL);
1263         }
1264     }
1265
1266   free (LOOP_VINFO_BBS (loop_vinfo));
1267   varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1268   varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1269
1270   free (loop_vinfo);
1271 }
1272
1273
1274 /* Function vect_strip_conversions
1275
1276    Strip conversions that don't narrow the mode.  */
1277
1278 tree
1279 vect_strip_conversion (tree expr)
1280 {
1281   tree to, ti, oprnd0;
1282
1283   while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
1284     {
1285       to = TREE_TYPE (expr);
1286       oprnd0 = TREE_OPERAND (expr, 0);
1287       ti = TREE_TYPE (oprnd0);
1288
1289       if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
1290         return NULL_TREE;
1291       if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
1292         return NULL_TREE;
1293
1294       expr = oprnd0;
1295     }
1296   return expr;
1297 }
1298
1299
1300 /* Function vect_force_dr_alignment_p.
1301
1302    Returns whether the alignment of a DECL can be forced to be aligned
1303    on ALIGNMENT bit boundary.  */
1304
1305 bool
1306 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1307 {
1308   if (TREE_CODE (decl) != VAR_DECL)
1309     return false;
1310
1311   if (DECL_EXTERNAL (decl))
1312     return false;
1313
1314   if (TREE_ASM_WRITTEN (decl))
1315     return false;
1316
1317   if (TREE_STATIC (decl))
1318     return (alignment <= MAX_OFILE_ALIGNMENT);
1319   else
1320     /* This is not 100% correct.  The absolute correct stack alignment
1321        is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
1322        PREFERRED_STACK_BOUNDARY is honored by all translation units.
1323        However, until someone implements forced stack alignment, SSE
1324        isn't really usable without this.  */
1325     return (alignment <= PREFERRED_STACK_BOUNDARY);
1326 }
1327
1328
1329 /* Function get_vectype_for_scalar_type.
1330
1331    Returns the vector type corresponding to SCALAR_TYPE as supported
1332    by the target.  */
1333
1334 tree
1335 get_vectype_for_scalar_type (tree scalar_type)
1336 {
1337   enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1338   int nbytes = GET_MODE_SIZE (inner_mode);
1339   int nunits;
1340   tree vectype;
1341
1342   if (nbytes == 0)
1343     return NULL_TREE;
1344
1345   /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1346      is expected.  */
1347   nunits = UNITS_PER_SIMD_WORD / nbytes;
1348
1349   vectype = build_vector_type (scalar_type, nunits);
1350   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1351     {
1352       fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1353       print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1354     }
1355
1356   if (!vectype)
1357     return NULL_TREE;
1358
1359   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1360     {
1361       fprintf (vect_dump, "vectype: ");
1362       print_generic_expr (vect_dump, vectype, TDF_SLIM);
1363     }
1364
1365   if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1366     {
1367       /* TODO: tree-complex.c sometimes can parallelize operations
1368          on generic vectors.  We can vectorize the loop in that case,
1369          but then we should re-run the lowering pass.  */
1370       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1371         fprintf (vect_dump, "mode not supported by target.");
1372       return NULL_TREE;
1373     }
1374
1375   return vectype;
1376 }
1377
1378
1379 /* Function vect_supportable_dr_alignment
1380
1381    Return whether the data reference DR is supported with respect to its
1382    alignment.  */
1383
1384 enum dr_alignment_support
1385 vect_supportable_dr_alignment (struct data_reference *dr)
1386 {
1387   tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
1388   enum machine_mode mode = (int) TYPE_MODE (vectype);
1389
1390   if (aligned_access_p (dr))
1391     return dr_aligned;
1392
1393   /* Possibly unaligned access.  */
1394
1395   if (DR_IS_READ (dr))
1396     {
1397       if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1398           && (!targetm.vectorize.builtin_mask_for_load
1399               || targetm.vectorize.builtin_mask_for_load ()))
1400         return dr_unaligned_software_pipeline;
1401
1402       if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1403         /* Can't software pipeline the loads, but can at least do them.  */
1404         return dr_unaligned_supported;
1405     }
1406
1407   /* Unsupported.  */
1408   return dr_unaligned_unsupported;
1409 }
1410
1411
1412 /* Function vect_is_simple_use.
1413
1414    Input:
1415    LOOP - the loop that is being vectorized.
1416    OPERAND - operand of a stmt in LOOP.
1417    DEF - the defining stmt in case OPERAND is an SSA_NAME.
1418
1419    Returns whether a stmt with OPERAND can be vectorized.
1420    Supportable operands are constants, loop invariants, and operands that are
1421    defined by the current iteration of the loop. Unsupportable operands are
1422    those that are defined by a previous iteration of the loop (as is the case
1423    in reduction/induction computations).  */
1424
1425 bool
1426 vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def)
1427 {
1428   tree def_stmt;
1429   basic_block bb;
1430   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1431
1432   if (def)
1433     *def = NULL_TREE;
1434
1435   if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1436     return true;
1437
1438   if (TREE_CODE (operand) != SSA_NAME)
1439     return false;
1440
1441   def_stmt = SSA_NAME_DEF_STMT (operand);
1442   if (def_stmt == NULL_TREE )
1443     {
1444       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1445         fprintf (vect_dump, "no def_stmt.");
1446       return false;
1447     }
1448
1449   /* empty stmt is expected only in case of a function argument.
1450      (Otherwise - we expect a phi_node or a modify_expr).  */
1451   if (IS_EMPTY_STMT (def_stmt))
1452     {
1453       tree arg = TREE_OPERAND (def_stmt, 0);
1454       if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1455         return true;
1456       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1457         {
1458           fprintf (vect_dump, "Unexpected empty stmt: ");
1459           print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1460         }
1461       return false;
1462     }
1463
1464   /* phi_node inside the loop indicates an induction/reduction pattern.
1465      This is not supported yet.  */
1466   bb = bb_for_stmt (def_stmt);
1467   if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1468     {
1469       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1470         fprintf (vect_dump, "reduction/induction - unsupported.");
1471       return false; /* FORNOW: not supported yet.  */
1472     }
1473
1474   /* Expecting a modify_expr or a phi_node.  */
1475   if (TREE_CODE (def_stmt) == MODIFY_EXPR
1476       || TREE_CODE (def_stmt) == PHI_NODE)
1477     {
1478       if (def)
1479         *def = def_stmt;
1480       return true;
1481     }
1482
1483   return false;
1484 }
1485
1486
1487 /* Function vect_is_simple_iv_evolution.
1488
1489    FORNOW: A simple evolution of an induction variables in the loop is
1490    considered a polynomial evolution with constant step.  */
1491
1492 bool
1493 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
1494                              tree * step)
1495 {
1496   tree init_expr;
1497   tree step_expr;
1498
1499   tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
1500
1501   /* When there is no evolution in this loop, the evolution function
1502      is not "simple".  */
1503   if (evolution_part == NULL_TREE)
1504     return false;
1505
1506   /* When the evolution is a polynomial of degree >= 2
1507      the evolution function is not "simple".  */
1508   if (tree_is_chrec (evolution_part))
1509     return false;
1510
1511   step_expr = evolution_part;
1512   init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
1513                                                            loop_nb));
1514
1515   if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1516     {
1517       fprintf (vect_dump, "step: ");
1518       print_generic_expr (vect_dump, step_expr, TDF_SLIM);
1519       fprintf (vect_dump, ",  init: ");
1520       print_generic_expr (vect_dump, init_expr, TDF_SLIM);
1521     }
1522
1523   *init = init_expr;
1524   *step = step_expr;
1525
1526   if (TREE_CODE (step_expr) != INTEGER_CST)
1527     {
1528       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1529         fprintf (vect_dump, "step unknown.");
1530       return false;
1531     }
1532
1533   return true;
1534 }
1535
1536
1537 /* Function need_imm_uses_for.
1538
1539    Return whether we ought to include information for 'var'
1540    when calculating immediate uses.  For this pass we only want use
1541    information for non-virtual variables.  */
1542
1543 static bool
1544 need_imm_uses_for (tree var)
1545 {
1546   return is_gimple_reg (var);
1547 }
1548
1549
1550 /* Function vectorize_loops.
1551
1552    Entry Point to loop vectorization phase.  */
1553
1554 void
1555 vectorize_loops (struct loops *loops)
1556 {
1557   unsigned int i, loops_num;
1558   unsigned int num_vectorized_loops = 0;
1559
1560   /* Fix the verbosity level if not defined explicitly by the user.  */
1561   vect_set_dump_settings ();
1562
1563   /* Does the target support SIMD?  */
1564   /* FORNOW: until more sophisticated machine modelling is in place.  */
1565   if (!UNITS_PER_SIMD_WORD)
1566     {
1567       if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1568         fprintf (vect_dump, "vectorizer: target vector size is not defined.");
1569       return;
1570     }
1571
1572 #ifdef ENABLE_CHECKING
1573   verify_loop_closed_ssa ();
1574 #endif
1575
1576   compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
1577
1578   /*  ----------- Analyze loops. -----------  */
1579
1580   /* If some loop was duplicated, it gets bigger number
1581      than all previously defined loops. This fact allows us to run
1582      only over initial loops skipping newly generated ones.  */
1583   loops_num = loops->num;
1584   for (i = 1; i < loops_num; i++)
1585     {
1586       loop_vec_info loop_vinfo;
1587       struct loop *loop = loops->parray[i];
1588
1589       if (!loop)
1590         continue;
1591
1592       loop_vinfo = vect_analyze_loop (loop);
1593       loop->aux = loop_vinfo;
1594
1595       if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
1596         continue;
1597
1598       vect_transform_loop (loop_vinfo, loops);
1599       num_vectorized_loops++;
1600     }
1601
1602   if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, UNKNOWN_LOC))
1603     fprintf (vect_dump, "vectorized %u loops in function.\n",
1604              num_vectorized_loops);
1605
1606   /*  ----------- Finalize. -----------  */
1607
1608   free_df ();
1609   for (i = 1; i < loops_num; i++)
1610     {
1611       struct loop *loop = loops->parray[i];
1612       loop_vec_info loop_vinfo;
1613
1614       if (!loop)
1615         continue;
1616       loop_vinfo = loop->aux;
1617       destroy_loop_vec_info (loop_vinfo);
1618       loop->aux = NULL;
1619     }
1620
1621   rewrite_into_ssa (false);
1622   rewrite_into_loop_closed_ssa (NULL); /* FORNOW */
1623   bitmap_clear (vars_to_rename);
1624 }