* tree-vectorizer.c (vect_can_force_dr_alignment_p): Return false for
[official-gcc.git] / gcc / tree-vectorizer.c
blobd476d813ff2789c21ca59bc618d89602a6a4643a
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "toplev.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
148 #include "langhooks.h"
151 /*************************************************************************
152 Simple Loop Peeling Utilities
153 *************************************************************************/
155 /* Entry point for peeling of simple loops.
156 Peel the first/last iterations of a loop.
157 It can be used outside of the vectorizer for loops that are simple enough
158 (see function documentation). In the vectorizer it is used to peel the
159 last few iterations when the loop bound is unknown or does not evenly
160 divide by the vectorization factor, and to peel the first few iterations
161 to force the alignment of data references in the loop. */
162 struct loop *slpeel_tree_peel_loop_to_edge
163 (struct loop *, struct loops *, edge, tree, tree, bool);
164 static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
165 (struct loop *, struct loops *, edge);
166 static void slpeel_update_phis_for_duplicate_loop
167 (struct loop *, struct loop *, bool after);
168 static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
169 static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
170 static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
171 static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
172 static void allocate_new_names (bitmap);
173 static void rename_use_op (use_operand_p);
174 static void rename_def_op (def_operand_p, tree);
175 static void rename_variables_in_bb (basic_block);
176 static void free_new_names (bitmap);
177 static void rename_variables_in_loop (struct loop *);
178 #ifdef ENABLE_CHECKING
179 static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
180 #endif
183 /*************************************************************************
184 Vectorization Utilities.
185 *************************************************************************/
187 /* Main analysis functions. */
188 static loop_vec_info vect_analyze_loop (struct loop *);
189 static loop_vec_info vect_analyze_loop_form (struct loop *);
190 static bool vect_analyze_data_refs (loop_vec_info);
191 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
192 static bool vect_analyze_scalar_cycles (loop_vec_info);
193 static bool vect_analyze_data_ref_accesses (loop_vec_info);
194 static bool vect_analyze_data_refs_alignment (loop_vec_info);
195 static bool vect_compute_data_refs_alignment (loop_vec_info);
196 static bool vect_analyze_operations (loop_vec_info);
198 /* Main code transformation functions. */
199 static void vect_transform_loop (loop_vec_info, struct loops *);
200 static bool vect_transform_stmt (tree, block_stmt_iterator *);
201 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
202 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
203 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
204 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
205 static enum dr_alignment_support vect_supportable_dr_alignment
206 (struct data_reference *);
207 static void vect_align_data_ref (tree);
208 static void vect_enhance_data_refs_alignment (loop_vec_info);
210 /* Utility functions for the analyses. */
211 static bool vect_is_simple_use (tree , struct loop *, tree *);
212 static bool exist_non_indexing_operands_for_use_p (tree, tree);
213 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
214 static void vect_mark_relevant (varray_type, tree);
215 static bool vect_stmt_relevant_p (tree, loop_vec_info);
216 static tree vect_get_loop_niters (struct loop *, tree *);
217 static bool vect_compute_data_ref_alignment
218 (struct data_reference *, loop_vec_info);
219 static bool vect_analyze_data_ref_access (struct data_reference *);
220 static bool vect_get_first_index (tree, tree *);
221 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
222 static struct data_reference * vect_analyze_pointer_ref_access
223 (tree, tree, bool);
224 static bool vect_can_advance_ivs_p (struct loop *);
225 static tree vect_get_base_and_bit_offset
226 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
227 static struct data_reference * vect_analyze_pointer_ref_access
228 (tree, tree, bool);
229 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
230 static tree vect_compute_array_ref_alignment
231 (struct data_reference *, loop_vec_info, tree, tree *);
232 static tree vect_get_ptr_offset (tree, tree, tree *);
233 static tree vect_get_symbl_and_dr
234 (tree, tree, bool, loop_vec_info, struct data_reference **);
236 /* Utility functions for the code transformation. */
237 static tree vect_create_destination_var (tree, tree);
238 static tree vect_create_data_ref_ptr
239 (tree, block_stmt_iterator *, tree, tree *, bool);
240 static tree vect_create_index_for_vector_ref
241 (struct loop *, block_stmt_iterator *);
242 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
243 static tree get_vectype_for_scalar_type (tree);
244 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
245 static tree vect_get_vec_def_for_operand (tree, tree);
246 static tree vect_init_vector (tree, tree);
247 static void vect_finish_stmt_generation
248 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
250 /* Utility function dealing with loop peeling (not peeling itself). */
251 static void vect_generate_tmps_on_preheader
252 (loop_vec_info, tree *, tree *, tree *);
253 static tree vect_build_loop_niters (loop_vec_info);
254 static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
255 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
256 static void vect_update_inits_of_dr
257 (struct data_reference *, struct loop *, tree niters);
258 static void vect_update_inits_of_drs (loop_vec_info, tree);
259 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
260 static void vect_do_peeling_for_loop_bound
261 (loop_vec_info, tree *, struct loops *);
263 /* Utilities for creation and deletion of vec_info structs. */
264 loop_vec_info new_loop_vec_info (struct loop *loop);
265 void destroy_loop_vec_info (loop_vec_info);
266 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
268 static bool vect_debug_stats (struct loop *loop);
269 static bool vect_debug_details (struct loop *loop);
272 /*************************************************************************
273 Simple Loop Peeling Utilities
275 Utilities to support loop peeling for vectorization purposes.
276 *************************************************************************/
/* For each SSA-name version set in DEFINITIONS, create a duplicate
   ssa-name and record it on the original name via SSA_NAME_AUX.
   The duplicates are later used by the rename_* routines when renaming
   the peeled copy of a loop.  The heap cell allocated here is released
   by free_new_names.  */

static void
allocate_new_names (bitmap definitions)
{
  unsigned ver;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
    {
      tree def = ssa_name (ver);
      /* One heap cell per name; hung off SSA_NAME_AUX below and freed
	 in free_new_names.  */
      tree *new_name_ptr = xmalloc (sizeof (tree));

      /* Preserve the abnormal-phi property on the duplicate.  */
      bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);

      *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
      SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;

      SSA_NAME_AUX (def) = new_name_ptr;
    }
}
/* Renames the use *OP_P to the duplicate ssa-name recorded in
   SSA_NAME_AUX (by allocate_new_names), if there is one.  Uses of
   names defined outside the peeled loop carry no SSA_NAME_AUX and are
   left untouched.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree *new_name_ptr;

  /* Only SSA names participate in renaming.  */
  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name_ptr)
    return;

  /* An ordinary ssa name defined in the loop.  */
  SET_USE (op_p, *new_name_ptr);
}
/* Renames the def *OP_P in statement STMT to the duplicate ssa-name
   recorded in SSA_NAME_AUX, if there is one, and makes STMT the
   defining statement of the new name.  Defs without a recorded
   duplicate (defined outside the loop) are left untouched.  */

static void
rename_def_op (def_operand_p op_p, tree stmt)
{
  tree *new_name_ptr;

  /* Only SSA names participate in renaming.  */
  if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name_ptr)
    return;

  /* An ordinary ssa name defined in the loop.  */
  SET_DEF (op_p, *new_name_ptr);
  /* The duplicate name is now defined by STMT.  */
  SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
}
/* Renames the variables in basic block BB: phi results, and every
   real/virtual use and def of every statement, using the duplicate
   ssa-names recorded in SSA_NAME_AUX.  Also renames the phi arguments
   in successor blocks that lie inside BB's loop (arguments coming in
   over BB's outgoing edges).  */

static void
rename_variables_in_bb (basic_block bb)
{
  tree phi;
  block_stmt_iterator bsi;
  tree stmt;
  stmt_ann_t ann;
  use_optype uses;
  vuse_optype vuses;
  def_optype defs;
  v_may_def_optype v_may_defs;
  v_must_def_optype v_must_defs;
  unsigned i;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  /* Phi results are defs of this block.  */
  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    rename_def_op (PHI_RESULT_PTR (phi), phi);

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      stmt = bsi_stmt (bsi);
      /* Make sure the operand caches below are up to date.  */
      get_stmt_operands (stmt);
      ann = stmt_ann (stmt);

      uses = USE_OPS (ann);
      for (i = 0; i < NUM_USES (uses); i++)
	rename_use_op (USE_OP_PTR (uses, i));

      defs = DEF_OPS (ann);
      for (i = 0; i < NUM_DEFS (defs); i++)
	rename_def_op (DEF_OP_PTR (defs, i), stmt);

      vuses = VUSE_OPS (ann);
      for (i = 0; i < NUM_VUSES (vuses); i++)
	rename_use_op (VUSE_OP_PTR (vuses, i));

      /* A V_MAY_DEF has both a use (the old version) and a def (the
	 new version); rename both.  */
      v_may_defs = V_MAY_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
	{
	  rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
	  rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
	}

      /* Likewise a V_MUST_DEF kills one version and defines another.  */
      v_must_defs = V_MUST_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
	{
	  rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
	  rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
	}
    }

  /* Rename phi arguments flowing out of BB into successors that are
     still inside the loop; arguments into blocks outside the loop are
     deliberately skipped.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!flow_bb_inside_loop_p (loop, e->dest))
	continue;
      for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
	rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
    }
}
413 /* Releases the structures holding the new ssa names. */
415 static void
416 free_new_names (bitmap definitions)
418 unsigned ver;
419 bitmap_iterator bi;
421 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
423 tree def = ssa_name (ver);
425 if (SSA_NAME_AUX (def))
427 free (SSA_NAME_AUX (def));
428 SSA_NAME_AUX (def) = NULL;
434 /* Renames variables in new generated LOOP. */
436 static void
437 rename_variables_in_loop (struct loop *loop)
439 unsigned i;
440 basic_block *bbs;
442 bbs = get_loop_body (loop);
444 for (i = 0; i < loop->num_nodes; i++)
445 rename_variables_in_bb (bbs[i]);
447 free (bbs);
/* Update the PHI nodes of NEW_LOOP.

   NEW_LOOP is a duplicate of ORIG_LOOP.
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
   executes before it.  */

static void
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
				       struct loop *new_loop, bool after)
{
  tree *new_name_ptr, new_ssa_name;
  tree phi_new, phi_orig;
  tree def;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_exit_e = new_loop->exit_edges[0];
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
  /* If NEW_LOOP runs after ORIG_LOOP, its entry value is whatever
     leaves ORIG_LOOP's latch; otherwise it is ORIG_LOOP's original
     entry value.  */
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);

  /*
     step 1. For each loop-header-phi:
             Add the first phi argument for the phi in NEW_LOOP
	     (the one associated with the entry of NEW_LOOP)

     step 2. For each loop-header-phi:
             Add the second phi argument for the phi in NEW_LOOP
	     (the one associated with the latch of NEW_LOOP)

     step 3. Update the phis in the successor block of NEW_LOOP.

	case 1: NEW_LOOP was placed before ORIG_LOOP:
		The successor block of NEW_LOOP is the header of ORIG_LOOP.
		Updating the phis in the successor block can therefore be done
		along with the scanning of the loop header phis, because the
		header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
		phi nodes, organized in the same order.

	case 2: NEW_LOOP was placed after ORIG_LOOP:
		The successor block of NEW_LOOP is the original exit block of
		ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
		We postpone updating these phis to a later stage (when
		loop guards are added).
   */

  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */

  for (phi_new = phi_nodes (new_loop->header),
       phi_orig = phi_nodes (orig_loop->header);
       phi_new && phi_orig;
       phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
    {
      /* step 1.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
      add_phi_arg (phi_new, def, new_loop_entry_e);

      /* step 2.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      if (TREE_CODE (def) != SSA_NAME)
	continue;

      new_name_ptr = SSA_NAME_AUX (def);
      if (!new_name_ptr)
	/* Something defined outside of the loop.  */
	continue;

      /* An ordinary ssa name defined in the loop.  */
      new_ssa_name = *new_name_ptr;
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));

      /* step 3 (case 1): NEW_LOOP feeds ORIG_LOOP's header, so the
	 original entry argument must now be the value computed by the
	 new (prolog) loop.  */
      if (!after)
	{
	  gcc_assert (new_loop_exit_e == orig_entry_e);
	  SET_PHI_ARG_DEF (phi_orig,
			   phi_arg_from_edge (phi_orig, new_loop_exit_e),
			   new_ssa_name);
	}
    }
}
/* Update PHI nodes for a guard of the LOOP.

   Input:
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
        We denote this bb NEW_MERGE_BB because it had a single predecessor (the
        LOOP header) before the guard code was added, and now it became a merge
        point of two paths - the path that ends with the LOOP exit-edge, and
        the path that ends with GUARD_EDGE.

        This function creates and updates the relevant phi nodes to account for
        the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
        1. Create phi nodes at NEW_MERGE_BB.
        2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
           was added:

        ===> The CFG before the guard-code was added:
        LOOP_header_bb:
          if (exit_loop) goto update_bb : LOOP_header_bb
        update_bb:

        ==> The CFG after the guard-code was added:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
        LOOP_header_bb:
          if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
        new_merge_bb:
          goto update_bb
        update_bb:

   - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
        UPDATE_BB are loop entry phis, like the phis in the LOOP header,
        organized in the same order.
        If ENTRY_PHIs is FALSE, this indicates that the phis in UPDATE_BB are
        loop exit phis.

   - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
        "original" loop).  FALSE if LOOP is an original loop (not a newly
        created copy).  The SSA_NAME_AUX fields of the defs in the original
        loop are the corresponding new ssa-names used in the new duplicated
        loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
        nodes in UPDATE_BB takes the original ssa-name, and which takes the
        new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
        the LOOP-exit-edge takes the new-name, and the phi-arg that is
        associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
        FALSE, it's the other way around.  */

static void
slpeel_update_phi_nodes_for_guard (edge guard_edge,
				   struct loop *loop,
				   bool entry_phis,
				   bool is_new_loop)
{
  tree orig_phi, new_phi, update_phi;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  /* For entry phis the reference phis live in the loop header; for exit
     phis they are the phis of UPDATE_BB itself.  */
  basic_block orig_bb = (entry_phis ? loop->header : update_bb);

  for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
       orig_phi && update_phi;
       orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
    {
      /* 1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				 new_merge_bb);

      /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
	 of LOOP.  Set the two phi args in NEW_PHI for these edges:  */
      if (entry_phis)
	{
	  loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
					    EDGE_SUCC (loop->latch, 0));
	  guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
	}
      else /* exit phis */
	{
	  tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
	  tree *new_name_ptr = SSA_NAME_AUX (orig_def);
	  tree new_name;

	  if (new_name_ptr)
	    new_name = *new_name_ptr;
	  else
	    /* Something defined outside of the loop.  */
	    new_name = orig_def;

	  /* See the IS_NEW_LOOP discussion in the function comment for
	     which arg takes the original name and which the new one.  */
	  if (is_new_loop)
	    {
	      guard_arg = orig_def;
	      loop_arg = new_name;
	    }
	  else
	    {
	      guard_arg = new_name;
	      loop_arg = orig_def;
	    }
	}
      add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
      add_phi_arg (new_phi, guard_arg, guard_edge);

      /* 3. Update phi in successor block: route it through the merge phi.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
		  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
		       PHI_RESULT (new_phi));
    }

  /* create_phi_node prepends, so reverse to restore the original phi
     order in NEW_MERGE_BB.  */
  set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
}
/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.  The
   original exit test is replaced by a test on the new IV, and then
   removed.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

static void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr, cond_stmt, cond;
  tree orig_cond;
  edge exit_edge = loop->exit_edges[0];
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree begin_label = tree_block_label (loop->latch);
  tree exit_label = tree_block_label (loop->single_exit->dest);
  /* New IV counts 0, 1, ... in NITERS' type.  */
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  create_iv (init, step, NULL_TREE, loop,
	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
     back to the exit condition statement.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);

  /* Orient the new test the same way the old exit edge was oriented.  */
  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
  else /* 'then' edge loops back.  */
    cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);

  begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
  exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
		      begin_label, exit_label);
  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* Remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_stats (loop) || vect_debug_details (loop))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);

  /* Record the new (symbolic) iteration count on the loop.  */
  loop->nb_iterations = niters;
}
700 /* Given LOOP this function generates a new copy of it and puts it
701 on E which is either the entry or exit of LOOP. */
703 static struct loop *
704 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
705 edge e)
707 struct loop *new_loop;
708 basic_block *new_bbs, *bbs;
709 bool at_exit;
710 bool was_imm_dom;
711 basic_block exit_dest;
712 tree phi, phi_arg;
714 at_exit = (e == loop->exit_edges[0]);
715 if (!at_exit && e != loop_preheader_edge (loop))
717 if (dump_file && (dump_flags & TDF_DETAILS))
718 fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
719 return NULL;
722 bbs = get_loop_body (loop);
724 /* Check whether duplication is possible. */
725 if (!can_copy_bbs_p (bbs, loop->num_nodes))
727 if (vect_debug_stats (loop) || vect_debug_details (loop))
728 fprintf (dump_file, "Cannot copy basic blocks.\n");
729 free (bbs);
730 return NULL;
733 /* Generate new loop structure. */
734 new_loop = duplicate_loop (loops, loop, loop->outer);
735 if (!new_loop)
737 if (vect_debug_stats (loop) || vect_debug_details (loop))
738 fprintf (dump_file, "duplicate_loop returns NULL.\n");
739 free (bbs);
740 return NULL;
743 exit_dest = loop->exit_edges[0]->dest;
744 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
745 exit_dest) == loop->header ?
746 true : false);
748 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
750 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
752 /* Duplicating phi args at exit bbs as coming
753 also from exit of duplicated loop. */
754 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
756 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
757 if (phi_arg)
759 edge new_loop_exit_edge;
761 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
762 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
763 else
764 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
766 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
770 if (at_exit) /* Add the loop copy at exit. */
772 redirect_edge_and_branch_force (e, new_loop->header);
773 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
774 if (was_imm_dom)
775 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
777 else /* Add the copy at entry. */
779 edge new_exit_e;
780 edge entry_e = loop_preheader_edge (loop);
781 basic_block preheader = entry_e->src;
783 if (!flow_bb_inside_loop_p (new_loop,
784 EDGE_SUCC (new_loop->header, 0)->dest))
785 new_exit_e = EDGE_SUCC (new_loop->header, 0);
786 else
787 new_exit_e = EDGE_SUCC (new_loop->header, 1);
789 redirect_edge_and_branch_force (new_exit_e, loop->header);
790 set_immediate_dominator (CDI_DOMINATORS, loop->header,
791 new_exit_e->src);
793 /* We have to add phi args to the loop->header here as coming
794 from new_exit_e edge. */
795 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
797 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
798 if (phi_arg)
799 add_phi_arg (phi, phi_arg, new_exit_e);
802 redirect_edge_and_branch_force (entry_e, new_loop->header);
803 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
806 flow_loop_scan (new_loop, LOOP_ALL);
807 flow_loop_scan (loop, LOOP_ALL);
808 free (new_bbs);
809 free (bbs);
811 return new_loop;
/* Given the condition statement COND, put it as the last statement
   of GUARD_BB; EXIT_BB is the basic block to skip the loop;
   DOM_BB becomes the immediate dominator of EXIT_BB.
   Assumes that this is the single exit of the guarded loop.
   Returns the skip edge (the new GUARD_BB -> EXIT_BB edge).  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
		       basic_block dom_bb)
{
  block_stmt_iterator bsi;
  edge new_e, enter_e;
  tree cond_stmt, then_label, else_label;

  /* The existing successor edge becomes the 'false' (enter-the-loop)
     branch of the new conditional.  */
  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  bsi = bsi_last (guard_bb);

  /* COND true -> skip to EXIT_BB; COND false -> fall into the loop.  */
  then_label = build1 (GOTO_EXPR, void_type_node,
		       tree_block_label (exit_bb));
  else_label = build1 (GOTO_EXPR, void_type_node,
		       tree_block_label (enter_e->dest));
  cond_stmt = build3 (COND_EXPR, void_type_node, cond,
		      then_label, else_label);
  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
  /* Add new edge to connect entry block to the second loop.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}
/* This function verifies that the following restrictions apply to LOOP:
   (1) it is innermost
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
   (3) it is single entry, single exit
   (4) its exit condition is the last stmt in the header
   (5) E is the entry/exit edge of LOOP.
   Returns true iff LOOP (with respect to edge E) satisfies all of the
   above and nothing is marked for SSA rewrite.  */

static bool
slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
{
  edge exit_e = loop->exit_edges [0];
  edge entry_e = loop_preheader_edge (loop);
  tree orig_cond = get_loop_exit_condition (loop);
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);

  /* Pending SSA rewrites would invalidate the SSA_NAME_AUX-based
     renaming scheme used during peeling.  */
  if (any_marked_for_rewrite_p ())
    return false;

  if (loop->inner
      /* All loops have an outer scope; the only case loop->outer is NULL is for
	 the function itself.  */
      || !loop->outer
      || loop->num_nodes != 2
      || !empty_block_p (loop->latch)
      || loop->num_exits != 1
      || loop->num_entries != 1
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG produced by loop peeling: FIRST_LOOP is followed
   (via a guard) by SECOND_LOOP.  Asserts on violation; checking-only.  */

static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
				 struct loop *second_loop)
{
  basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
  basic_block loop2_entry_bb = second_loop->pre_header;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or skipped
     is placed in first_loop->exit.  first_loop->exit therefore has two
     successors - one is the preheader of second_loop, and the other is a bb
     after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the preheader
        of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
	      && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
		   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
		  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
		      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP. After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP. In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP. In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.
*/

struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
                               edge e, tree first_niters,
                               tree niters, bool update_first_loop_count)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition;
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  edge exit_e = loop->exit_edges [0];

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks.  Then, when calling
     cfg_hooks->split_edge, the function tree_split_edge
     is actually called and, when calling cfg_hooks->duplicate_block,
     the function tree_duplicate_bb is called.  */
  tree_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  /* The copied loop reuses the SSA names of the original; allocate fresh
     names for the copy and fix up the phis that cross between the loops.  */
  definitions = marked_ssa_names ();
  allocate_new_names (definitions);
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);


  /* 2. Add the guard that controls whether the first loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  add_bb_to_loop (bb_before_first_loop, first_loop->outer);
  bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_before_second_loop, first_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  pre_condition =
    build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
                                     first_loop == new_loop);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_between_loops, first_loop->outer);
  bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
  add_bb_to_loop (bb_after_second_loop, second_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
                                     second_loop == new_loop);

  /* Flow loop scan does not update loop->single_exit field.  */
  first_loop->single_exit = first_loop->exit_edges[0];
  second_loop->single_exit = second_loop->exit_edges[0];

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
   */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  free_new_names (definitions);
  BITMAP_XFREE (definitions);
  unmark_all_for_rewrite ();

  return new_loop;
}
1109 /* Here the proper Vectorizer starts. */
1111 /*************************************************************************
1112 Vectorization Utilities.
1113 *************************************************************************/
1115 /* Function new_stmt_vec_info.
1117 Create and initialize a new stmt_vec_info struct for STMT. */
1119 stmt_vec_info
1120 new_stmt_vec_info (tree stmt, struct loop *loop)
1122 stmt_vec_info res;
1123 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1125 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1126 STMT_VINFO_STMT (res) = stmt;
1127 STMT_VINFO_LOOP (res) = loop;
1128 STMT_VINFO_RELEVANT_P (res) = 0;
1129 STMT_VINFO_VECTYPE (res) = NULL;
1130 STMT_VINFO_VEC_STMT (res) = NULL;
1131 STMT_VINFO_DATA_REF (res) = NULL;
1132 STMT_VINFO_MEMTAG (res) = NULL;
1133 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1135 return res;
1139 /* Function new_loop_vec_info.
1141 Create and initialize a new loop_vec_info struct for LOOP, as well as
1142 stmt_vec_info structs for all the stmts in LOOP. */
1144 loop_vec_info
1145 new_loop_vec_info (struct loop *loop)
1147 loop_vec_info res;
1148 basic_block *bbs;
1149 block_stmt_iterator si;
1150 unsigned int i;
1152 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1154 bbs = get_loop_body (loop);
1156 /* Create stmt_info for all stmts in the loop. */
1157 for (i = 0; i < loop->num_nodes; i++)
1159 basic_block bb = bbs[i];
1160 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1162 tree stmt = bsi_stmt (si);
1163 stmt_ann_t ann;
1165 get_stmt_operands (stmt);
1166 ann = stmt_ann (stmt);
1167 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1171 LOOP_VINFO_LOOP (res) = loop;
1172 LOOP_VINFO_BBS (res) = bbs;
1173 LOOP_VINFO_EXIT_COND (res) = NULL;
1174 LOOP_VINFO_NITERS (res) = NULL;
1175 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1176 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1177 LOOP_VINFO_VECT_FACTOR (res) = 0;
1178 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1179 "loop_write_datarefs");
1180 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1181 "loop_read_datarefs");
1182 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1184 return res;
1188 /* Function destroy_loop_vec_info.
1190 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1191 stmts in the loop. */
1193 void
1194 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1196 struct loop *loop;
1197 basic_block *bbs;
1198 int nbbs;
1199 block_stmt_iterator si;
1200 int j;
1202 if (!loop_vinfo)
1203 return;
1205 loop = LOOP_VINFO_LOOP (loop_vinfo);
1207 bbs = LOOP_VINFO_BBS (loop_vinfo);
1208 nbbs = loop->num_nodes;
1210 for (j = 0; j < nbbs; j++)
1212 basic_block bb = bbs[j];
1213 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1215 tree stmt = bsi_stmt (si);
1216 stmt_ann_t ann = stmt_ann (stmt);
1217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1218 free (stmt_info);
1219 set_stmt_info (ann, NULL);
1223 free (LOOP_VINFO_BBS (loop_vinfo));
1224 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1225 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1227 free (loop_vinfo);
1231 /* Function debug_loop_stats.
1233 For vectorization statistics dumps. */
1235 static bool
1236 vect_debug_stats (struct loop *loop)
1238 basic_block bb;
1239 block_stmt_iterator si;
1240 tree node = NULL_TREE;
1242 if (!dump_file || !(dump_flags & TDF_STATS))
1243 return false;
1245 if (!loop)
1247 fprintf (dump_file, "\n");
1248 return true;
1251 if (!loop->header)
1252 return false;
1254 bb = loop->header;
1256 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1258 node = bsi_stmt (si);
1259 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1260 break;
1263 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1264 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1266 fprintf (dump_file, "\nloop at %s:%d: ",
1267 EXPR_FILENAME (node), EXPR_LINENO (node));
1268 return true;
1271 return false;
1275 /* Function debug_loop_details.
1277 For vectorization debug dumps. */
1279 static bool
1280 vect_debug_details (struct loop *loop)
1282 basic_block bb;
1283 block_stmt_iterator si;
1284 tree node = NULL_TREE;
1286 if (!dump_file || !(dump_flags & TDF_DETAILS))
1287 return false;
1289 if (!loop)
1291 fprintf (dump_file, "\n");
1292 return true;
1295 if (!loop->header)
1296 return false;
1298 bb = loop->header;
1300 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1302 node = bsi_stmt (si);
1303 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1304 break;
1307 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1308 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1310 fprintf (dump_file, "\nloop at %s:%d: ",
1311 EXPR_FILENAME (node), EXPR_LINENO (node));
1312 return true;
1315 return false;
1319 /* Function vect_get_ptr_offset
1321 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1323 static tree
1324 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1325 tree vectype ATTRIBUTE_UNUSED,
1326 tree *offset ATTRIBUTE_UNUSED)
1328 /* TODO: Use alignment information. */
1329 return NULL_TREE;
/* Function vect_get_base_and_bit_offset

   Return the BASE of the data reference EXPR.
   If VECTYPE is given, also compute the OFFSET from BASE in bits.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
   bits of 'a.b[i] + 4B' from a.

   Input:
   EXPR - the memory reference that is being analyzed
   DR - the data_reference struct of the _original_ memory reference
        (Note: DR_REF (DR) is not necessarily EXPR)
   VECTYPE - the type that defines the alignment (i.e, we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE (returned value) - the base of the data reference EXPR.
                           E.g, if EXPR is a.b[k].c[i][j] the returned
                           base is a.
   OFFSET - offset of EXPR from BASE in bits
   BASE_ALIGNED_P - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   or if VECTYPE is given but OFFSET cannot be determined:
   then NULL_TREE is returned.  */

static tree
vect_get_base_and_bit_offset (struct data_reference *dr,
                              tree expr,
                              tree vectype,
                              loop_vec_info loop_vinfo,
                              tree *offset,
                              bool *base_aligned_p)
{
  tree this_offset = size_zero_node;
  tree base = NULL_TREE;
  tree next_ref;
  tree oprnd0, oprnd1;
  struct data_reference *array_dr;
  enum tree_code code = TREE_CODE (expr);

  *base_aligned_p = false;

  switch (code)
    {
    /* These cases end the recursion:  */
    case VAR_DECL:
      *offset = size_zero_node;
      if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
        *base_aligned_p = true;
      return expr;

    case SSA_NAME:
      /* Without a VECTYPE we only need the base, not alignment info.  */
      if (!vectype)
        return expr;

      if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
        return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
        {
          /* The pointed-to type is under-aligned; fall back to explicit
             alignment information (currently a stub, see
             vect_get_ptr_offset).  */
          base = vect_get_ptr_offset (expr, vectype, offset);
          if (base)
            *base_aligned_p = true;
        }
      else
        {
          *base_aligned_p = true;
          *offset = size_zero_node;
          base = expr;
        }
      return base;

    case INTEGER_CST:
      /* Convert the byte constant to a bit offset.  */
      *offset = int_const_binop (MULT_EXPR, expr,
                                 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
      return expr;

    /* These cases continue the recursion:  */
    case COMPONENT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      /* Field offset must be a compile-time constant when alignment
         information was requested.  */
      this_offset = bit_position (oprnd1);
      if (vectype && !host_integerp (this_offset, 1))
        return NULL_TREE;
      next_ref = oprnd0;
      break;

    case ADDR_EXPR:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case INDIRECT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case ARRAY_REF:
      if (DR_REF (dr) != expr)
        /* Build array data_reference struct if the existing DR_REF
           doesn't match EXPR. This happens, for example, when the
           EXPR is *T and T is initialized to &arr[indx]. The DR struct
           contains information on the access of T, not of arr. In order
           to continue the analysis, we create a new DR struct that
           describes the access of arr.  */
        array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
      else
        array_dr = dr;

      next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
                                                   vectype, &this_offset);
      if (!next_ref)
        return NULL_TREE;

      if (vectype &&
          TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
        {
          /* The array is aligned well enough by itself; stop here.  */
          *offset = this_offset;
          *base_aligned_p = true;
          return next_ref;
        }
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* In case we have a PLUS_EXPR of the form
         (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
         This is verified in vect_get_symbl_and_dr.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      base = vect_get_base_and_bit_offset
        (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
      if (vectype && !base)
        return NULL_TREE;

      next_ref = oprnd0;
      break;

    default:
      return NULL_TREE;
    }

  /* Recurse into the remaining (outer) part of the reference; the bit
     offsets of each level are accumulated into *OFFSET below.  */
  base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
                                       loop_vinfo, offset, base_aligned_p);

  if (vectype && base)
    {
      *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
      if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
        return NULL_TREE;

      if (vect_debug_details (NULL))
        {
          print_generic_expr (dump_file, expr, TDF_SLIM);
          fprintf (dump_file, " --> total offset for ref: ");
          print_generic_expr (dump_file, *offset, TDF_SLIM);
        }
    }

  return base;
}
1498 /* Function vect_force_dr_alignment_p.
1500 Returns whether the alignment of a DECL can be forced to be aligned
1501 on ALIGNMENT bit boundary. */
1503 static bool
1504 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1506 if (TREE_CODE (decl) != VAR_DECL)
1507 return false;
1509 if (DECL_EXTERNAL (decl))
1510 return false;
1512 if (TREE_ASM_WRITTEN (decl))
1513 return false;
1515 if (TREE_STATIC (decl))
1516 return (alignment <= MAX_OFILE_ALIGNMENT);
1517 else
1518 /* This is not 100% correct. The absolute correct stack alignment
1519 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1520 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1521 However, until someone implements forced stack alignment, SSE
1522 isn't really usable without this. */
1523 return (alignment <= PREFERRED_STACK_BOUNDARY);
1527 /* Function vect_get_new_vect_var.
1529 Returns a name for a new variable. The current naming scheme appends the
1530 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1531 the name of vectorizer generated variables, and appends that to NAME if
1532 provided. */
1534 static tree
1535 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1537 const char *prefix;
1538 int prefix_len;
1539 tree new_vect_var;
1541 if (var_kind == vect_simple_var)
1542 prefix = "vect_";
1543 else
1544 prefix = "vect_p";
1546 prefix_len = strlen (prefix);
1548 if (name)
1549 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1550 else
1551 new_vect_var = create_tmp_var (type, prefix);
1553 return new_vect_var;
1557 /* Function vect_create_index_for_vector_ref.
1559 Create (and return) an index variable, along with it's update chain in the
1560 loop. This variable will be used to access a memory location in a vector
1561 operation.
1563 Input:
1564 LOOP: The loop being vectorized.
1565 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1566 function can be added here, or in the loop pre-header.
1568 Output:
1569 Return an index that will be used to index a vector array. It is expected
1570 that a pointer to the first vector will be used as the base address for the
1571 indexed reference.
1573 FORNOW: we are not trying to be efficient, just creating a new index each
1574 time from scratch. At this time all vector references could use the same
1575 index.
1577 TODO: create only one index to be used by all vector references. Record
1578 the index in the LOOP_VINFO the first time this procedure is called and
1579 return it on subsequent calls. The increment of this index must be placed
1580 just before the conditional expression that ends the single block loop. */
1582 static tree
1583 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1585 tree init, step;
1586 tree indx_before_incr, indx_after_incr;
1588 /* It is assumed that the base pointer used for vectorized access contains
1589 the address of the first vector. Therefore the index used for vectorized
1590 access must be initialized to zero and incremented by 1. */
1592 init = integer_zero_node;
1593 step = integer_one_node;
1595 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1596 create_iv (init, step, NULL_TREE, loop, bsi, false,
1597 &indx_before_incr, &indx_after_incr);
1599 return indx_before_incr;
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is to be added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
                                      tree *new_stmt_list,
                                      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
  tree base_name = unshare_expr (DR_BASE_NAME (dr));
  tree ref = DR_REF (dr);
  tree data_ref_base_type = TREE_TYPE (data_ref_base);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree access_fn;
  tree init_val, step, init_oval;
  bool ok;
  bool is_ptr_ref, is_array_ref, is_addr_expr;
  tree array_base;
  tree vec_stmt;
  tree new_temp;
  tree array_ref;
  tree addr_base, addr_expr;
  tree dest, new_stmt;

  /* Only the access function of the last index is relevant (i_n in
     a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
  access_fn = DR_ACCESS_FN (dr, 0);
  ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
                                    true);
  if (!ok)
    init_oval = integer_zero_node;

  /* Classify the base so we know whether it can be indexed directly or
     must first be cast to a pointer-to-array.  */
  is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
               && TREE_CODE (data_ref_base) == SSA_NAME;
  is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
  is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
                 || TREE_CODE (data_ref_base) == PLUS_EXPR
                 || TREE_CODE (data_ref_base) == MINUS_EXPR;
  gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);

  /** Create: &(base[init_val])

      if data_ref_base is an ARRAY_TYPE:
         base = data_ref_base

      if data_ref_base is the SSA_NAME of a POINTER_TYPE:
         base = *((scalar_array *) data_ref_base)
   **/

  if (is_array_ref)
    array_base = data_ref_base;
  else /* is_ptr_ref or is_addr_expr */
    {
      /* array_ptr = (scalar_array_ptr_type *) data_ref_base;  */
      tree scalar_array_type = build_array_type (scalar_type, 0);
      tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
      tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
      add_referenced_tmp_var (array_ptr);

      /* Gimplify the base expression; emitted stmts accumulate on
         NEW_STMT_LIST for the caller to insert.  */
      dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
      add_referenced_tmp_var (dest);
      data_ref_base =
        force_gimple_operand (data_ref_base, &new_stmt, false, dest);
      append_to_statement_list_force (new_stmt, new_stmt_list);

      vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
      vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
      new_temp = make_ssa_name (array_ptr, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = new_temp;
      append_to_statement_list_force (vec_stmt, new_stmt_list);

      /* (*array_ptr)  */
      array_base = build_fold_indirect_ref (new_temp);
    }

  /* Gimplify the initial index value.  */
  dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
  add_referenced_tmp_var (dest);
  init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      /* init_val += OFFSET, as an SSA temporary.  */
      tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
      add_referenced_tmp_var (tmp);
      vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
      vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
      init_val = make_ssa_name (tmp, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = init_val;
      append_to_statement_list_force (vec_stmt, new_stmt_list);
    }

  /* &base[init_val]  */
  array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
                      NULL_TREE, NULL_TREE);
  addr_base = build_fold_addr_expr (array_ref);

  /* addr_expr = addr_base  */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
                                     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  return new_temp;
}
1730 /* Function get_vectype_for_scalar_type.
1732 Returns the vector type corresponding to SCALAR_TYPE as supported
1733 by the target. */
1735 static tree
1736 get_vectype_for_scalar_type (tree scalar_type)
1738 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1739 int nbytes = GET_MODE_SIZE (inner_mode);
1740 int nunits;
1741 tree vectype;
1743 if (nbytes == 0)
1744 return NULL_TREE;
1746 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1747 is expected. */
1748 nunits = UNITS_PER_SIMD_WORD / nbytes;
1750 vectype = build_vector_type (scalar_type, nunits);
1751 if (vect_debug_details (NULL))
1753 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1754 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1757 if (!vectype)
1758 return NULL_TREE;
1760 if (vect_debug_details (NULL))
1762 fprintf (dump_file, "vectype: ");
1763 print_generic_expr (dump_file, vectype, TDF_SLIM);
1766 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1768 /* TODO: tree-complex.c sometimes can parallelize operations
1769 on generic vectors. We can vectorize the loop in that case,
1770 but then we should re-run the lowering pass. */
1771 if (vect_debug_details (NULL))
1772 fprintf (dump_file, "mode not supported by target.");
1773 return NULL_TREE;
1776 return vectype;
1780 /* Function vect_align_data_ref.
1782 Handle mislignment of a memory accesses.
1784 FORNOW: Can't handle misaligned accesses.
1785 Make sure that the dataref is aligned. */
1787 static void
1788 vect_align_data_ref (tree stmt)
1790 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1791 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1793 /* FORNOW: can't handle misaligned accesses;
1794 all accesses expected to be aligned. */
1795 gcc_assert (aligned_access_p (dr));
/* Function vect_create_data_ref_ptr.

   Create a memory reference expression for vector access, to be used in a
   vector load/store stmt. The reference is based on a new pointer to vector
   type (vp).

   Input:
   1. STMT: a stmt that references memory. Expected to be of the form
         MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
        pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. Create a data-reference in the loop based on the new vector pointer vp,
      and using a new index variable 'idx' as follows:

      vp' = vp + update

      where if ONLY_INIT is true:
         update = zero
      and otherwise
         update = idx + vector_type_size

      Return the pointer vp'.


   FORNOW: handle only aligned and consecutive accesses.  */

static tree
vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
                          tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  vuse_optype vuses = STMT_VUSE_OPS (stmt);
  int nvuses, nv_may_defs, nv_must_defs;
  int i;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  tree idx;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  tree vectype_size;
  tree ptr_update;
  tree data_ref_ptr;
  tree type, tmp, size;

  base_name = unshare_expr (DR_BASE_NAME (dr));
  if (vect_debug_details (NULL))
    {
      tree data_ref_base = base_name;
      fprintf (dump_file, "create array_ref of type: ");
      print_generic_expr (dump_file, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
        fprintf (dump_file, "vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
        fprintf (dump_file, "vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
        fprintf (dump_file, "vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
        fprintf (dump_file, "vectorizing a pointer ref: ");
      print_generic_expr (dump_file, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
                                    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Handle aliasing information of the new vector-pointer:  **/

  tag = STMT_VINFO_MEMTAG (stmt_info);
  gcc_assert (tag);
  get_var_ann (vect_ptr)->type_mem_tag = tag;

  /* Mark for renaming all aliased variables
     (i.e, the may-aliases of the type-mem-tag).  */
  nvuses = NUM_VUSES (vuses);
  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
  for (i = 0; i < nvuses; i++)
    {
      tree use = VUSE_OP (vuses, i);
      if (TREE_CODE (use) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
    }
  for (i = 0; i < nv_may_defs; i++)
    {
      tree def = V_MAY_DEF_RESULT (v_may_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }
  for (i = 0; i < nv_must_defs; i++)
    {
      tree def = V_MUST_DEF_RESULT (v_must_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
        bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }


  /** (3) Calculate the initial address of the vector-pointer, and set
          the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset);
  pe = loop_preheader_edge (loop);
  /* The preheader edge must not need splitting - assert it.  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);
  vect_ptr_init = TREE_OPERAND (vec_stmt, 0);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    return vect_ptr_init;

  idx = vect_create_index_for_vector_ref (loop, bsi);

  /* Create: update = idx * vectype_size  */
  tmp = create_tmp_var (integer_type_node, "update");
  add_referenced_tmp_var (tmp);
  /* The pointer-sized integer type, for the converted update value.  */
  size = TYPE_SIZE (vect_ptr_type);
  type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
  ptr_update = create_tmp_var (type, "update");
  add_referenced_tmp_var (ptr_update);
  vectype_size = build_int_cst (integer_type_node,
                                GET_MODE_SIZE (TYPE_MODE (vectype)));
  vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
  new_temp = make_ssa_name (tmp, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  /* Convert the byte update to the pointer-sized type.  */
  vec_stmt = fold_convert (type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
  new_temp = make_ssa_name (ptr_update, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  /* Create: data_ref_ptr = vect_ptr_init + update  */
  vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  data_ref_ptr = TREE_OPERAND (vec_stmt, 0);

  return data_ref_ptr;
}
1989 /* Function vect_create_destination_var.
1991 Create a new temporary of type VECTYPE. */
1993 static tree
1994 vect_create_destination_var (tree scalar_dest, tree vectype)
1996 tree vec_dest;
1997 const char *new_name;
1999 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2001 new_name = get_name (scalar_dest);
2002 if (!new_name)
2003 new_name = "var_";
2004 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2005 add_referenced_tmp_var (vec_dest);
2007 return vec_dest;
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new vector variable with
   the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
   used in the vectorization of STMT.

   The initialization is loop invariant, so it is emitted once on the
   loop-preheader edge rather than inside the loop.  */

static tree
vect_init_vector (tree stmt, tree vector_var)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
  tree new_var;
  tree init_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  tree vec_oprnd;
  edge pe;
  tree new_temp;
  basic_block new_bb;

  /* Create a fresh vector variable to hold the initializer.  */
  new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
  add_referenced_tmp_var (new_var);

  /* Build "cst_ = VECTOR_VAR" and give its LHS a fresh SSA name.  */
  init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
  new_temp = make_ssa_name (new_var, init_stmt);
  TREE_OPERAND (init_stmt, 0) = new_temp;

  /* Emit the initialization on the preheader edge; the insertion must
     not have required splitting the edge into a new block.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
  gcc_assert (!new_bb);

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "created new init_stmt: ");
      print_generic_expr (dump_file, init_stmt, TDF_SLIM);
    }

  /* Return the SSA def of the initialization stmt.  */
  vec_oprnd = TREE_OPERAND (init_stmt, 0);
  return vec_oprnd;
}
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT. This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

static tree
vect_get_vec_def_for_operand (tree op, tree stmt)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
  basic_block bb;
  tree vec_inv;
  tree t = NULL_TREE;
  tree def;
  int i;

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "vect_get_vec_def_for_operand: ");
      print_generic_expr (dump_file, op, TDF_SLIM);
    }

  /** ===> Case 1: operand is a constant.  **/

  if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
    {
      /* Create 'vect_cst_ = {cst,cst,...,cst}'  */

      tree vec_cst;

      /* Build a TREE_LIST of NUNITS copies of the constant (back to
	 front, although all elements are identical here).  */
      if (vect_debug_details (NULL))
	fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);

      for (i = nunits - 1; i >= 0; --i)
	{
	  t = tree_cons (NULL_TREE, op, t);
	}
      vec_cst = build_vector (vectype, t);
      /* Materialize the constant vector once, in the loop preheader.  */
      return vect_init_vector (stmt, vec_cst);
    }

  gcc_assert (TREE_CODE (op) == SSA_NAME);

  /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/

  def_stmt = SSA_NAME_DEF_STMT (op);
  def_stmt_info = vinfo_for_stmt (def_stmt);

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
      print_generic_expr (dump_file, def_stmt, TDF_SLIM);
    }

  /** ==> Case 2.1: operand is defined inside the loop.  **/

  if (def_stmt_info)
    {
      /* Get the def from the vectorized stmt.  The defining stmt must
	 already have been vectorized (stmts are processed in order).  */

      vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
      gcc_assert (vec_stmt);
      vec_oprnd = TREE_OPERAND (vec_stmt, 0);
      return vec_oprnd;
    }

  /** ==> Case 2.2: operand is defined by the loop-header phi-node -
	  it is a reduction/induction.  **/

  bb = bb_for_stmt (def_stmt);
  if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "reduction/induction - unsupported.");
      internal_error ("no support for reduction/induction"); /* FORNOW */
    }

  /** ==> Case 2.3: operand is defined outside the loop -
	  it is a loop invariant.  */

  switch (TREE_CODE (def_stmt))
    {
    case PHI_NODE:
      def = PHI_RESULT (def_stmt);
      break;
    case MODIFY_EXPR:
      def = TREE_OPERAND (def_stmt, 0);
      break;
    case NOP_EXPR:
      /* An empty (NOP) def stmt means OP has no real definition in the
	 function (e.g. a default definition); use OP itself.  */
      def = TREE_OPERAND (def_stmt, 0);
      gcc_assert (IS_EMPTY_STMT (def_stmt));
      def = op;
      break;
    default:
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "unsupported defining stmt: ");
	  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
	}
      internal_error ("unsupported defining stmt");
    }

  /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}'  */

  if (vect_debug_details (NULL))
    fprintf (dump_file, "Create vector_inv.");

  for (i = nunits - 1; i >= 0; --i)
    {
      t = tree_cons (NULL_TREE, def, t);
    }

  /* A CONSTRUCTOR (not a VECTOR_CST) is needed because the invariant is
     not necessarily a compile-time constant.  */
  vec_inv = build_constructor (vectype, t);
  return vect_init_vector (stmt, vec_inv);
}
2185 /* Function vect_finish_stmt_generation.
2187 Insert a new stmt. */
2189 static void
2190 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2192 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2194 if (vect_debug_details (NULL))
2196 fprintf (dump_file, "add new stmt: ");
2197 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2200 /* Make sure bsi points to the stmt that is being vectorized. */
2202 /* Assumption: any stmts created for the vectorization of stmt S were
2203 inserted before S. BSI is expected to point to S or some new stmt before S.
2206 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2207 bsi_next (bsi);
2208 gcc_assert (stmt == bsi_stmt (*bsi));
2212 /* Function vectorizable_assignment.
2214 Check if STMT performs an assignment (copy) that can be vectorized.
2215 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2216 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2217 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2219 static bool
2220 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2222 tree vec_dest;
2223 tree scalar_dest;
2224 tree op;
2225 tree vec_oprnd;
2226 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2227 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2228 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2229 tree new_temp;
2231 /* Is vectorizable assignment? */
2233 if (TREE_CODE (stmt) != MODIFY_EXPR)
2234 return false;
2236 scalar_dest = TREE_OPERAND (stmt, 0);
2237 if (TREE_CODE (scalar_dest) != SSA_NAME)
2238 return false;
2240 op = TREE_OPERAND (stmt, 1);
2241 if (!vect_is_simple_use (op, loop, NULL))
2243 if (vect_debug_details (NULL))
2244 fprintf (dump_file, "use not simple.");
2245 return false;
2248 if (!vec_stmt) /* transformation not required. */
2250 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2251 return true;
2254 /** Trasform. **/
2255 if (vect_debug_details (NULL))
2256 fprintf (dump_file, "transform assignment.");
2258 /* Handle def. */
2259 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2261 /* Handle use. */
2262 op = TREE_OPERAND (stmt, 1);
2263 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2265 /* Arguments are ready. create the new vector stmt. */
2266 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2267 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2268 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2269 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2271 return true;
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;

  /* Is STMT a vectorizable binary/unary operation?   */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  /* The operation is the RHS; map its tree code to the optab that would
     implement it on vectors of VECTYPE.  */
  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* All operands must be "simple uses" (loop invariant or defined by a
     vectorizable stmt).  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop, NULL))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "op not supported by target.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses: fetch the vector defs for each scalar operand.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
    }

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
2388 /* Function vectorizable_store.
2390 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2391 can be vectorized.
2392 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2393 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2394 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2396 static bool
2397 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2399 tree scalar_dest;
2400 tree data_ref;
2401 tree op;
2402 tree vec_oprnd1;
2403 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2404 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2405 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2406 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2407 enum machine_mode vec_mode;
2408 tree dummy;
2409 enum dr_alignment_support alignment_support_cheme;
2411 /* Is vectorizable store? */
2413 if (TREE_CODE (stmt) != MODIFY_EXPR)
2414 return false;
2416 scalar_dest = TREE_OPERAND (stmt, 0);
2417 if (TREE_CODE (scalar_dest) != ARRAY_REF
2418 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2419 return false;
2421 op = TREE_OPERAND (stmt, 1);
2422 if (!vect_is_simple_use (op, loop, NULL))
2424 if (vect_debug_details (NULL))
2425 fprintf (dump_file, "use not simple.");
2426 return false;
2429 vec_mode = TYPE_MODE (vectype);
2430 /* FORNOW. In some cases can vectorize even if data-type not supported
2431 (e.g. - array initialization with 0). */
2432 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2433 return false;
2435 if (!STMT_VINFO_DATA_REF (stmt_info))
2436 return false;
2439 if (!vec_stmt) /* transformation not required. */
2441 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2442 return true;
2445 /** Trasform. **/
2447 if (vect_debug_details (NULL))
2448 fprintf (dump_file, "transform store");
2450 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2451 gcc_assert (alignment_support_cheme);
2452 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2454 /* Handle use - get the vectorized def from the defining stmt. */
2455 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2457 /* Handle def. */
2458 /* FORNOW: make sure the data reference is aligned. */
2459 vect_align_data_ref (stmt);
2460 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2461 data_ref = build_fold_indirect_ref (data_ref);
2463 /* Arguments are ready. create the new vector stmt. */
2464 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2465 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2467 return true;
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load?  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  /* The RHS must be a memory reference (array element or pointer
     dereference).  */
  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (loop))
	fprintf (dump_file, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* The access is either aligned, or the target can do a misaligned
	 load directly.  Create:
	 p = initial_addr;
	 indx = 0;
	 loop {
	   vec_dest = *(p);
	   indx = indx + 1;
	 }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
	data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  /* Misaligned but supported: use MISALIGNED_INDIRECT_REF,
	     passing the known misalignment in bits (-1 means unknown,
	     encoded as 0 here).  */
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ?
		       integer_zero_node :
		       build_int_cst (integer_type_node, mis));
	  tmis = int_const_binop (MULT_EXPR, tmis,
			build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Software-pipelined realignment scheme.  Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader.  The
	 ALIGN_INDIRECT_REF rounds the address down to alignment.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);

      /* <2> Create lsq = *(floor(p2')) in the loop, where p2' is the
	 address offset by VS-1 elements from the initial address.  */
      offset = build_int_cst (integer_type_node,
			      GET_MODE_NUNITS (TYPE_MODE (vectype)));
      offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);

      /* <3> Compute the "magic" third operand of the realign load.  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader.  */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg
	     pressure.  */
	  magic = dataref_ptr;
	}

      /* <4> Create msq = phi <msq_init, lsq> in loop: the previous
	 iteration's lsq becomes this iteration's msq.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));

      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  *vec_stmt = new_stmt;
  return true;
}
2672 /* Function vect_supportable_dr_alignment
2674 Return whether the data reference DR is supported with respect to its
2675 alignment. */
2677 static enum dr_alignment_support
2678 vect_supportable_dr_alignment (struct data_reference *dr)
2680 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2681 enum machine_mode mode = (int) TYPE_MODE (vectype);
2683 if (aligned_access_p (dr))
2684 return dr_aligned;
2686 /* Possibly unaligned access. */
2688 if (DR_IS_READ (dr))
2690 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2691 && (!targetm.vectorize.builtin_mask_for_load
2692 || targetm.vectorize.builtin_mask_for_load ()))
2693 return dr_unaligned_software_pipeline;
2695 if (targetm.vectorize.misaligned_mem_ok (mode))
2696 /* Can't software pipeline the loads. */
2697 return dr_unaligned_supported;
2700 /* Unsupported. */
2701 return dr_unaligned_unsupported;
2705 /* Function vect_transform_stmt.
2707 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2709 static bool
2710 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2712 bool is_store = false;
2713 tree vec_stmt = NULL_TREE;
2714 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2715 bool done;
2717 switch (STMT_VINFO_TYPE (stmt_info))
2719 case op_vec_info_type:
2720 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2721 gcc_assert (done);
2722 break;
2724 case assignment_vec_info_type:
2725 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2726 gcc_assert (done);
2727 break;
2729 case load_vec_info_type:
2730 done = vectorizable_load (stmt, bsi, &vec_stmt);
2731 gcc_assert (done);
2732 break;
2734 case store_vec_info_type:
2735 done = vectorizable_store (stmt, bsi, &vec_stmt);
2736 gcc_assert (done);
2737 is_store = true;
2738 break;
2739 default:
2740 if (vect_debug_details (NULL))
2741 fprintf (dump_file, "stmt not supported.");
2742 gcc_unreachable ();
2745 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2747 return is_store;
2751 /* This function builds ni_name = number of iterations loop executes
2752 on the loop preheader. */
2754 static tree
2755 vect_build_loop_niters (loop_vec_info loop_vinfo)
2757 tree ni_name, stmt, var;
2758 edge pe;
2759 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2760 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2762 var = create_tmp_var (TREE_TYPE (ni), "niters");
2763 add_referenced_tmp_var (var);
2764 ni_name = force_gimple_operand (ni, &stmt, false, var);
2766 pe = loop_preheader_edge (loop);
2767 if (stmt)
2769 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2770 gcc_assert (!new_bb);
2773 return ni_name;
/* This function generates the following statements:

   ni_name = number of iterations loop executes
   ratio = ni_name / vf
   ratio_mult_vf_name = ratio * vf

   and places them at the loop preheader edge.

   VF is assumed to be a power of two, so the divisions are emitted as
   shifts by log2(vf).  */

static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
				 tree *ni_name_ptr,
				 tree *ratio_mult_vf_name_ptr,
				 tree *ratio_name_ptr)
{
  edge pe;
  basic_block new_bb;
  tree stmt, ni_name;
  tree var;
  tree ratio_name;
  tree ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = LOOP_VINFO_NITERS (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));

  pe = loop_preheader_edge (loop);

  /* Generate temporary variable that contains
     number of iterations loop executes.  */

  ni_name = vect_build_loop_niters (loop_vinfo);

  /* Create: ratio = ni >> log2(vf).  The SSA name and its defining stmt
     are wired up by hand since the stmt is built directly.  */

  var = create_tmp_var (TREE_TYPE (ni), "bnd");
  add_referenced_tmp_var (var);
  ratio_name = make_ssa_name (var, NULL_TREE);
  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
	   build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
  SSA_NAME_DEF_STMT (ratio_name) = stmt;

  /* Emit on the preheader edge; the insertion must not split the edge
     into a new block.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  gcc_assert (!new_bb);

  /* Create: ratio_mult_vf = ratio << log2 (vf).  */

  var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
  add_referenced_tmp_var (var);
  ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
	   build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  gcc_assert (!new_bb);

  /* Return the three values through the output parameters.  */
  *ni_name_ptr = ni_name;
  *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
  *ratio_name_ptr = ratio_name;

  return;
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP. This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop. Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized. The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

   The new definitions of the ivs are placed in LOOP->exit.
   The phi args associated with the edge UPDATE_E in the bb
   UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p). This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop). This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */

static void
vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
{
  basic_block exit_bb = loop->exit_edges[0]->dest;
  tree phi, phi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);

  /* Walk the header phis and the update_bb phis in lockstep
     (Assumption 2: same order).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "virtual phi. skip.");
	  continue;
	}

      /* Determine the IV's step from its scalar evolution.  */
      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition (access_fn));

      /* The IV's value after NITERS iterations: init + niters * step.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
		  build2 (MULT_EXPR, TREE_TYPE (niters),
		       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_tmp_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      if (stmt)
	bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb: the arg coming along
	 UPDATE_E must currently equal the header phi's latch arg, and is
	 replaced by the advanced value.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
		  PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
      SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
    }
}
2951 /* Function vect_do_peeling_for_loop_bound
2953 Peel the last iterations of the loop represented by LOOP_VINFO.
2954 The peeled iterations form a new epilog loop. Given that the loop now
2955 iterates NITERS times, the new epilog loop iterates
2956 NITERS % VECTORIZATION_FACTOR times.
2958 The original loop will later be made to iterate
2959 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2961 static void
2962 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2963 struct loops *loops)
2966 tree ni_name, ratio_mult_vf_name;
2967 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2968 struct loop *new_loop;
2969 edge update_e;
2970 #ifdef ENABLE_CHECKING
2971 int loop_num;
2972 #endif
2974 if (vect_debug_details (NULL))
2975 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
2977 /* Generate the following variables on the preheader of original loop:
2979 ni_name = number of iteration the original loop executes
2980 ratio = ni_name / vf
2981 ratio_mult_vf_name = ratio * vf */
2982 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2983 &ratio_mult_vf_name, ratio);
2985 /* Update loop info. */
2986 loop->pre_header = loop_preheader_edge (loop)->src;
2987 loop->pre_header_edges[0] = loop_preheader_edge (loop);
2989 #ifdef ENABLE_CHECKING
2990 loop_num = loop->num;
2991 #endif
2992 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
2993 ratio_mult_vf_name, ni_name, false);
2994 #ifdef ENABLE_CHECKING
2995 gcc_assert (new_loop);
2996 gcc_assert (loop_num == loop->num);
2997 slpeel_verify_cfg_after_peeling (loop, new_loop);
2998 #endif
3000 /* A guard that controls whether the new_loop is to be executed or skipped
3001 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3002 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3003 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3004 is on the path where the LOOP IVs are used and need to be updated. */
3006 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3007 update_e = EDGE_PRED (new_loop->pre_header, 0);
3008 else
3009 update_e = EDGE_PRED (new_loop->pre_header, 1);
3011 /* Update IVs of original loop as if they were advanced
3012 by ratio_mult_vf_name steps. */
3013 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3015 /* After peeling we have to reset scalar evolution analyzer. */
3016 scev_reset ();
3018 return;
3022 /* Function vect_gen_niters_for_prolog_loop
3024 Set the number of iterations for the loop represented by LOOP_VINFO
3025 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3026 and the misalignment of DR - the first data reference recorded in
3027 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3028 this loop, the data reference DR will refer to an aligned location.
3030 The following computation is generated:
3032 compute address misalignment in bytes:
3033 addr_mis = addr & (vectype_size - 1)
3035 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3037 (elem_size = element type size; an element is the scalar element
3038 whose type is the inner type of the vectype) */
3040 static tree
3041 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3043 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3044 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3045 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3046 tree var, stmt;
3047 tree iters, iters_name;
3048 edge pe;
3049 basic_block new_bb;
3050 tree dr_stmt = DR_STMT (dr);
3051 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3052 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3053 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3054 tree elem_misalign;
3055 tree byte_misalign;
3056 tree new_stmts = NULL_TREE;
3057 tree start_addr =
3058 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3059 tree ptr_type = TREE_TYPE (start_addr);
3060 tree size = TYPE_SIZE (ptr_type);
3061 tree type = lang_hooks.types.type_for_size (TREE_INT_CST_LOW (size), 1);
3062 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3063 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3064 tree niters_type = TREE_TYPE (loop_niters);
3065 tree elem_size_log =
3066 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3067 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3069 pe = loop_preheader_edge (loop);
3070 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3071 gcc_assert (!new_bb);
3073 /* Create: byte_misalign = addr & (vectype_size - 1) */
3074 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3076 /* Create: elem_misalign = byte_misalign / element_size */
3077 elem_misalign =
3078 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3080 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3081 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3082 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3083 iters = fold_convert (niters_type, iters);
3085 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3086 /* If the loop bound is known at compile time we already verified that it is
3087 greater than vf; since the misalignment ('iters') is at most vf, there's
3088 no need to generate the MIN_EXPR in this case. */
3089 if (!host_integerp (loop_niters, 0))
3090 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3092 var = create_tmp_var (niters_type, "prolog_loop_niters");
3093 add_referenced_tmp_var (var);
3094 iters_name = force_gimple_operand (iters, &stmt, false, var);
3096 /* Insert stmt on loop preheader edge. */
3097 pe = loop_preheader_edge (loop);
3098 if (stmt)
3100 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3101 gcc_assert (!new_bb);
3104 return iters_name;
3108 /* Function vect_update_inits_of_dr
3110 NITERS iterations were peeled from LOOP. DR represents a data reference
3111 in LOOP. This function updates the information recorded in DR to
3112 account for the fact that the first NITERS iterations had already been
3113 executed. Specifically, it updates the initial_condition of the
3114 access_function of DR. */
3116 static void
3117 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3118 tree niters)
3120 tree access_fn = DR_ACCESS_FN (dr, 0);
3121 tree init, init_new, step;
3123 step = evolution_part_in_loop_num (access_fn, loop->num);
3124 init = initial_condition (access_fn);
3126 init_new = build2 (PLUS_EXPR, TREE_TYPE (init),
3127 build2 (MULT_EXPR, TREE_TYPE (niters),
3128 niters, step), init);
3129 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3131 return;
3135 /* Function vect_update_inits_of_drs
3137 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3138 This function updates the information recorded for the data references in
3139 the loop to account for the fact that the first NITERS iterations had
3140 already been executed. Specifically, it updates the initial_condition of the
3141 access_function of all the data_references in the loop. */
3143 static void
3144 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3146 unsigned int i;
3147 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3148 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3149 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3151 if (dump_file && (dump_flags & TDF_DETAILS))
3152 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3154 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3156 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3157 vect_update_inits_of_dr (dr, loop, niters);
3160 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3162 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3163 vect_update_inits_of_dr (dr, loop, niters);
3168 /* Function vect_do_peeling_for_alignment
3170 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3171 'niters' is set to the misalignment of one of the data references in the
3172 loop, thereby forcing it to refer to an aligned location at the beginning
3173 of the execution of this loop. The data reference for which we are
3174 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3176 static void
3177 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3179 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3180 tree niters_of_prolog_loop, ni_name;
3181 tree n_iters;
3182 struct loop *new_loop;
3184 if (vect_debug_details (NULL))
3185 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3187 ni_name = vect_build_loop_niters (loop_vinfo);
3188 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3190 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3191 new_loop =
3192 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3193 niters_of_prolog_loop, ni_name, true);
3194 #ifdef ENABLE_CHECKING
3195 gcc_assert (new_loop);
3196 slpeel_verify_cfg_after_peeling (new_loop, loop);
3197 #endif
3199 /* Update number of times loop executes. */
3200 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3201 LOOP_VINFO_NITERS (loop_vinfo) =
3202 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3204 /* Update the init conditions of the access functions of all data refs. */
3205 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3207 /* After peeling we have to reset scalar evolution analyzer. */
3208 scev_reset ();
3210 return;
3214 /* Function vect_transform_loop.
3216 The analysis phase has determined that the loop is vectorizable.
3217 Vectorize the loop - created vectorized stmts to replace the scalar
3218 stmts in the loop, and update the loop exit condition. */
3220 static void
3221 vect_transform_loop (loop_vec_info loop_vinfo,
3222 struct loops *loops ATTRIBUTE_UNUSED)
3224 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3225 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3226 int nbbs = loop->num_nodes;
3227 block_stmt_iterator si;
3228 int i;
3229 tree ratio = NULL;
3230 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3232 if (vect_debug_details (NULL))
3233 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3236 /* Peel the loop if there are data refs with unknown alignment.
3237 Only one data ref with unknown store is allowed. */
3239 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3240 vect_do_peeling_for_alignment (loop_vinfo, loops);
3242 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3243 compile time constant), or it is a constant that doesn't divide by the
3244 vectorization factor, then an epilog loop needs to be created.
3245 We therefore duplicate the loop: the original loop will be vectorized,
3246 and will compute the first (n/VF) iterations. The second copy of the loop
3247 will remain scalar and will compute the remaining (n%VF) iterations.
3248 (VF is the vectorization factor). */
3250 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3251 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3252 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3253 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3254 else
3255 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3256 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3258 /* 1) Make sure the loop header has exactly two entries
3259 2) Make sure we have a preheader basic block. */
3261 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3263 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3266 /* FORNOW: the vectorizer supports only loops which body consist
3267 of one basic block (header + empty latch). When the vectorizer will
3268 support more involved loop forms, the order by which the BBs are
3269 traversed need to be reconsidered. */
3271 for (i = 0; i < nbbs; i++)
3273 basic_block bb = bbs[i];
3275 for (si = bsi_start (bb); !bsi_end_p (si);)
3277 tree stmt = bsi_stmt (si);
3278 stmt_vec_info stmt_info;
3279 bool is_store;
3281 if (vect_debug_details (NULL))
3283 fprintf (dump_file, "------>vectorizing statement: ");
3284 print_generic_expr (dump_file, stmt, TDF_SLIM);
3286 stmt_info = vinfo_for_stmt (stmt);
3287 gcc_assert (stmt_info);
3288 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3290 bsi_next (&si);
3291 continue;
3293 #ifdef ENABLE_CHECKING
3294 /* FORNOW: Verify that all stmts operate on the same number of
3295 units and no inner unrolling is necessary. */
3296 gcc_assert
3297 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3298 == vectorization_factor);
3299 #endif
3300 /* -------- vectorize statement ------------ */
3301 if (vect_debug_details (NULL))
3302 fprintf (dump_file, "transform statement.");
3304 is_store = vect_transform_stmt (stmt, &si);
3305 if (is_store)
3307 /* free the attached stmt_vec_info and remove the stmt. */
3308 stmt_ann_t ann = stmt_ann (stmt);
3309 free (stmt_info);
3310 set_stmt_info (ann, NULL);
3311 bsi_remove (&si);
3312 continue;
3315 bsi_next (&si);
3316 } /* stmts in BB */
3317 } /* BBs in loop */
3319 slpeel_make_loop_iterate_ntimes (loop, ratio);
3321 if (vect_debug_details (loop))
3322 fprintf (dump_file,"Success! loop vectorized.");
3323 if (vect_debug_stats (loop))
3324 fprintf (dump_file, "LOOP VECTORIZED.");
3328 /* Function vect_is_simple_use.
3330 Input:
3331 LOOP - the loop that is being vectorized.
3332 OPERAND - operand of a stmt in LOOP.
3333 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3335 Returns whether a stmt with OPERAND can be vectorized.
3336 Supportable operands are constants, loop invariants, and operands that are
3337 defined by the current iteration of the loop. Unsupportable operands are
3338 those that are defined by a previous iteration of the loop (as is the case
3339 in reduction/induction computations). */
3341 static bool
3342 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3344 tree def_stmt;
3345 basic_block bb;
3347 if (def)
3348 *def = NULL_TREE;
3350 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3351 return true;
3353 if (TREE_CODE (operand) != SSA_NAME)
3354 return false;
3356 def_stmt = SSA_NAME_DEF_STMT (operand);
3357 if (def_stmt == NULL_TREE )
3359 if (vect_debug_details (NULL))
3360 fprintf (dump_file, "no def_stmt.");
3361 return false;
3364 /* empty stmt is expected only in case of a function argument.
3365 (Otherwise - we expect a phi_node or a modify_expr). */
3366 if (IS_EMPTY_STMT (def_stmt))
3368 tree arg = TREE_OPERAND (def_stmt, 0);
3369 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3370 return true;
3371 if (vect_debug_details (NULL))
3373 fprintf (dump_file, "Unexpected empty stmt: ");
3374 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3376 return false;
3379 /* phi_node inside the loop indicates an induction/reduction pattern.
3380 This is not supported yet. */
3381 bb = bb_for_stmt (def_stmt);
3382 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3384 if (vect_debug_details (NULL))
3385 fprintf (dump_file, "reduction/induction - unsupported.");
3386 return false; /* FORNOW: not supported yet. */
3389 /* Expecting a modify_expr or a phi_node. */
3390 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3391 || TREE_CODE (def_stmt) == PHI_NODE)
3393 if (def)
3394 *def = def_stmt;
3395 return true;
3398 return false;
3402 /* Function vect_analyze_operations.
3404 Scan the loop stmts and make sure they are all vectorizable. */
3406 static bool
3407 vect_analyze_operations (loop_vec_info loop_vinfo)
3409 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3410 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3411 int nbbs = loop->num_nodes;
3412 block_stmt_iterator si;
3413 unsigned int vectorization_factor = 0;
3414 int i;
3415 bool ok;
3416 tree scalar_type;
3418 if (vect_debug_details (NULL))
3419 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3421 for (i = 0; i < nbbs; i++)
3423 basic_block bb = bbs[i];
3425 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3427 tree stmt = bsi_stmt (si);
3428 unsigned int nunits;
3429 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3430 tree vectype;
3432 if (vect_debug_details (NULL))
3434 fprintf (dump_file, "==> examining statement: ");
3435 print_generic_expr (dump_file, stmt, TDF_SLIM);
3438 gcc_assert (stmt_info);
3440 /* skip stmts which do not need to be vectorized.
3441 this is expected to include:
3442 - the COND_EXPR which is the loop exit condition
3443 - any LABEL_EXPRs in the loop
3444 - computations that are used only for array indexing or loop
3445 control */
3447 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3449 if (vect_debug_details (NULL))
3450 fprintf (dump_file, "irrelevant.");
3451 continue;
3454 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3456 if (vect_debug_stats (loop) || vect_debug_details (loop))
3458 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3459 print_generic_expr (dump_file, stmt, TDF_SLIM);
3461 return false;
3464 if (STMT_VINFO_DATA_REF (stmt_info))
3465 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3466 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3467 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3468 else
3469 scalar_type = TREE_TYPE (stmt);
3471 if (vect_debug_details (NULL))
3473 fprintf (dump_file, "get vectype for scalar type: ");
3474 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3477 vectype = get_vectype_for_scalar_type (scalar_type);
3478 if (!vectype)
3480 if (vect_debug_stats (loop) || vect_debug_details (loop))
3482 fprintf (dump_file, "not vectorized: unsupported data-type ");
3483 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3485 return false;
3488 if (vect_debug_details (NULL))
3490 fprintf (dump_file, "vectype: ");
3491 print_generic_expr (dump_file, vectype, TDF_SLIM);
3493 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3495 ok = (vectorizable_operation (stmt, NULL, NULL)
3496 || vectorizable_assignment (stmt, NULL, NULL)
3497 || vectorizable_load (stmt, NULL, NULL)
3498 || vectorizable_store (stmt, NULL, NULL));
3500 if (!ok)
3502 if (vect_debug_stats (loop) || vect_debug_details (loop))
3504 fprintf (dump_file, "not vectorized: stmt not supported: ");
3505 print_generic_expr (dump_file, stmt, TDF_SLIM);
3507 return false;
3510 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3511 if (vect_debug_details (NULL))
3512 fprintf (dump_file, "nunits = %d", nunits);
3514 if (vectorization_factor)
3516 /* FORNOW: don't allow mixed units.
3517 This restriction will be relaxed in the future. */
3518 if (nunits != vectorization_factor)
3520 if (vect_debug_stats (loop) || vect_debug_details (loop))
3521 fprintf (dump_file, "not vectorized: mixed data-types");
3522 return false;
3525 else
3526 vectorization_factor = nunits;
3528 #ifdef ENABLE_CHECKING
3529 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3530 * vectorization_factor == UNITS_PER_SIMD_WORD);
3531 #endif
3535 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3537 if (vectorization_factor <= 1)
3539 if (vect_debug_stats (loop) || vect_debug_details (loop))
3540 fprintf (dump_file, "not vectorized: unsupported data-type");
3541 return false;
3543 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3545 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3546 fprintf (dump_file,
3547 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3548 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3550 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3551 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3553 if (vect_debug_stats (loop) || vect_debug_details (loop))
3554 fprintf (dump_file, "not vectorized: iteration count too small.");
3555 return false;
3558 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3559 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3561 if (vect_debug_stats (loop) || vect_debug_details (loop))
3562 fprintf (dump_file, "epilog loop required.");
3563 if (!vect_can_advance_ivs_p (loop))
3565 if (vect_debug_stats (loop) || vect_debug_details (loop))
3566 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3567 return false;
3569 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3571 if (vect_debug_stats (loop) || vect_debug_details (loop))
3572 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3573 return false;
3577 return true;
3581 /* Function exist_non_indexing_operands_for_use_p
3583 USE is one of the uses attached to STMT. Check if USE is
3584 used in STMT for anything other than indexing an array. */
3586 static bool
3587 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3589 tree operand;
3590 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3592 /* USE corresponds to some operand in STMT. If there is no data
3593 reference in STMT, then any operand that corresponds to USE
3594 is not indexing an array. */
3595 if (!STMT_VINFO_DATA_REF (stmt_info))
3596 return true;
3598 /* STMT has a data_ref. FORNOW this means that its of one of
3599 the following forms:
3600 -1- ARRAY_REF = var
3601 -2- var = ARRAY_REF
3602 (This should have been verified in analyze_data_refs).
3604 'var' in the second case corresponds to a def, not a use,
3605 so USE cannot correspond to any operands that are not used
3606 for array indexing.
3608 Therefore, all we need to check is if STMT falls into the
3609 first case, and whether var corresponds to USE. */
3611 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3612 return false;
3614 operand = TREE_OPERAND (stmt, 1);
3616 if (TREE_CODE (operand) != SSA_NAME)
3617 return false;
3619 if (operand == use)
3620 return true;
3622 return false;
3626 /* Function vect_is_simple_iv_evolution.
3628 FORNOW: A simple evolution of an induction variables in the loop is
3629 considered a polynomial evolution with constant step. */
3631 static bool
3632 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3633 tree * step, bool strict)
3635 tree init_expr;
3636 tree step_expr;
3638 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3640 /* When there is no evolution in this loop, the evolution function
3641 is not "simple". */
3642 if (evolution_part == NULL_TREE)
3643 return false;
3645 /* When the evolution is a polynomial of degree >= 2
3646 the evolution function is not "simple". */
3647 if (tree_is_chrec (evolution_part))
3648 return false;
3650 step_expr = evolution_part;
3651 init_expr = unshare_expr (initial_condition (access_fn));
3653 if (vect_debug_details (NULL))
3655 fprintf (dump_file, "step: ");
3656 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3657 fprintf (dump_file, ", init: ");
3658 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3661 *init = init_expr;
3662 *step = step_expr;
3664 if (TREE_CODE (step_expr) != INTEGER_CST)
3666 if (vect_debug_details (NULL))
3667 fprintf (dump_file, "step unknown.");
3668 return false;
3671 if (strict)
3672 if (!integer_onep (step_expr))
3674 if (vect_debug_details (NULL))
3675 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3676 return false;
3679 return true;
3683 /* Function vect_analyze_scalar_cycles.
3685 Examine the cross iteration def-use cycles of scalar variables, by
3686 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3687 cycles that they represent do not impede vectorization.
3689 FORNOW: Reduction as in the following loop, is not supported yet:
3690 loop1:
3691 for (i=0; i<N; i++)
3692 sum += a[i];
3693 The cross-iteration cycle corresponding to variable 'sum' will be
3694 considered too complicated and will impede vectorization.
3696 FORNOW: Induction as in the following loop, is not supported yet:
3697 loop2:
3698 for (i=0; i<N; i++)
3699 a[i] = i;
3701 However, the following loop *is* vectorizable:
3702 loop3:
3703 for (i=0; i<N; i++)
3704 a[i] = b[i];
3706 In both loops there exists a def-use cycle for the variable i:
3707 loop: i_2 = PHI (i_0, i_1)
3708 a[i_2] = ...;
3709 i_1 = i_2 + 1;
3710 GOTO loop;
3712 The evolution of the above cycle is considered simple enough,
3713 however, we also check that the cycle does not need to be
3714 vectorized, i.e - we check that the variable that this cycle
3715 defines is only used for array indexing or in stmts that do not
3716 need to be vectorized. This is not the case in loop2, but it
3717 *is* the case in loop3. */
3719 static bool
3720 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3722 tree phi;
3723 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3724 basic_block bb = loop->header;
3725 tree dummy;
3727 if (vect_debug_details (NULL))
3728 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3730 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3732 tree access_fn = NULL;
3734 if (vect_debug_details (NULL))
3736 fprintf (dump_file, "Analyze phi: ");
3737 print_generic_expr (dump_file, phi, TDF_SLIM);
3740 /* Skip virtual phi's. The data dependences that are associated with
3741 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3743 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3745 if (vect_debug_details (NULL))
3746 fprintf (dump_file, "virtual phi. skip.");
3747 continue;
3750 /* Analyze the evolution function. */
3752 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3753 those of loop induction variables; This property is verified here.
3755 Furthermore, if that induction variable is used in an operation
3756 that needs to be vectorized (i.e, is not solely used to index
3757 arrays and check the exit condition) - we do not support its
3758 vectorization yet. This property is verified in vect_is_simple_use,
3759 during vect_analyze_operations. */
3761 access_fn = /* instantiate_parameters
3762 (loop,*/
3763 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3765 if (!access_fn)
3767 if (vect_debug_stats (loop) || vect_debug_details (loop))
3768 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3769 return false;
3772 if (vect_debug_details (NULL))
3774 fprintf (dump_file, "Access function of PHI: ");
3775 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3778 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3779 &dummy, false))
3781 if (vect_debug_stats (loop) || vect_debug_details (loop))
3782 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3783 return false;
3787 return true;
3791 /* Function vect_analyze_data_ref_dependence.
3793 Return TRUE if there (might) exist a dependence between a memory-reference
3794 DRA and a memory-reference DRB. */
3796 static bool
3797 vect_analyze_data_ref_dependence (struct data_reference *dra,
3798 struct data_reference *drb,
3799 struct loop *loop)
3801 bool differ_p;
3802 struct data_dependence_relation *ddr;
3804 if (!array_base_name_differ_p (dra, drb, &differ_p))
3806 if (vect_debug_stats (loop) || vect_debug_details (loop))
3808 fprintf (dump_file,
3809 "not vectorized: can't determine dependence between: ");
3810 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3811 fprintf (dump_file, " and ");
3812 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3814 return true;
3817 if (differ_p)
3818 return false;
3820 ddr = initialize_data_dependence_relation (dra, drb);
3821 compute_affine_dependence (ddr);
3823 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3824 return false;
3826 if (vect_debug_stats (loop) || vect_debug_details (loop))
3828 fprintf (dump_file,
3829 "not vectorized: possible dependence between data-refs ");
3830 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3831 fprintf (dump_file, " and ");
3832 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3835 return true;
3839 /* Function vect_analyze_data_ref_dependences.
3841 Examine all the data references in the loop, and make sure there do not
3842 exist any data dependences between them.
3844 TODO: dependences which distance is greater than the vectorization factor
3845 can be ignored. */
3847 static bool
3848 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3850 unsigned int i, j;
3851 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3852 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3853 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3855 /* Examine store-store (output) dependences. */
3857 if (vect_debug_details (NULL))
3858 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3860 if (vect_debug_details (NULL))
3861 fprintf (dump_file, "compare all store-store pairs.");
3863 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3865 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3867 struct data_reference *dra =
3868 VARRAY_GENERIC_PTR (loop_write_refs, i);
3869 struct data_reference *drb =
3870 VARRAY_GENERIC_PTR (loop_write_refs, j);
3871 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3872 return false;
3876 /* Examine load-store (true/anti) dependences. */
3878 if (vect_debug_details (NULL))
3879 fprintf (dump_file, "compare all load-store pairs.");
3881 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3883 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3885 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3886 struct data_reference *drb =
3887 VARRAY_GENERIC_PTR (loop_write_refs, j);
3888 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3889 return false;
3893 return true;
3897 /* Function vect_get_first_index.
3899 REF is a data reference.
3900 If it is an ARRAY_REF: if its lower bound is simple enough,
3901 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3902 If it is not an ARRAY_REF: REF has no "first index";
3903 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3905 static bool
3906 vect_get_first_index (tree ref, tree *array_first_index)
3908 tree array_start;
3910 if (TREE_CODE (ref) != ARRAY_REF)
3911 *array_first_index = size_zero_node;
3912 else
3914 array_start = array_ref_low_bound (ref);
3915 if (!host_integerp (array_start, 0))
3917 if (vect_debug_details (NULL))
3919 fprintf (dump_file, "array min val not simple integer cst.");
3920 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3922 return false;
3924 *array_first_index = array_start;
3927 return true;
3931 /* Function vect_compute_array_base_alignment.
3932 A utility function of vect_compute_array_ref_alignment.
3934 Compute the misalignment of ARRAY in bits.
3936 Input:
3937 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3938 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3939 if NULL: don't compute misalignment, just return the base of ARRAY.
3940 PREV_DIMENSIONS - initialized to one.
3941 MISALIGNMENT - the computed misalignment in bits.
3943 Output:
3944 If VECTYPE is not NULL:
3945 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3946 the base of the array, and put the computed misalignment in MISALIGNMENT.
3947 If VECTYPE is NULL:
3948 Return the base of the array.
3950 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3951 a[idx_N]...[idx_2][idx_1] is
3952 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3953 ... + idx_N * dim_0 * ... * dim_N-1}.
3954 (The misalignment of &a is not checked here).
3955 Note, that every term contains dim_0, therefore, if dim_0 is a
3956 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3957 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3958 NUINTS, we can say that the misalignment of the sum is equal to
3959 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3960 we can't determine this array misalignment, and we return
3961 false.
3962 We proceed recursively in this manner, accumulating total misalignment
3963 and the multiplication of previous dimensions for correct misalignment
3964 calculation. */
3966 static tree
3967 vect_compute_array_base_alignment (tree array,
3968 tree vectype,
3969 tree *prev_dimensions,
3970 tree *misalignment)
3972 tree index;
3973 tree domain;
3974 tree dimension_size;
3975 tree mis;
3976 tree bits_per_vectype;
3977 tree bits_per_vectype_unit;
3979 /* The 'stop condition' of the recursion. */
3980 if (TREE_CODE (array) != ARRAY_REF)
3981 return array;
3983 if (!vectype)
3984 /* Just get the base decl. */
3985 return vect_compute_array_base_alignment
3986 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3988 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
3989 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
3990 return NULL_TREE;
3992 domain = TYPE_DOMAIN (TREE_TYPE (array));
3993 dimension_size =
3994 int_const_binop (PLUS_EXPR,
3995 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
3996 TYPE_MIN_VALUE (domain), 1),
3997 size_one_node, 1);
3999 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4000 is a multiple of NUNITS:
4002 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4004 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4005 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4006 if (integer_zerop (mis))
4007 /* This array is aligned. Continue just in order to get the base decl. */
4008 return vect_compute_array_base_alignment
4009 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4011 index = TREE_OPERAND (array, 1);
4012 if (!host_integerp (index, 1))
4013 /* The current index is not constant. */
4014 return NULL_TREE;
4016 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4018 bits_per_vectype = fold_convert (unsigned_type_node,
4019 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4020 GET_MODE_SIZE (TYPE_MODE (vectype))));
4021 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4022 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4023 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4025 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4026 earlier:
4028 *misalignment =
4029 (*misalignment + index_val * dimension_size * *prev_dimensions)
4030 % vectype_nunits;
4033 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4034 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4035 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4036 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4037 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4040 *prev_dimensions = int_const_binop (MULT_EXPR,
4041 *prev_dimensions, dimension_size, 1);
4043 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4044 prev_dimensions,
4045 misalignment);
4049 /* Function vect_compute_data_ref_alignment
4051 Compute the misalignment of the data reference DR.
4053 Output:
4054 1. If during the misalignment computation it is found that the data reference
4055 cannot be vectorized then false is returned.
4056 2. DR_MISALIGNMENT (DR) is defined.
4058 FOR NOW: No analysis is actually performed. Misalignment is calculated
4059 only for trivial cases. TODO. */
4061 static bool
4062 vect_compute_data_ref_alignment (struct data_reference *dr,
4063 loop_vec_info loop_vinfo)
4065 tree stmt = DR_STMT (dr);
4066 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4067 tree ref = DR_REF (dr);
4068 tree vectype;
4069 tree scalar_type;
4070 tree offset = size_zero_node;
4071 tree base, bit_offset, alignment;
4072 tree unit_bits = fold_convert (unsigned_type_node,
4073 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4074 tree dr_base;
4075 bool base_aligned_p;
4077 if (vect_debug_details (NULL))
4078 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4080 /* Initialize misalignment to unknown. */
4081 DR_MISALIGNMENT (dr) = -1;
4083 scalar_type = TREE_TYPE (ref);
4084 vectype = get_vectype_for_scalar_type (scalar_type);
4085 if (!vectype)
4087 if (vect_debug_details (NULL))
4089 fprintf (dump_file, "no vectype for stmt: ");
4090 print_generic_expr (dump_file, stmt, TDF_SLIM);
4091 fprintf (dump_file, " scalar_type: ");
4092 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4094 /* It is not possible to vectorize this data reference. */
4095 return false;
/* Record the vector type of this stmt. */
4097 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4098 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
/* For an ARRAY_REF the ref itself is the base we analyze; otherwise use
   the base recorded earlier in STMT_VINFO_VECT_DR_BASE. */
4100 if (TREE_CODE (ref) == ARRAY_REF)
4101 dr_base = ref;
4102 else
4103 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4105 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4106 loop_vinfo, &bit_offset, &base_aligned_p);
4107 if (!base)
4109 if (vect_debug_details (NULL))
4111 fprintf (dump_file, "Unknown alignment for access: ");
4112 print_generic_expr (dump_file,
4113 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
/* Unknown alignment is not a failure: DR_MISALIGNMENT stays -1. */
4115 return true;
4118 if (!base_aligned_p)
4120 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4122 if (vect_debug_details (NULL))
4124 fprintf (dump_file, "can't force alignment of ref: ");
4125 print_generic_expr (dump_file, ref, TDF_SLIM);
4127 return true;
4130 /* Force the alignment of the decl.
4131 NOTE: This is the only change to the code we make during
4132 the analysis phase, before deciding to vectorize the loop. */
4133 if (vect_debug_details (NULL))
4134 fprintf (dump_file, "force alignment");
4135 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4136 DECL_USER_ALIGN (base) = 1;
4139 /* At this point we assume that the base is aligned, and the offset from it
4140 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4141 gcc_assert (base_aligned_p
4142 || (TREE_CODE (base) == VAR_DECL
4143 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4145 /* Convert into bytes. */
4146 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4147 /* Check that there is no remainder in bits. */
4148 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4149 if (!integer_zerop (bit_offset))
4151 if (vect_debug_details (NULL))
4153 fprintf (dump_file, "bit offset alignment: ");
4154 print_generic_expr (dump_file, bit_offset, TDF_SLIM)
4156 return false;
4159 /* Alignment required, in bytes: */
4160 alignment = fold_convert (unsigned_type_node,
4161 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4163 /* Modulo alignment. */
4164 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4165 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4167 if (vect_debug_details (NULL))
4168 fprintf (dump_file, "unexpected misalign value");
4169 return false;
/* Known misalignment, in bytes modulo the vector alignment. */
4172 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4174 if (vect_debug_details (NULL))
4175 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4177 return true;
4181 /* Function vect_compute_array_ref_alignment
4183 Compute the alignment of an array-ref.
4184 The alignment we compute here is relative to
4185 TYPE_ALIGN(VECTYPE) boundary.
4187 Output:
4188 OFFSET - the alignment in bits
4189 Return value - the base of the array-ref. E.g,
4190 if the array-ref is a.b[k].c[i][j] the returned
4191 base is a.b[k].c
4194 static tree
4195 vect_compute_array_ref_alignment (struct data_reference *dr,
4196 loop_vec_info loop_vinfo,
4197 tree vectype,
4198 tree *offset)
4200 tree array_first_index = size_zero_node;
4201 tree init;
4202 tree ref = DR_REF (dr);
4203 tree scalar_type = TREE_TYPE (ref);
4204 tree oprnd0 = TREE_OPERAND (ref, 0);
4205 tree dims = size_one_node;
4206 tree misalign = size_zero_node;
4207 tree next_ref, this_offset = size_zero_node;
4208 tree nunits;
4209 tree nbits;
4211 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4212 /* The reference is an array without its last index. */
4213 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4214 &misalign)
4215 else
4216 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4217 &misalign);
4218 if (!vectype)
4219 /* Alignment is not requested. Just return the base. */
4220 return next_ref;
4222 /* Compute alignment. */
4223 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4224 return NULL_TREE;
4225 this_offset = misalign;
4227 /* Check the first index accessed. */
4228 if (!vect_get_first_index (ref, &array_first_index))
4230 if (vect_debug_details (NULL))
4231 fprintf (dump_file, "no first_index for array.");
4232 return NULL_TREE;
4235 /* Check the index of the array_ref. */
4236 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4237 LOOP_VINFO_LOOP (loop_vinfo)->num);
4239 /* FORNOW: In order to simplify the handling of alignment, we make sure
4240 that the first location at which the array is accessed ('init') is on an
4241 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4242 This is too conservative, since we require that
4243 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4244 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4245 This should be relaxed in the future. */
4247 if (!init || !host_integerp (init, 0))
4249 if (vect_debug_details (NULL))
4250 fprintf (dump_file, "non constant init. ");
4251 return NULL_TREE;
4254 /* bytes per scalar element: */
4255 nunits = fold_convert (unsigned_type_node,
4256 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4257 nbits = int_const_binop (MULT_EXPR, nunits,
4258 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4260 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4261 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4262 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4263 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4265 /* TODO: allow negative misalign values. */
4266 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4268 if (vect_debug_details (NULL))
4269 fprintf (dump_file, "unexpected misalign value");
4270 return NULL_TREE;
/* Misalignment in bits, relative to the TYPE_ALIGN (VECTYPE) boundary
   (see the function-level comment above). */
4272 *offset = misalign;
4273 return next_ref;
4277 /* Function vect_compute_data_refs_alignment
4279 Compute the misalignment of data references in the loop.
4280 This pass may take place at function granularity instead of at loop
4281 granularity.
4283 FOR NOW: No analysis is actually performed. Misalignment is calculated
4284 only for trivial cases. TODO. */
4286 static bool
4287 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4289 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4290 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4291 unsigned int i;
4293 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4295 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4296 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4297 return false;
4300 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4302 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4303 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4304 return false;
4307 return true;
4311 /* Function vect_enhance_data_refs_alignment
4313 This pass will use loop versioning and loop peeling in order to enhance
4314 the alignment of data references in the loop.
4316 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4317 original loop is to be vectorized; Any other loops that are created by
4318 the transformations performed in this pass - are not supposed to be
4319 vectorized. This restriction will be relaxed. */
4321 static void
4322 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4324 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4325 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4326 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4327 unsigned int i;
4330 This pass will require a cost model to guide it whether to apply peeling
4331 or versioning or a combination of the two. For example, the scheme that
4332 intel uses when given a loop with several memory accesses, is as follows:
4333 choose one memory access ('p') which alignment you want to force by doing
4334 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4335 other accesses are not necessarily aligned, or (2) use loop versioning to
4336 generate one loop in which all accesses are aligned, and another loop in
4337 which only 'p' is necessarily aligned.
4339 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4340 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4341 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4343 Devising a cost model is the most critical aspect of this work. It will
4344 guide us on which access to peel for, whether to use loop versioning, how
4345 many versions to create, etc. The cost model will probably consist of
4346 generic considerations as well as target specific considerations (on
4347 powerpc for example, misaligned stores are more painful than misaligned
4348 loads).
4350 Here is the general steps involved in alignment enhancements:
4352 -- original loop, before alignment analysis:
4353 for (i=0; i<N; i++){
4354 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4355 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4358 -- After vect_compute_data_refs_alignment:
4359 for (i=0; i<N; i++){
4360 x = q[i]; # DR_MISALIGNMENT(q) = 3
4361 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4364 -- Possibility 1: we do loop versioning:
4365 if (p is aligned) {
4366 for (i=0; i<N; i++){ # loop 1A
4367 x = q[i]; # DR_MISALIGNMENT(q) = 3
4368 p[i] = y; # DR_MISALIGNMENT(p) = 0
4371 else {
4372 for (i=0; i<N; i++){ # loop 1B
4373 x = q[i]; # DR_MISALIGNMENT(q) = 3
4374 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4378 -- Possibility 2: we do loop peeling:
4379 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4380 x = q[i];
4381 p[i] = y;
4383 for (i = 3; i < N; i++){ # loop 2A
4384 x = q[i]; # DR_MISALIGNMENT(q) = 0
4385 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4388 -- Possibility 3: combination of loop peeling and versioning:
4389 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4390 x = q[i];
4391 p[i] = y;
4393 if (p is aligned) {
4394 for (i = 3; i<N; i++){ # loop 3A
4395 x = q[i]; # DR_MISALIGNMENT(q) = 0
4396 p[i] = y; # DR_MISALIGNMENT(p) = 0
4399 else {
4400 for (i = 3; i<N; i++){ # loop 3B
4401 x = q[i]; # DR_MISALIGNMENT(q) = 0
4402 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4406 These loops are later passed to loop_transform to be vectorized. The
4407 vectorizer will use the alignment information to guide the transformation
4408 (whether to generate regular loads/stores, or with special handling for
4409 misalignment).
4412 /* (1) Peeling to force alignment. */
4414 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4415 Considerations:
4416 + How many accesses will become aligned due to the peeling
4417 - How many accesses will become unaligned due to the peeling,
4418 and the cost of misaligned accesses.
4419 - The cost of peeling (the extra runtime checks, the increase
4420 in code size).
4422 The scheme we use FORNOW: peel to force the alignment of the first
4423 misaligned store in the loop.
4424 Rationale: misaligned stores are not yet supported.
4426 TODO: Use a better cost model. */
4428 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4430 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4431 if (!aligned_access_p (dr))
4433 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4434 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4435 break;
/* No misaligned store was found above - no peeling is needed. */
4439 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4441 if (vect_debug_details (loop))
4442 fprintf (dump_file, "Peeling for alignment will not be applied.");
4443 return;
4445 else
4446 if (vect_debug_details (loop))
4447 fprintf (dump_file, "Peeling for alignment will be applied.");
4450 /* (1.2) Update the alignment info according to the peeling factor.
4451 If the misalignment of the DR we peel for is M, then the
4452 peeling factor is VF - M, and the misalignment of each access DR_i
4453 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4454 If the misalignment of the DR we peel for is unknown, then the
4455 misalignment of each access DR_i in the loop is also unknown.
4457 FORNOW: set the misalignment of the accesses to unknown even
4458 if the peeling factor is known at compile time.
4460 TODO: - if the peeling factor is known at compile time, use that
4461 when updating the misalignment info of the loop DRs.
4462 - consider accesses that are known to have the same
4463 alignment, even if that alignment is unknown. */
4465 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4467 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4468 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4469 DR_MISALIGNMENT (dr) = 0;
4470 else
4471 DR_MISALIGNMENT (dr) = -1;
4473 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4475 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4476 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4477 DR_MISALIGNMENT (dr) = 0;
4478 else
4479 DR_MISALIGNMENT (dr) = -1;
4484 /* Function vect_analyze_data_refs_alignment
4486 Analyze the alignment of the data-references in the loop.
4487 FOR NOW: Until support for misaligned accesses is in place, only if all
4488 accesses are aligned can the loop be vectorized. This restriction will be
4489 relaxed. */
4491 static bool
4492 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4494 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4495 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4496 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4497 enum dr_alignment_support supportable_dr_alignment;
4498 unsigned int i;
4500 if (vect_debug_details (NULL))
4501 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4504 /* This pass may take place at function granularity instead of at loop
4505 granularity. */
4507 if (!vect_compute_data_refs_alignment (loop_vinfo))
4509 if (vect_debug_details (loop) || vect_debug_stats (loop))
4510 fprintf (dump_file,
4511 "not vectorized: can't calculate alignment for data ref.");
4512 return false;
4516 /* This pass will decide on using loop versioning and/or loop peeling in
4517 order to enhance the alignment of data references in the loop. */
4519 vect_enhance_data_refs_alignment (loop_vinfo);
4522 /* Finally, check that all the data references in the loop can be
4523 handled with respect to their alignment. */
4525 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4527 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4528 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4529 if (!supportable_dr_alignment)
4531 if (vect_debug_details (loop) || vect_debug_stats (loop))
4532 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4533 return false;
4536 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4538 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4539 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4540 if (!supportable_dr_alignment)
4542 if (vect_debug_details (loop) || vect_debug_stats (loop))
4543 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4544 return false;
4548 return true;
4552 /* Function vect_analyze_data_ref_access.
4554 Analyze the access pattern of the data-reference DR. For now, a data access
4555 has to be consecutive and aligned to be considered vectorizable. */
4557 static bool
4558 vect_analyze_data_ref_access (struct data_reference *dr)
4560 varray_type access_fns = DR_ACCESS_FNS (dr);
4561 tree access_fn;
4562 tree init, step;
4563 unsigned int dimensions, i;
4565 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4566 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4567 access is contiguous). */
4568 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4570 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4572 access_fn = DR_ACCESS_FN (dr, i);
4574 if (evolution_part_in_loop_num (access_fn,
4575 loop_containing_stmt (DR_STMT (dr))->num))
4577 /* Evolution part is not NULL in this loop (it is neither constant
4578 nor invariant). */
4579 if (vect_debug_details (NULL))
4581 fprintf (dump_file,
4582 "not vectorized: complicated multidim. array access.");
4583 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4585 return false;
4589 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4590 if (!evolution_function_is_constant_p (access_fn)
4591 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4592 access_fn, &init, &step, true))
4594 if (vect_debug_details (NULL))
4596 fprintf (dump_file, "not vectorized: complicated access function.");
4597 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4599 return false;
4602 return true;
4606 /* Function vect_analyze_data_ref_accesses.
4608 Analyze the access pattern of all the data references in the loop.
4610 FORNOW: the only access pattern that is considered vectorizable is a
4611 simple step 1 (consecutive) access.
4613 FORNOW: handle only arrays and pointer accesses. */
4615 static bool
4616 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4618 unsigned int i;
4619 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4620 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4622 if (vect_debug_details (NULL))
4623 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4625 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4627 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4628 bool ok = vect_analyze_data_ref_access (dr);
4629 if (!ok)
4631 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4632 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4633 fprintf (dump_file, "not vectorized: complicated access pattern.");
4634 return false;
4638 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4640 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4641 bool ok = vect_analyze_data_ref_access (dr);
4642 if (!ok)
4644 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4645 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4646 fprintf (dump_file, "not vectorized: complicated access pattern.");
4647 return false;
4651 return true;
4655 /* Function vect_analyze_pointer_ref_access.
4657 Input:
4658 STMT - a stmt that contains a data-ref
4659 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4661 If the data-ref access is vectorizable, return a data_reference structure
4662 that represents it (DR). Otherwise - return NULL. */
4664 static struct data_reference *
4665 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4667 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4668 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4669 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4670 tree init, step;
4671 int step_val;
4672 tree reftype, innertype;
4673 enum machine_mode innermode;
4674 tree indx_access_fn;
4675 int loopnum = loop->num;
4676 struct data_reference *dr;
4678 if (!access_fn)
4680 if (vect_debug_stats (loop) || vect_debug_details (loop))
4681 fprintf (dump_file, "not vectorized: complicated pointer access.");
4682 return NULL;
4685 if (vect_debug_details (NULL))
4687 fprintf (dump_file, "Access function of ptr: ");
4688 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4691 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4693 if (vect_debug_stats (loop) || vect_debug_details (loop))
4694 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4695 return NULL;
4698 STRIP_NOPS (init);
4700 if (!host_integerp (step,0))
4702 if (vect_debug_stats (loop) || vect_debug_details (loop))
4703 fprintf (dump_file,
4704 "not vectorized: non constant step for pointer access.");
4705 return NULL;
4708 step_val = TREE_INT_CST_LOW (step);
4710 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4711 if (TREE_CODE (reftype) != POINTER_TYPE)
4713 if (vect_debug_stats (loop) || vect_debug_details (loop))
4714 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4715 return NULL;
4718 reftype = TREE_TYPE (init);
4719 if (TREE_CODE (reftype) != POINTER_TYPE)
4721 if (vect_debug_stats (loop) || vect_debug_details (loop))
4722 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4723 return NULL;
4726 innertype = TREE_TYPE (reftype);
4727 innermode = TYPE_MODE (innertype);
4728 if (GET_MODE_SIZE (innermode) != step_val)
4730 /* FORNOW: support only consecutive access */
4731 if (vect_debug_stats (loop) || vect_debug_details (loop))
4732 fprintf (dump_file, "not vectorized: non consecutive access.");
4733 return NULL;
4736 indx_access_fn =
4737 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4738 if (vect_debug_details (NULL))
4740 fprintf (dump_file, "Access function of ptr indx: ");
4741 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4743 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4744 return dr;
4748 /* Function vect_get_symbl_and_dr.
4750 The function returns SYMBL - the relevant variable for
4751 memory tag (for aliasing purposes).
4752 Also data reference structure DR is created.
4754 Input:
4755 MEMREF - data reference in STMT
4756 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4758 Output:
4759 DR - data_reference struct for MEMREF
4760 return value - the relevant variable for memory tag (for aliasing purposes).
4764 static tree
4765 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4766 loop_vec_info loop_vinfo, struct data_reference **dr)
4768 tree symbl, oprnd0, oprnd1;
4769 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4770 tree offset;
4771 tree array_base, base;
4772 struct data_reference *new_dr;
4773 bool base_aligned_p;
4775 *dr = NULL;
4776 switch (TREE_CODE (memref))
4778 case INDIRECT_REF:
4779 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4780 if (! new_dr)
4781 return NULL_TREE;
4782 *dr = new_dr;
4783 symbl = DR_BASE_NAME (new_dr);
4784 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4786 switch (TREE_CODE (symbl))
4788 case PLUS_EXPR:
4789 case MINUS_EXPR:
4790 oprnd0 = TREE_OPERAND (symbl, 0);
4791 oprnd1 = TREE_OPERAND (symbl, 1);
4793 STRIP_NOPS(oprnd1);
4794 /* Only {address_base + offset} expressions are supported,
4795 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4796 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4797 TODO: swap operands if {offset + address_base}. */
4798 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4799 && TREE_CODE (oprnd1) != INTEGER_CST)
4800 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4801 return NULL_TREE;
4803 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4804 symbl = oprnd0;
4805 else
4806 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4807 loop_vinfo, &new_dr);
/* NOTE(review): no 'break' is visible here before the following case
   labels - apparent fall-through into the 'break' below.  Behaviorally
   equivalent to a break, but confirm against the original source.  */
4809 case SSA_NAME:
4810 case ADDR_EXPR:
4811 /* symbl remains unchanged. */
4812 break;
4814 default:
4815 if (vect_debug_details (NULL))
4817 fprintf (dump_file, "unhandled data ref: ");
4818 print_generic_expr (dump_file, memref, TDF_SLIM);
4819 fprintf (dump_file, " (symbl ");
4820 print_generic_expr (dump_file, symbl, TDF_SLIM);
4821 fprintf (dump_file, ") in stmt ");
4822 print_generic_expr (dump_file, stmt, TDF_SLIM);
4824 return NULL_TREE;
4826 break;
4828 case ARRAY_REF:
4829 offset = size_zero_node;
4831 /* Store the array base in the stmt info.
4832 For one dimensional array ref a[i], the base is a,
4833 for multidimensional a[i1][i2]..[iN], the base is
4834 a[i1][i2]..[iN-1]. */
4835 array_base = TREE_OPERAND (memref, 0);
4836 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4838 new_dr = analyze_array (stmt, memref, is_read);
4839 *dr = new_dr;
4841 /* Find the relevant symbol for aliasing purposes. */
4842 base = DR_BASE_NAME (new_dr);
4843 switch (TREE_CODE (base))
4845 case VAR_DECL:
4846 symbl = base;
4847 break;
4849 case INDIRECT_REF:
4850 symbl = TREE_OPERAND (base, 0);
4851 break;
4853 case COMPONENT_REF:
4854 /* Could have recorded more accurate information -
4855 i.e, the actual FIELD_DECL that is being referenced -
4856 but later passes expect VAR_DECL as the nmt. */
4857 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4858 loop_vinfo, &offset, &base_aligned_p);
4859 if (symbl)
4860 break;
4861 /* fall through */
4862 default:
4863 if (vect_debug_details (NULL))
4865 fprintf (dump_file, "unhandled struct/class field access ");
4866 print_generic_expr (dump_file, stmt, TDF_SLIM);
4868 return NULL_TREE;
4870 break;
4872 default:
4873 if (vect_debug_details (NULL))
4875 fprintf (dump_file, "unhandled data ref: ");
4876 print_generic_expr (dump_file, memref, TDF_SLIM);
4877 fprintf (dump_file, " in stmt ");
4878 print_generic_expr (dump_file, stmt, TDF_SLIM);
4880 return NULL_TREE;
4882 return symbl;
4886 /* Function vect_analyze_data_refs.
4888 Find all the data references in the loop.
4890 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4891 which base is really an array (not a pointer) and which alignment
4892 can be forced. This restriction will be relaxed. */
4894 static bool
4895 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4897 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4898 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4899 int nbbs = loop->num_nodes;
4900 block_stmt_iterator si;
4901 int j;
4902 struct data_reference *dr;
4903 tree tag;
4904 tree address_base;
4905 bool base_aligned_p;
4906 tree offset;
4908 if (vect_debug_details (NULL))
4909 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4911 for (j = 0; j < nbbs; j++)
4913 basic_block bb = bbs[j];
4914 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4916 bool is_read = false;
4917 tree stmt = bsi_stmt (si);
4918 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4919 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4920 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4921 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4922 varray_type *datarefs = NULL;
4923 int nvuses, nv_may_defs, nv_must_defs;
4924 tree memref = NULL;
4925 tree symbl;
4927 /* Assumption: there exists a data-ref in stmt, if and only if
4928 it has vuses/vdefs. */
4930 if (!vuses && !v_may_defs && !v_must_defs)
4931 continue;
4933 nvuses = NUM_VUSES (vuses);
4934 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4935 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A stmt that both reads and writes memory is not handled. */
4937 if (nvuses && (nv_may_defs || nv_must_defs))
4939 if (vect_debug_details (NULL))
4941 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4942 print_generic_expr (dump_file, stmt, TDF_SLIM);
4944 return false;
4947 if (TREE_CODE (stmt) != MODIFY_EXPR)
4949 if (vect_debug_details (NULL))
4951 fprintf (dump_file, "unexpected vops in stmt: ");
4952 print_generic_expr (dump_file, stmt, TDF_SLIM);
4954 return false;
/* The data-ref is on the RHS for a load, LHS for a store. */
4957 if (vuses)
4959 memref = TREE_OPERAND (stmt, 1);
4960 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4961 is_read = true;
4963 else /* vdefs */
4965 memref = TREE_OPERAND (stmt, 0);
4966 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4967 is_read = false;
4970 /* Analyze MEMREF. If it is of a supported form, build data_reference
4971 struct for it (DR) and find the relevant symbol for aliasing
4972 purposes. */
4973 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4974 &dr);
4975 if (!symbl)
4977 if (vect_debug_stats (loop) || vect_debug_details (loop))
4979 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4980 print_generic_expr (dump_file, stmt, TDF_SLIM);
4982 return false;
4985 /* Find and record the memtag assigned to this data-ref. */
4986 switch (TREE_CODE (symbl))
4988 case VAR_DECL:
4989 STMT_VINFO_MEMTAG (stmt_info) = symbl;
4990 break;
4992 case SSA_NAME:
4993 symbl = SSA_NAME_VAR (symbl);
4994 tag = get_var_ann (symbl)->type_mem_tag;
4995 if (!tag)
4997 tree ptr = TREE_OPERAND (memref, 0);
4998 if (TREE_CODE (ptr) == SSA_NAME)
4999 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5001 if (!tag)
5003 if (vect_debug_stats (loop) || vect_debug_details (loop))
5004 fprintf (dump_file, "not vectorized: no memtag for ref.");
5005 return false;
5007 STMT_VINFO_MEMTAG (stmt_info) = tag;
5008 break;
5010 case ADDR_EXPR:
5011 address_base = TREE_OPERAND (symbl, 0);
5013 switch (TREE_CODE (address_base))
5015 case ARRAY_REF:
/* NOTE(review): this overwrites DR with a data-ref built for the
   address expression; the overwritten DR is what gets pushed onto
   DATAREFS below - verify this is the intended data-ref.  */
5016 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5017 DR_IS_READ(dr));
5018 tag = vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr),
5019 NULL_TREE, loop_vinfo, &offset, &base_aligned_p);
5020 if (!tag)
5022 if (vect_debug_stats (loop) || vect_debug_details (loop))
5023 fprintf (dump_file, "not vectorized: no memtag for ref.");
5024 return false;
5026 STMT_VINFO_MEMTAG (stmt_info) = tag;
5027 break;
5029 case VAR_DECL:
5030 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5031 break;
5033 default:
5034 if (vect_debug_stats (loop) || vect_debug_details (loop))
5036 fprintf (dump_file,
5037 "not vectorized: unhandled address expr: ");
5038 print_generic_expr (dump_file, stmt, TDF_SLIM);
5040 return false;
5042 break;
5044 default:
5045 if (vect_debug_stats (loop) || vect_debug_details (loop))
5047 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5048 print_generic_expr (dump_file, memref, TDF_SLIM);
5050 return false;
/* Record the data-ref in the loop info and in the stmt info. */
5053 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5054 STMT_VINFO_DATA_REF (stmt_info) = dr;
5058 return true;
5062 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5064 /* Function vect_mark_relevant.
5066 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5068 static void
5069 vect_mark_relevant (varray_type worklist, tree stmt)
5071 stmt_vec_info stmt_info;
5073 if (vect_debug_details (NULL))
5074 fprintf (dump_file, "mark relevant.");
5076 if (TREE_CODE (stmt) == PHI_NODE)
5078 VARRAY_PUSH_TREE (worklist, stmt);
5079 return;
5082 stmt_info = vinfo_for_stmt (stmt);
5084 if (!stmt_info)
5086 if (vect_debug_details (NULL))
5088 fprintf (dump_file, "mark relevant: no stmt info!!.");
5089 print_generic_expr (dump_file, stmt, TDF_SLIM);
5091 return;
5094 if (STMT_VINFO_RELEVANT_P (stmt_info))
5096 if (vect_debug_details (NULL))
5097 fprintf (dump_file, "already marked relevant.");
5098 return;
5101 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5102 VARRAY_PUSH_TREE (worklist, stmt);
5106 /* Function vect_stmt_relevant_p.
5108 Return true if STMT in loop that is represented by LOOP_VINFO is
5109 "relevant for vectorization".
5111 A stmt is considered "relevant for vectorization" if:
5112 - it has uses outside the loop.
5113 - it has vdefs (it alters memory).
5114 - control stmts in the loop (except for the exit condition).
5116 CHECKME: what other side effects would the vectorizer allow? */
5118 static bool
5119 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5121 v_may_def_optype v_may_defs;
5122 v_must_def_optype v_must_defs;
5123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5124 int i;
5125 dataflow_t df;
5126 int num_uses;
5128 /* cond stmt other than loop exit cond. */
5129 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5130 return true;
5132 /* changing memory. */
5133 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5134 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5135 if (v_may_defs || v_must_defs)
5137 if (vect_debug_details (NULL))
5138 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5139 return true;
5142 /* uses outside the loop. */
5143 df = get_immediate_uses (stmt);
5144 num_uses = num_immediate_uses (df);
5145 for (i = 0; i < num_uses; i++)
5147 tree use = immediate_use (df, i);
5148 basic_block bb = bb_for_stmt (use);
5149 if (!flow_bb_inside_loop_p (loop, bb))
5151 if (vect_debug_details (NULL))
5152 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5153 return true;
5157 return false;
5161 /* Function vect_mark_stmts_to_be_vectorized.
5163 Not all stmts in the loop need to be vectorized. For example:
5165 for i...
5166 for j...
5167 1. T0 = i + j
5168 2. T1 = a[T0]
5170 3. j = j + 1
5172 Stmt 1 and 3 do not need to be vectorized, because loop control and
5173 addressing of vectorized data-refs are handled differently.
5175 This pass detects such stmts. */
/* Worklist algorithm:
   1) Clear the "relevant" flag of every stmt in the loop, and seed the
      worklist with the stmts that are inherently relevant
      (see vect_stmt_relevant_p).
   2) Pop stmts one at a time and transitively mark the in-loop def-stmts
      that feed their uses, skipping uses that only serve as array indexes.
   Returns false (after releasing the worklist) as soon as some use cannot
   be handled by vect_is_simple_use; returns true otherwise.  */
5177 static bool
5178 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5180 varray_type worklist;
5181 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5182 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5183 unsigned int nbbs = loop->num_nodes;
5184 block_stmt_iterator si;
5185 tree stmt;
5186 stmt_ann_t ann;
5187 unsigned int i;
5188 int j;
5189 use_optype use_ops;
5190 stmt_vec_info stmt_info;
5192 if (vect_debug_details (NULL))
5193 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5195 VARRAY_TREE_INIT (worklist, 64, "work list");
5197 /* 1. Init worklist. */
/* Visit every stmt in every block of the loop: reset its relevance flag,
   then queue it if it is relevant in its own right.  */
5199 for (i = 0; i < nbbs; i++)
5201 basic_block bb = bbs[i];
5202 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5204 stmt = bsi_stmt (si);
5206 if (vect_debug_details (NULL))
5208 fprintf (dump_file, "init: stmt relevant? ");
5209 print_generic_expr (dump_file, stmt, TDF_SLIM);
5212 stmt_info = vinfo_for_stmt (stmt);
5213 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5215 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5216 vect_mark_relevant (worklist, stmt);
5221 /* 2. Process_worklist */
5223 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5225 stmt = VARRAY_TOP_TREE (worklist);
5226 VARRAY_POP (worklist);
5228 if (vect_debug_details (NULL))
5230 fprintf (dump_file, "worklist: examine stmt: ");
5231 print_generic_expr (dump_file, stmt, TDF_SLIM);
5234 /* Examine the USES in this statement. Mark all the statements which
5235 feed this statement's uses as "relevant", unless the USE is used as
5236 an array index. */
/* PHI nodes have no USE operand list, so their arguments are walked
   explicitly via PHI_ARG_DEF.  */
5238 if (TREE_CODE (stmt) == PHI_NODE)
5240 /* follow the def-use chain inside the loop. */
5241 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5243 tree arg = PHI_ARG_DEF (stmt, j);
5244 tree def_stmt = NULL_TREE;
5245 basic_block bb;
/* Bail out of the whole analysis on an unanalyzable use; the worklist
   is released before returning.  */
5246 if (!vect_is_simple_use (arg, loop, &def_stmt))
5248 if (vect_debug_details (NULL))
5249 fprintf (dump_file, "worklist: unsupported use.");
5250 varray_clear (worklist);
5251 return false;
/* A NULL def_stmt means the arg is defined outside the loop (e.g. an
   invariant or function argument) - nothing to mark.  */
5253 if (!def_stmt)
5254 continue;
5256 if (vect_debug_details (NULL))
5258 fprintf (dump_file, "worklist: def_stmt: ");
5259 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
/* Only defs inside the loop participate in vectorization.  */
5262 bb = bb_for_stmt (def_stmt);
5263 if (flow_bb_inside_loop_p (loop, bb))
5264 vect_mark_relevant (worklist, def_stmt);
/* NOTE(review): control appears to reach the USE-operand scan below for
   PHI nodes too; presumably USE_OPS of a PHI yields an empty list so the
   loop body is skipped - confirm against the surrounding macros.  */
5268 ann = stmt_ann (stmt);
5269 use_ops = USE_OPS (ann);
5271 for (i = 0; i < NUM_USES (use_ops); i++)
5273 tree use = USE_OP (use_ops, i);
5275 /* We are only interested in uses that need to be vectorized. Uses
5276 that are used for address computation are not considered relevant.
   */
5278 if (exist_non_indexing_operands_for_use_p (use, stmt))
5280 tree def_stmt = NULL_TREE;
5281 basic_block bb;
5282 if (!vect_is_simple_use (use, loop, &def_stmt))
5284 if (vect_debug_details (NULL))
5285 fprintf (dump_file, "worklist: unsupported use.");
5286 varray_clear (worklist);
5287 return false;
/* Defined outside the loop: nothing to mark.  */
5290 if (!def_stmt)
5291 continue;
5293 if (vect_debug_details (NULL))
5295 fprintf (dump_file, "worklist: examine use %d: ", i);
5296 print_generic_expr (dump_file, use, TDF_SLIM);
5299 bb = bb_for_stmt (def_stmt);
5300 if (flow_bb_inside_loop_p (loop, bb))
5301 vect_mark_relevant (worklist, def_stmt);
5304 } /* while worklist */
5306 varray_clear (worklist);
5307 return true;
5311 /* Function vect_can_advance_ivs_p
5313 In case the number of iterations that LOOP iterates in unknown at compile
5314 time, an epilog loop will be generated, and the loop induction variables
5315 (IVs) will be "advanced" to the value they are supposed to take just before
5316 the epilog loop. Here we check that the access function of the loop IVs
5317 and the expression that represents the loop bound are simple enough.
5318 These restrictions will be relaxed in the future. */
5320 static bool
5321 vect_can_advance_ivs_p (struct loop *loop)
5323 basic_block bb = loop->header;
5324 tree phi;
5326 /* Analyze phi functions of the loop header. */
5328 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5330 tree access_fn = NULL;
5331 tree evolution_part;
5333 if (vect_debug_details (NULL))
5335 fprintf (dump_file, "Analyze phi: ");
5336 print_generic_expr (dump_file, phi, TDF_SLIM);
5339 /* Skip virtual phi's. The data dependences that are associated with
5340 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5342 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5344 if (vect_debug_details (NULL))
5345 fprintf (dump_file, "virtual phi. skip.");
5346 continue;
5349 /* Analyze the evolution function. */
5351 access_fn = instantiate_parameters
5352 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5354 if (!access_fn)
5356 if (vect_debug_details (NULL))
5357 fprintf (dump_file, "No Access function.");
5358 return false;
5361 if (vect_debug_details (NULL))
5363 fprintf (dump_file, "Access function of PHI: ");
5364 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5367 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5369 if (evolution_part == NULL_TREE)
5370 return false;
5372 /* FORNOW: We do not transform initial conditions of IVs
5373 which evolution functions are a polynomial of degree >= 2. */
5375 if (tree_is_chrec (evolution_part))
5376 return false;
5379 return true;
5383 /* Function vect_get_loop_niters.
5385 Determine how many iterations the loop is executed.
5386 If an expression that represents the number of iterations
5387 can be constructed, place it in NUMBER_OF_ITERATIONS.
5388 Return the loop exit condition. */
5390 static tree
5391 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5393 tree niters;
5395 if (vect_debug_details (NULL))
5396 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5398 niters = number_of_iterations_in_loop (loop);
5400 if (niters != NULL_TREE
5401 && niters != chrec_dont_know)
5403 *number_of_iterations = niters;
5405 if (vect_debug_details (NULL))
5407 fprintf (dump_file, "==> get_loop_niters:" );
5408 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5412 return get_loop_exit_condition (loop);
5416 /* Function vect_analyze_loop_form.
5418 Verify the following restrictions (some may be relaxed in the future):
5419 - it's an inner-most loop
5420 - number of BBs = 2 (which are the loop header and the latch)
5421 - the loop has a pre-header
5422 - the loop has a single entry and exit
5423 - the loop exit condition is simple enough, and the number of iterations
5424 can be analyzed (a countable loop). */
5426 static loop_vec_info
5427 vect_analyze_loop_form (struct loop *loop)
5429 loop_vec_info loop_vinfo;
5430 tree loop_cond;
5431 tree number_of_iterations = NULL;
5432 bool rescan = false;
5434 if (vect_debug_details (loop))
5435 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5437 if (loop->inner
5438 || !loop->single_exit
5439 || loop->num_nodes != 2
5440 || EDGE_COUNT (loop->header->preds) != 2
5441 || loop->num_entries != 1)
5443 if (vect_debug_stats (loop) || vect_debug_details (loop))
5445 fprintf (dump_file, "not vectorized: bad loop form. ");
5446 if (loop->inner)
5447 fprintf (dump_file, "nested loop.");
5448 else if (!loop->single_exit)
5449 fprintf (dump_file, "multiple exits.");
5450 else if (loop->num_nodes != 2)
5451 fprintf (dump_file, "too many BBs in loop.");
5452 else if (EDGE_COUNT (loop->header->preds) != 2)
5453 fprintf (dump_file, "too many incoming edges.");
5454 else if (loop->num_entries != 1)
5455 fprintf (dump_file, "too many entries.");
5458 return NULL;
5461 /* We assume that the loop exit condition is at the end of the loop. i.e,
5462 that the loop is represented as a do-while (with a proper if-guard
5463 before the loop if needed), where the loop header contains all the
5464 executable statements, and the latch is empty. */
5465 if (!empty_block_p (loop->latch))
5467 if (vect_debug_stats (loop) || vect_debug_details (loop))
5468 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5469 return NULL;
5472 /* Make sure we have a preheader basic block. */
5473 if (!loop->pre_header)
5475 rescan = true;
5476 loop_split_edge_with (loop_preheader_edge (loop), NULL);
5479 /* Make sure there exists a single-predecessor exit bb: */
5480 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
5482 rescan = true;
5483 loop_split_edge_with (loop->exit_edges[0], NULL);
5486 if (rescan)
5488 flow_loop_scan (loop, LOOP_ALL);
5489 /* Flow loop scan does not update loop->single_exit field. */
5490 loop->single_exit = loop->exit_edges[0];
5493 if (empty_block_p (loop->header))
5495 if (vect_debug_stats (loop) || vect_debug_details (loop))
5496 fprintf (dump_file, "not vectorized: empty loop.");
5497 return NULL;
5500 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5501 if (!loop_cond)
5503 if (vect_debug_stats (loop) || vect_debug_details (loop))
5504 fprintf (dump_file, "not vectorized: complicated exit condition.");
5505 return NULL;
5508 if (!number_of_iterations)
5510 if (vect_debug_stats (loop) || vect_debug_details (loop))
5511 fprintf (dump_file,
5512 "not vectorized: number of iterations cannot be computed.");
5513 return NULL;
5516 if (chrec_contains_undetermined (number_of_iterations))
5518 if (vect_debug_details (NULL))
5519 fprintf (dump_file, "Infinite number of iterations.");
5520 return false;
5523 loop_vinfo = new_loop_vec_info (loop);
5524 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5526 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5528 if (vect_debug_details (loop))
5530 fprintf (dump_file, "loop bound unknown.\n");
5531 fprintf (dump_file, "Symbolic number of iterations is ");
5532 print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
5535 else
5536 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5538 if (vect_debug_stats (loop) || vect_debug_details (loop))
5539 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5540 return NULL;
5543 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5545 return loop_vinfo;
5549 /* Function vect_analyze_loop.
5551 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5552 for it. The different analyses will record information in the
5553 loop_vec_info struct. */
5555 static loop_vec_info
5556 vect_analyze_loop (struct loop *loop)
5558 bool ok;
5559 loop_vec_info loop_vinfo;
5561 if (vect_debug_details (NULL))
5562 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5564 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5566 loop_vinfo = vect_analyze_loop_form (loop);
5567 if (!loop_vinfo)
5569 if (vect_debug_details (loop))
5570 fprintf (dump_file, "bad loop form.");
5571 return NULL;
5574 /* Find all data references in the loop (which correspond to vdefs/vuses)
5575 and analyze their evolution in the loop.
5577 FORNOW: Handle only simple, array references, which
5578 alignment can be forced, and aligned pointer-references. */
5580 ok = vect_analyze_data_refs (loop_vinfo);
5581 if (!ok)
5583 if (vect_debug_details (loop))
5584 fprintf (dump_file, "bad data references.");
5585 destroy_loop_vec_info (loop_vinfo);
5586 return NULL;
5589 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5591 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5592 if (!ok)
5594 if (vect_debug_details (loop))
5595 fprintf (dump_file, "unexpected pattern.");
5596 if (vect_debug_details (loop))
5597 fprintf (dump_file, "not vectorized: unexpected pattern.");
5598 destroy_loop_vec_info (loop_vinfo);
5599 return NULL;
5602 /* Check that all cross-iteration scalar data-flow cycles are OK.
5603 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5605 ok = vect_analyze_scalar_cycles (loop_vinfo);
5606 if (!ok)
5608 if (vect_debug_details (loop))
5609 fprintf (dump_file, "bad scalar cycle.");
5610 destroy_loop_vec_info (loop_vinfo);
5611 return NULL;
5614 /* Analyze data dependences between the data-refs in the loop.
5615 FORNOW: fail at the first data dependence that we encounter. */
5617 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5618 if (!ok)
5620 if (vect_debug_details (loop))
5621 fprintf (dump_file, "bad data dependence.");
5622 destroy_loop_vec_info (loop_vinfo);
5623 return NULL;
5626 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5627 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5629 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5630 if (!ok)
5632 if (vect_debug_details (loop))
5633 fprintf (dump_file, "bad data access.");
5634 destroy_loop_vec_info (loop_vinfo);
5635 return NULL;
5638 /* Analyze the alignment of the data-refs in the loop.
5639 FORNOW: Only aligned accesses are handled. */
5641 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5642 if (!ok)
5644 if (vect_debug_details (loop))
5645 fprintf (dump_file, "bad data alignment.");
5646 destroy_loop_vec_info (loop_vinfo);
5647 return NULL;
5650 /* Scan all the operations in the loop and make sure they are
5651 vectorizable. */
5653 ok = vect_analyze_operations (loop_vinfo);
5654 if (!ok)
5656 if (vect_debug_details (loop))
5657 fprintf (dump_file, "bad operation or unsupported loop bound.");
5658 destroy_loop_vec_info (loop_vinfo);
5659 return NULL;
5662 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5664 return loop_vinfo;
5668 /* Function need_imm_uses_for.
5670 Return whether we ought to include information for 'var'
5671 when calculating immediate uses. For this pass we only want use
5672 information for non-virtual variables. */
5674 static bool
5675 need_imm_uses_for (tree var)
5677 return is_gimple_reg (var);
5681 /* Function vectorize_loops.
5683 Entry Point to loop vectorization phase. */
5685 void
5686 vectorize_loops (struct loops *loops)
5688 unsigned int i, loops_num;
5689 unsigned int num_vectorized_loops = 0;
5691 /* Does the target support SIMD? */
5692 /* FORNOW: until more sophisticated machine modelling is in place. */
5693 if (!UNITS_PER_SIMD_WORD)
5695 if (vect_debug_details (NULL))
5696 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5697 return;
5700 #ifdef ENABLE_CHECKING
5701 verify_loop_closed_ssa ();
5702 #endif
5704 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5706 /* ----------- Analyze loops. ----------- */
5708 /* If some loop was duplicated, it gets bigger number
5709 than all previously defined loops. This fact allows us to run
5710 only over initial loops skipping newly generated ones. */
5711 loops_num = loops->num;
5712 for (i = 1; i < loops_num; i++)
5714 loop_vec_info loop_vinfo;
5715 struct loop *loop = loops->parray[i];
5717 if (!loop)
5718 continue;
5720 loop_vinfo = vect_analyze_loop (loop);
5721 loop->aux = loop_vinfo;
5723 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5724 continue;
5726 vect_transform_loop (loop_vinfo, loops);
5727 num_vectorized_loops++;
5730 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5731 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5732 num_vectorized_loops);
5734 /* ----------- Finalize. ----------- */
5736 free_df ();
5737 for (i = 1; i < loops_num; i++)
5739 struct loop *loop = loops->parray[i];
5740 loop_vec_info loop_vinfo;
5742 if (!loop)
5743 continue;
5744 loop_vinfo = loop->aux;
5745 destroy_loop_vec_info (loop_vinfo);
5746 loop->aux = NULL;
5749 rewrite_into_ssa (false);
5750 rewrite_into_loop_closed_ssa (); /* FORNOW */
5751 bitmap_clear (vars_to_rename);