* configure.ac: (target_alias): Default to $host_alias, not
[official-gcc.git] / gcc / tree-vectorizer.c
blob03dac2ddf38cda37bbb677ee8bb9747356d77d2e
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "toplev.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info vect_analyze_loop (struct loop *);
151 static loop_vec_info vect_analyze_loop_form (struct loop *);
152 static bool vect_analyze_data_refs (loop_vec_info);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154 static bool vect_analyze_scalar_cycles (loop_vec_info);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info);
157 static void vect_compute_data_refs_alignment (loop_vec_info);
158 static bool vect_analyze_operations (loop_vec_info);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info, struct loops *);
162 static void vect_transform_loop_bound (loop_vec_info, tree niters);
163 static bool vect_transform_stmt (tree, block_stmt_iterator *);
164 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168 static void vect_align_data_ref (tree);
169 static void vect_enhance_data_refs_alignment (loop_vec_info);
171 /* Utility functions for the analyses. */
172 static bool vect_is_simple_use (tree , struct loop *, tree *);
173 static bool exist_non_indexing_operands_for_use_p (tree, tree);
174 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
175 static void vect_mark_relevant (varray_type, tree);
176 static bool vect_stmt_relevant_p (tree, loop_vec_info);
177 static tree vect_get_loop_niters (struct loop *, tree *);
178 static bool vect_compute_data_ref_alignment
179 (struct data_reference *, loop_vec_info);
180 static bool vect_analyze_data_ref_access (struct data_reference *);
181 static bool vect_get_first_index (tree, tree *);
182 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
183 static struct data_reference * vect_analyze_pointer_ref_access
184 (tree, tree, bool);
185 static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
186 struct loop *loop);
187 static tree vect_get_base_and_bit_offset
188 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
189 static struct data_reference * vect_analyze_pointer_ref_access
190 (tree, tree, bool);
191 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
192 static tree vect_compute_array_ref_alignment
193 (struct data_reference *, loop_vec_info, tree, tree *);
194 static tree vect_get_ptr_offset (tree, tree, tree *);
195 static tree vect_get_symbl_and_dr
196 (tree, tree, bool, loop_vec_info, struct data_reference **);
198 /* Utility functions for the code transformation. */
199 static tree vect_create_destination_var (tree, tree);
200 static tree vect_create_data_ref_ptr
201 (tree, block_stmt_iterator *, tree, tree *, bool);
202 static tree vect_create_index_for_vector_ref
203 (struct loop *, block_stmt_iterator *);
204 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
205 static tree get_vectype_for_scalar_type (tree);
206 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
207 static tree vect_get_vec_def_for_operand (tree, tree);
208 static tree vect_init_vector (tree, tree);
209 static tree vect_build_symbol_bound (tree, int, struct loop *);
210 static void vect_finish_stmt_generation
211 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
213 static void vect_generate_tmps_on_preheader (loop_vec_info,
214 tree *, tree *,
215 tree *);
216 static tree vect_build_loop_niters (loop_vec_info);
217 static void vect_update_ivs_after_vectorizer (struct loop *, tree);
219 /* Loop transformations prior to vectorization. */
221 /* Loop transformations entry point function.
222 It can be used outside of the vectorizer
223 in case the loop to be manipulated answers conditions specified
224 in function documentation. */
225 struct loop *tree_duplicate_loop_to_edge (struct loop *,
226 struct loops *, edge,
227 tree, tree, bool);
229 static void allocate_new_names (bitmap);
230 static void rename_use_op (use_operand_p);
231 static void rename_def_op (def_operand_p, tree);
232 static void rename_variables_in_bb (basic_block);
233 static void free_new_names (bitmap);
234 static void rename_variables_in_loop (struct loop *);
235 static void copy_phi_nodes (struct loop *, struct loop *, bool);
236 static void update_phis_for_duplicate_loop (struct loop *,
237 struct loop *,
238 bool after);
239 static void update_phi_nodes_for_guard (edge, struct loop *);
240 static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
241 static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *,
242 struct loops *,
243 edge);
244 static edge add_loop_guard (basic_block, tree, basic_block);
245 static bool verify_loop_for_duplication (struct loop *, bool, edge);
247 /* Utilities dealing with loop peeling (not peeling itself). */
248 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
249 static void vect_update_niters_after_peeling (loop_vec_info, tree);
250 static void vect_update_inits_of_dr (struct data_reference *, struct loop *,
251 tree niters);
252 static void vect_update_inits_of_drs (loop_vec_info, tree);
253 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
255 /* Utilities for creation and deletion of vec_info structs. */
256 loop_vec_info new_loop_vec_info (struct loop *loop);
257 void destroy_loop_vec_info (loop_vec_info);
258 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
260 static bool vect_debug_stats (struct loop *loop);
261 static bool vect_debug_details (struct loop *loop);
264 /* Utilities to support loop peeling for vectorization purposes. */
267 /* For each definition in DEFINITIONS this function allocates
268 new ssa name. */
270 static void
271 allocate_new_names (bitmap definitions)
273 unsigned ver;
274 bitmap_iterator bi;
276 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
278 tree def = ssa_name (ver);
279 tree *new_name_ptr = xmalloc (sizeof (tree));
281 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
283 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
284 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
286 SSA_NAME_AUX (def) = new_name_ptr;
291 /* Renames the use *OP_P. */
293 static void
294 rename_use_op (use_operand_p op_p)
296 tree *new_name_ptr;
298 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
299 return;
301 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
303 /* Something defined outside of the loop. */
304 if (!new_name_ptr)
305 return;
307 /* An ordinary ssa name defined in the loop. */
309 SET_USE (op_p, *new_name_ptr);
313 /* Renames the def *OP_P in statement STMT. */
315 static void
316 rename_def_op (def_operand_p op_p, tree stmt)
318 tree *new_name_ptr;
320 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
321 return;
323 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
325 /* Something defined outside of the loop. */
326 if (!new_name_ptr)
327 return;
329 /* An ordinary ssa name defined in the loop. */
331 SET_DEF (op_p, *new_name_ptr);
332 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
336 /* Renames the variables in basic block BB. */
338 static void
339 rename_variables_in_bb (basic_block bb)
341 tree phi;
342 block_stmt_iterator bsi;
343 tree stmt;
344 stmt_ann_t ann;
345 use_optype uses;
346 vuse_optype vuses;
347 def_optype defs;
348 v_may_def_optype v_may_defs;
349 v_must_def_optype v_must_defs;
350 unsigned i;
351 edge e;
352 edge_iterator ei;
354 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
355 rename_def_op (PHI_RESULT_PTR (phi), phi);
357 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
359 stmt = bsi_stmt (bsi);
360 get_stmt_operands (stmt);
361 ann = stmt_ann (stmt);
363 uses = USE_OPS (ann);
364 for (i = 0; i < NUM_USES (uses); i++)
365 rename_use_op (USE_OP_PTR (uses, i));
367 defs = DEF_OPS (ann);
368 for (i = 0; i < NUM_DEFS (defs); i++)
369 rename_def_op (DEF_OP_PTR (defs, i), stmt);
371 vuses = VUSE_OPS (ann);
372 for (i = 0; i < NUM_VUSES (vuses); i++)
373 rename_use_op (VUSE_OP_PTR (vuses, i));
375 v_may_defs = V_MAY_DEF_OPS (ann);
376 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
378 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
379 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
382 v_must_defs = V_MUST_DEF_OPS (ann);
383 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
385 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
386 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
390 FOR_EACH_EDGE (e, ei, bb->succs)
391 for (phi = phi_nodes (e->dest); phi; phi = TREE_CHAIN (phi))
392 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
396 /* Releases the structures holding the new ssa names. */
398 static void
399 free_new_names (bitmap definitions)
401 unsigned ver;
402 bitmap_iterator bi;
404 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
406 tree def = ssa_name (ver);
408 if (SSA_NAME_AUX (def))
410 free (SSA_NAME_AUX (def));
411 SSA_NAME_AUX (def) = NULL;
417 /* Renames variables in new generated LOOP. */
419 static void
420 rename_variables_in_loop (struct loop *loop)
422 unsigned i;
423 basic_block *bbs;
425 bbs = get_loop_body (loop);
427 for (i = 0; i < loop->num_nodes; i++)
428 rename_variables_in_bb (bbs[i]);
430 free (bbs);
434 /* This function copies phis from LOOP header to
435 NEW_LOOP header. AFTER is as
436 in update_phis_for_duplicate_loop function. */
438 static void
439 copy_phi_nodes (struct loop *loop, struct loop *new_loop,
440 bool after)
442 tree phi, new_phi, def;
443 edge new_e;
444 edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
446 /* Second add arguments to newly created phi nodes. */
447 for (phi = phi_nodes (loop->header),
448 new_phi = phi_nodes (new_loop->header);
449 phi;
450 phi = TREE_CHAIN (phi),
451 new_phi = TREE_CHAIN (new_phi))
453 new_e = loop_preheader_edge (new_loop);
454 def = PHI_ARG_DEF_FROM_EDGE (phi, e);
455 add_phi_arg (&new_phi, def, new_e);
460 /* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
461 executes after LOOP, and false if it executes before it. */
463 static void
464 update_phis_for_duplicate_loop (struct loop *loop,
465 struct loop *new_loop, bool after)
467 edge old_latch;
468 tree *new_name_ptr, new_ssa_name;
469 tree phi_new, phi_old, def;
470 edge orig_entry_e = loop_preheader_edge (loop);
472 /* Copy phis from loop->header to new_loop->header. */
473 copy_phi_nodes (loop, new_loop, after);
475 old_latch = loop_latch_edge (loop);
477 /* Update PHI args for the new loop latch edge, and
478 the old loop preheader edge, we know that the PHI nodes
479 are ordered appropriately in copy_phi_nodes. */
480 for (phi_new = phi_nodes (new_loop->header),
481 phi_old = phi_nodes (loop->header);
482 phi_new && phi_old;
483 phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old))
485 def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
487 if (TREE_CODE (def) != SSA_NAME)
488 continue;
490 new_name_ptr = SSA_NAME_AUX (def);
492 /* Something defined outside of the loop. */
493 if (!new_name_ptr)
494 continue;
496 /* An ordinary ssa name defined in the loop. */
497 new_ssa_name = *new_name_ptr;
499 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
501 /* Update PHI args for the original loop pre-header edge. */
502 if (! after)
503 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
504 new_ssa_name);
509 /* Update PHI nodes for a guard of the LOOP.
511 LOOP is supposed to have a preheader bb at which a guard condition is
512 located. The true edge of this condition skips the LOOP and ends
513 at the destination of the (unique) LOOP exit. The loop exit bb is supposed
514 to be an empty bb (created by this transformation) with one successor.
516 This function creates phi nodes at the LOOP exit bb. These phis need to be
517 created as a result of adding true edge coming from guard.
519 FORNOW: Only phis which have corresponding phi nodes at the header of the
520 LOOP are created. Here we use the assumption that after the LOOP there
521 are no uses of defs generated in LOOP.
523 After the phis creation, the function updates the values of phi nodes at
524 the LOOP exit successor bb:
526 Original loop:
528 bb0: loop preheader
529 goto bb1
530 bb1: loop header
531 if (exit_cond) goto bb3 else goto bb2
532 bb2: loop latch
533 goto bb1
534 bb3:
537 After guard creation (the loop before this function):
539 bb0: loop preheader
540 if (guard_condition) goto bb4 else goto bb1
541 bb1: loop header
542 if (exit_cond) goto bb4 else goto bb2
543 bb2: loop latch
544 goto bb1
545 bb4: loop exit
546 (new empty bb)
547 goto bb3
548 bb3:
550 This function updates the phi nodes in bb4 and in bb3, to account for the
551 new edge from bb0 to bb4. */
553 static void
554 update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
556 tree phi, phi1;
558 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
560 tree new_phi;
561 tree phi_arg;
563 /* Generate new phi node. */
564 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)),
565 loop->exit_edges[0]->dest);
567 /* Add argument coming from guard true edge. */
568 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
569 add_phi_arg (&new_phi, phi_arg, guard_true_edge);
571 /* Add argument coming from loop exit edge. */
572 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
573 add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
575 /* Update all phi nodes at the loop exit successor. */
576 for (phi1 = phi_nodes (EDGE_SUCC (loop->exit_edges[0]->dest, 0)->dest);
577 phi1;
578 phi1 = TREE_CHAIN (phi1))
580 tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1,
581 EDGE_SUCC (loop->exit_edges[0]->dest, 0));
582 if (old_arg == phi_arg)
584 edge e = EDGE_SUCC (loop->exit_edges[0]->dest, 0);
586 SET_PHI_ARG_DEF (phi1,
587 phi_arg_from_edge (phi1, e),
588 PHI_RESULT (new_phi));
595 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
596 that starts at zero, increases by one and its limit is NITERS. */
598 static void
599 make_loop_iterate_ntimes (struct loop *loop, tree niters,
600 tree begin_label, tree exit_label)
602 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
603 tree orig_cond;
604 edge exit_edge = loop->exit_edges[0];
605 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
607 /* Flow loop scan does not update loop->single_exit field. */
608 loop->single_exit = loop->exit_edges[0];
609 orig_cond = get_loop_exit_condition (loop);
610 gcc_assert (orig_cond);
611 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
612 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
614 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
615 back to the exit condition statement. */
616 bsi_next (&loop_exit_bsi);
617 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
620 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
621 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
622 else /* 'then' edge loops back. */
623 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
625 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
626 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
627 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
628 begin_label, exit_label);
629 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
631 /* Remove old loop exit test: */
632 bsi_remove (&loop_exit_bsi);
634 if (vect_debug_stats (loop) || vect_debug_details (loop))
635 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
639 /* Given LOOP this function generates a new copy of it and puts it
640 on E which is either the entry or exit of LOOP. */
642 static struct loop *
643 tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
644 edge e)
646 struct loop *new_loop;
647 basic_block *new_bbs, *bbs;
648 bool at_exit;
649 bool was_imm_dom;
650 basic_block exit_dest;
651 tree phi, phi_arg;
653 at_exit = (e == loop->exit_edges[0]);
654 if (!at_exit && e != loop_preheader_edge (loop))
656 if (dump_file && (dump_flags & TDF_DETAILS))
657 fprintf (dump_file,
658 "Edge is not an entry nor an exit edge.\n");
659 return NULL;
662 bbs = get_loop_body (loop);
664 /* Check whether duplication is possible. */
665 if (!can_copy_bbs_p (bbs, loop->num_nodes))
667 if (vect_debug_stats (loop) || vect_debug_details (loop))
668 fprintf (dump_file,
669 "Cannot copy basic blocks.\n");
670 free (bbs);
671 return NULL;
674 /* Generate new loop structure. */
675 new_loop = duplicate_loop (loops, loop, loop->outer);
676 if (!new_loop)
678 if (vect_debug_stats (loop) || vect_debug_details (loop))
679 fprintf (dump_file,
680 "The duplicate_loop returns NULL.\n");
681 free (bbs);
682 return NULL;
685 exit_dest = loop->exit_edges[0]->dest;
686 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
687 exit_dest) == loop->header ?
688 true : false);
690 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
692 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
694 /* Duplicating phi args at exit bbs as coming
695 also from exit of duplicated loop. */
696 for (phi = phi_nodes (exit_dest); phi; phi = TREE_CHAIN (phi))
698 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
699 if (phi_arg)
701 edge new_loop_exit_edge;
703 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
704 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
705 else
706 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
708 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
712 if (at_exit) /* Add the loop copy at exit. */
714 redirect_edge_and_branch_force (e, new_loop->header);
715 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
716 if (was_imm_dom)
717 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
719 else /* Add the copy at entry. */
721 edge new_exit_e;
722 edge entry_e = loop_preheader_edge (loop);
723 basic_block preheader = entry_e->src;
725 if (!flow_bb_inside_loop_p (new_loop,
726 EDGE_SUCC (new_loop->header, 0)->dest))
727 new_exit_e = EDGE_SUCC (new_loop->header, 0);
728 else
729 new_exit_e = EDGE_SUCC (new_loop->header, 1);
731 redirect_edge_and_branch_force (new_exit_e, loop->header);
732 set_immediate_dominator (CDI_DOMINATORS, loop->header,
733 new_exit_e->src);
735 /* We have to add phi args to the loop->header here as coming
736 from new_exit_e edge. */
737 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
739 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
740 if (phi_arg)
741 add_phi_arg (&phi, phi_arg, new_exit_e);
744 redirect_edge_and_branch_force (entry_e, new_loop->header);
745 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
748 flow_loop_scan (new_loop, LOOP_ALL);
749 flow_loop_scan (loop, LOOP_ALL);
750 free (new_bbs);
751 free (bbs);
753 return new_loop;
757 /* Given the condition statement COND, put it as the last statement
758 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
759 Assumes that this is the single exit of the guarded loop.
760 Returns the skip edge. */
762 static edge
763 add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
765 block_stmt_iterator bsi;
766 edge new_e, enter_e;
767 tree cond_stmt, then_label, else_label;
769 enter_e = EDGE_SUCC (guard_bb, 0);
770 enter_e->flags &= ~EDGE_FALLTHRU;
771 enter_e->flags |= EDGE_FALSE_VALUE;
772 bsi = bsi_last (guard_bb);
774 then_label = build1 (GOTO_EXPR, void_type_node,
775 tree_block_label (exit_bb));
776 else_label = build1 (GOTO_EXPR, void_type_node,
777 tree_block_label (enter_e->dest));
778 cond_stmt = build (COND_EXPR, void_type_node, cond,
779 then_label, else_label);
780 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
781 /* Add new edge to connect entry block to the second loop. */
782 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
783 set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
784 return new_e;
788 /* This function verifies that certain restrictions apply to LOOP. */
790 static bool
791 verify_loop_for_duplication (struct loop *loop,
792 bool update_first_loop_count, edge e)
794 edge exit_e = loop->exit_edges [0];
795 edge entry_e = loop_preheader_edge (loop);
797 /* We duplicate only innermost loops. */
798 if (loop->inner)
800 if (vect_debug_stats (loop) || vect_debug_details (loop))
801 fprintf (dump_file,
802 "Loop duplication failed. Loop is not innermost.\n");
803 return false;
806 /* Only loops with 1 exit. */
807 if (loop->num_exits != 1)
809 if (vect_debug_stats (loop) || vect_debug_details (loop))
810 fprintf (dump_file,
811 "More than one exit from loop.\n");
812 return false;
815 /* Only loops with 1 entry. */
816 if (loop->num_entries != 1)
818 if (vect_debug_stats (loop) || vect_debug_details (loop))
819 fprintf (dump_file,
820 "More than one exit from loop.\n");
821 return false;
824 /* All loops has outers, the only case loop->outer is NULL is for
825 the function itself. */
826 if (!loop->outer)
828 if (vect_debug_stats (loop) || vect_debug_details (loop))
829 fprintf (dump_file,
830 "Loop is outer-most loop.\n");
831 return false;
834 /* Verify that new IV can be created and loop condition
835 can be changed to make first loop iterate first_niters times. */
836 if (!update_first_loop_count)
838 tree orig_cond = get_loop_exit_condition (loop);
839 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
841 if (!orig_cond)
843 if (vect_debug_stats (loop) || vect_debug_details (loop))
844 fprintf (dump_file,
845 "Loop has no exit condition.\n");
846 return false;
848 if (orig_cond != bsi_stmt (loop_exit_bsi))
850 if (vect_debug_stats (loop) || vect_debug_details (loop))
851 fprintf (dump_file,
852 "Loop exit condition is not loop header last stmt.\n");
853 return false;
857 /* Make sure E is either an entry or an exit edge. */
858 if (e != exit_e && e != entry_e)
860 if (vect_debug_stats (loop) || vect_debug_details (loop))
861 fprintf (dump_file,
862 "E is not loop entry or exit edge.\n");
863 return false;
866 return true;
870 /* Given LOOP this function duplicates it to the edge E.
872 This transformation takes place before the loop is vectorized.
873 For now, there are two main cases when it's used
874 by the vectorizer: to support loops with unknown loop bounds
875 (or loop bounds indivisible by vectorization factor) and to force the
876 alignment of data references in the loop. In the first case, LOOP is
877 duplicated to the exit edge, producing epilog loop. In the second case, LOOP
878 is duplicated to the preheader edge thus generating prolog loop. In both
879 cases, the original loop will be vectorized after the transformation.
881 The edge E is supposed to be either preheader edge of the LOOP or
882 its exit edge. If preheader edge is specified, the LOOP copy
883 will precede the original one. Otherwise the copy will be located
884 at the exit of the LOOP.
886 FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
887 the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
888 loop will be iterated FIRST_NITERS times by introducing additional
889 induction variable and replacing loop exit condition. If
890 UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
891 the caller to tree_duplicate_loop_to_edge is responsible for updating
892 the first loop count.
894 NITERS (also SSA_NAME) parameter defines the number of iteration the
895 original loop iterated. The function generates two if-then guards:
896 one prior to the first loop and the other prior to the second loop.
897 The first guard will be:
899 if (FIRST_NITERS == 0) then skip the first loop
901 The second guard will be:
903 if (FIRST_NITERS == NITERS) then skip the second loop
905 Thus the equivalence to the original code is guaranteed by correct values
906 of NITERS and FIRST_NITERS and generation of if-then loop guards.
908 For now this function supports only loop forms that are candidate for
909 vectorization. Such types are the following:
911 (1) only innermost loops
912 (2) loops built from 2 basic blocks
913 (3) loops with one entry and one exit
914 (4) loops without function calls
915 (5) loops without defs that are used after the loop
917 (1), (3) are checked in this function; (2) - in function
918 vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
919 (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
920 when excluding induction/reduction support.
922 The function returns NULL in case one of these checks or
923 transformations failed. */
struct loop*
tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops,
			     edge e, tree first_niters,
			     tree niters, bool update_first_loop_count)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition;
  bitmap definitions;
  basic_block first_exit_bb, second_exit_bb;
  basic_block pre_header_bb;
  edge exit_e = loop->exit_edges [0];

  gcc_assert (!any_marked_for_rewrite_p ());

  if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
    return NULL;

  /* We have to initialize cfg_hooks.  Then, when calling
     cfg_hooks->split_edge, the function tree_split_edge
     is actually called and, when calling cfg_hooks->duplicate_block,
     the function tree_duplicate_bb is called.  */
  tree_register_cfg_hooks ();

  /* 1. Generate a copy of LOOP and put it on E (entry or exit).  */
  if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file,
		 "The tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;
    }

  /* Give the copied defs fresh SSA names and rewrite the copy to use
     them.  DEFINITIONS is released at the end of this function.  */
  definitions = marked_ssa_names ();
  allocate_new_names (definitions);
  update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  /* Here, using assumption (5), we do not propagate new names further
     than on phis of the exit from the second loop.  */
  rename_variables_in_loop (new_loop);
  free_new_names (definitions);

  /* The copy was placed on E: if E is the exit edge the copy runs after
     the original loop, otherwise before it.  */
  if (e == exit_e)
    {
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      first_loop = new_loop;
      second_loop = loop;
    }

  /* 2. Generate bb between the loops.  */
  first_exit_bb = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (first_exit_bb, first_loop->outer);

  /* We need to update here first loop exit edge
     and second loop preheader edge.  */
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  /* 3. Make first loop iterate FIRST_NITERS times, if needed.  */
  if (!update_first_loop_count)
    {
      tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
      tree first_loop_exit_lbl = tree_block_label (first_exit_bb);

      make_loop_iterate_ntimes (first_loop, first_niters,
				first_loop_latch_lbl,
				first_loop_exit_lbl);
    }

  /* 4. Add the guard before first loop:

     if FIRST_NITERS == 0
       skip first loop
     else
       enter first loop  */

  /* 4a. Generate bb before first loop.  */
  pre_header_bb = split_edge (loop_preheader_edge (first_loop));
  add_bb_to_loop (pre_header_bb, first_loop->outer);

  /* First loop preheader edge is changed.  */
  flow_loop_scan (first_loop, LOOP_ALL);

  /* 4b. Generate guard condition.  */
  pre_condition = build (LE_EXPR, boolean_type_node,
			 first_niters, integer_zero_node);

  /* 4c. Add condition at the end of preheader bb.  */
  skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);

  /* 4d. Update phis at first loop exit and propagate changes
     to the phis of second loop.  */
  update_phi_nodes_for_guard (skip_e, first_loop);

  /* 5. Add the guard before second loop:

     if FIRST_NITERS == NITERS SKIP
       skip second loop
     else
       enter second loop  */

  /* 5a. Generate empty bb at the exit from the second loop.  */
  second_exit_bb = split_edge (second_loop->exit_edges[0]);
  add_bb_to_loop (second_exit_bb, second_loop->outer);

  /* Second loop preheader edge is changed.  */
  flow_loop_scan (second_loop, LOOP_ALL);

  /* 5b. Generate guard condition.  */
  pre_condition = build (EQ_EXPR, boolean_type_node,
			 first_niters, niters);

  /* 5c. Add condition at the end of preheader bb.  */
  skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
  update_phi_nodes_for_guard (skip_e, second_loop);

  BITMAP_XFREE (definitions);
  unmark_all_for_rewrite ();

  return new_loop;
}
1052 /* Here the proper Vectorizer starts. */
1054 /* Function new_stmt_vec_info.
1056 Create and initialize a new stmt_vec_info struct for STMT. */
1058 stmt_vec_info
1059 new_stmt_vec_info (tree stmt, struct loop *loop)
1061 stmt_vec_info res;
1062 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1064 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1065 STMT_VINFO_STMT (res) = stmt;
1066 STMT_VINFO_LOOP (res) = loop;
1067 STMT_VINFO_RELEVANT_P (res) = 0;
1068 STMT_VINFO_VECTYPE (res) = NULL;
1069 STMT_VINFO_VEC_STMT (res) = NULL;
1070 STMT_VINFO_DATA_REF (res) = NULL;
1071 STMT_VINFO_MEMTAG (res) = NULL;
1072 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1074 return res;
1078 /* Function new_loop_vec_info.
1080 Create and initialize a new loop_vec_info struct for LOOP, as well as
1081 stmt_vec_info structs for all the stmts in LOOP. */
1083 loop_vec_info
1084 new_loop_vec_info (struct loop *loop)
1086 loop_vec_info res;
1087 basic_block *bbs;
1088 block_stmt_iterator si;
1089 unsigned int i;
1091 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1093 bbs = get_loop_body (loop);
1095 /* Create stmt_info for all stmts in the loop. */
1096 for (i = 0; i < loop->num_nodes; i++)
1098 basic_block bb = bbs[i];
1099 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1101 tree stmt = bsi_stmt (si);
1102 stmt_ann_t ann;
1104 get_stmt_operands (stmt);
1105 ann = stmt_ann (stmt);
1106 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1110 LOOP_VINFO_LOOP (res) = loop;
1111 LOOP_VINFO_BBS (res) = bbs;
1112 LOOP_VINFO_EXIT_COND (res) = NULL;
1113 LOOP_VINFO_NITERS (res) = NULL;
1114 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1115 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1116 LOOP_VINFO_VECT_FACTOR (res) = 0;
1117 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1118 "loop_write_datarefs");
1119 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1120 "loop_read_datarefs");
1122 for (i=0; i<MAX_NUMBER_OF_UNALIGNED_DATA_REFS; i++)
1123 LOOP_UNALIGNED_DR (res, i) = NULL;
1124 return res;
1128 /* Function destroy_loop_vec_info.
1130 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1131 stmts in the loop. */
1133 void
1134 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1136 struct loop *loop;
1137 basic_block *bbs;
1138 int nbbs;
1139 block_stmt_iterator si;
1140 int j;
1142 if (!loop_vinfo)
1143 return;
1145 loop = LOOP_VINFO_LOOP (loop_vinfo);
1147 bbs = LOOP_VINFO_BBS (loop_vinfo);
1148 nbbs = loop->num_nodes;
1150 for (j = 0; j < nbbs; j++)
1152 basic_block bb = bbs[j];
1153 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1155 tree stmt = bsi_stmt (si);
1156 stmt_ann_t ann = stmt_ann (stmt);
1157 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1158 free (stmt_info);
1159 set_stmt_info (ann, NULL);
1163 free (LOOP_VINFO_BBS (loop_vinfo));
1164 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1165 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1167 free (loop_vinfo);
1171 /* Function debug_loop_stats.
1173 For vectorization statistics dumps. */
1175 static bool
1176 vect_debug_stats (struct loop *loop)
1178 basic_block bb;
1179 block_stmt_iterator si;
1180 tree node = NULL_TREE;
1182 if (!dump_file || !(dump_flags & TDF_STATS))
1183 return false;
1185 if (!loop)
1187 fprintf (dump_file, "\n");
1188 return true;
1191 if (!loop->header)
1192 return false;
1194 bb = loop->header;
1196 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1198 node = bsi_stmt (si);
1199 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1200 break;
1203 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1204 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1206 fprintf (dump_file, "\nloop at %s:%d: ",
1207 EXPR_FILENAME (node), EXPR_LINENO (node));
1208 return true;
1211 return false;
1215 /* Function debug_loop_details.
1217 For vectorization debug dumps. */
1219 static bool
1220 vect_debug_details (struct loop *loop)
1222 basic_block bb;
1223 block_stmt_iterator si;
1224 tree node = NULL_TREE;
1226 if (!dump_file || !(dump_flags & TDF_DETAILS))
1227 return false;
1229 if (!loop)
1231 fprintf (dump_file, "\n");
1232 return true;
1235 if (!loop->header)
1236 return false;
1238 bb = loop->header;
1240 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1242 node = bsi_stmt (si);
1243 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1244 break;
1247 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1248 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1250 fprintf (dump_file, "\nloop at %s:%d: ",
1251 EXPR_FILENAME (node), EXPR_LINENO (node));
1252 return true;
1255 return false;
1259 /* Function vect_get_ptr_offset
1261 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1263 static tree
1264 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1265 tree vectype ATTRIBUTE_UNUSED,
1266 tree *offset ATTRIBUTE_UNUSED)
1268 /* TODO: Use alignment information. */
1269 return NULL_TREE;
/* Function vect_get_base_and_bit_offset

   Return the BASE of the data reference EXPR.
   If VECTYPE is given, also compute the OFFSET from BASE in bits.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
   bits of 'a.b[i] + 4B' from a.

   Input:
   EXPR - the memory reference that is being analyzed
   DR - the data_reference struct of the _original_ memory reference
        (Note: DR_REF (DR) is not necessarily EXPR)
   VECTYPE - the type that defines the alignment (i.e, we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE (returned value) - the base of the data reference EXPR.
                           E.g, if EXPR is a.b[k].c[i][j] the returned
                           base is a.
   OFFSET - offset of EXPR from BASE in bits
   BASE_ALIGNED_P - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   or if VECTYPE is given but OFFSET cannot be determined:
   then NULL_TREE is returned.  */

static tree
vect_get_base_and_bit_offset (struct data_reference *dr,
			      tree expr,
			      tree vectype,
			      loop_vec_info loop_vinfo,
			      tree *offset,
			      bool *base_aligned_p)
{
  tree this_offset = size_zero_node;
  tree base = NULL_TREE;
  tree next_ref;
  tree oprnd0, oprnd1;
  struct data_reference *array_dr;
  enum tree_code code = TREE_CODE (expr);

  *base_aligned_p = false;

  switch (code)
    {
    /* These cases end the recursion:  */
    case VAR_DECL:
      *offset = size_zero_node;
      if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
	*base_aligned_p = true;
      return expr;

    case SSA_NAME:
      if (!vectype)
	return expr;

      /* An SSA_NAME base is only meaningful here if it is a pointer.  */
      if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
	return NULL_TREE;

      if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
	{
	  /* The pointed-to type is less aligned than VECTYPE; try to
	     recover an offset from pointer alignment info (FORNOW this
	     always fails and leaves BASE as NULL_TREE).  */
	  base = vect_get_ptr_offset (expr, vectype, offset);
	  if (base)
	    *base_aligned_p = true;
	}
      else
	{
	  *base_aligned_p = true;
	  *offset = size_zero_node;
	  base = expr;
	}
      return base;

    case INTEGER_CST:
      /* A constant byte offset; convert it to bits.  */
      *offset = int_const_binop (MULT_EXPR, expr,
				 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
      return expr;

    /* These cases continue the recursion:  */
    case COMPONENT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      /* The field's bit offset within the containing record.  */
      this_offset = bit_position (oprnd1);
      if (vectype && !host_integerp (this_offset, 1))
	return NULL_TREE;
      next_ref = oprnd0;
      break;

    case ADDR_EXPR:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case INDIRECT_REF:
      oprnd0 = TREE_OPERAND (expr, 0);
      next_ref = oprnd0;
      break;

    case ARRAY_REF:
      if (DR_REF (dr) != expr)
	/* Build array data_reference struct if the existing DR_REF
	   doesn't match EXPR. This happens, for example, when the
	   EXPR is *T and T is initialized to &arr[indx]. The DR struct
	   contains information on the access of T, not of arr. In order
	   to continue the analysis, we create a new DR struct that
	   describes the access of arr.  */
	array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
      else
	array_dr = dr;

      next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
						   vectype, &this_offset);
      if (!next_ref)
	return NULL_TREE;

      if (vectype &&
	  TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
	{
	  /* The base of the array is sufficiently aligned; no need to
	     recurse further.  */
	  *offset = this_offset;
	  *base_aligned_p = true;
	  return next_ref;
	}
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* In case we have a PLUS_EXPR of the form
	 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
	 This is verified in vect_get_symbl_and_dr.  */
      oprnd0 = TREE_OPERAND (expr, 0);
      oprnd1 = TREE_OPERAND (expr, 1);

      /* Recurse on oprnd1 only to accumulate its contribution into
	 THIS_OFFSET; its BASE result is discarded.  */
      base = vect_get_base_and_bit_offset
	(dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
      if (vectype && !base)
	return NULL_TREE;

      next_ref = oprnd0;
      break;

    default:
      return NULL_TREE;
    }

  /* Recurse towards the base, then fold this level's offset into the
     running total.  */
  base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
				       loop_vinfo, offset, base_aligned_p);

  if (vectype && base)
    {
      *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
      if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
	return NULL_TREE;

      if (vect_debug_details (NULL))
	{
	  print_generic_expr (dump_file, expr, TDF_SLIM);
	  fprintf (dump_file, " --> total offset for ref: ");
	  print_generic_expr (dump_file, *offset, TDF_SLIM);
	}
    }

  return base;
}
1438 /* Function vect_force_dr_alignment_p.
1440 Returns whether the alignment of a DECL can be forced to be aligned
1441 on ALIGNMENT bit boundary. */
1443 static bool
1444 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1446 if (TREE_CODE (decl) != VAR_DECL)
1447 return false;
1449 if (DECL_EXTERNAL (decl))
1450 return false;
1452 if (TREE_STATIC (decl))
1453 return (alignment <= MAX_OFILE_ALIGNMENT);
1454 else
1455 /* This is not 100% correct. The absolute correct stack alignment
1456 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1457 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1458 However, until someone implements forced stack alignment, SSE
1459 isn't really usable without this. */
1460 return (alignment <= PREFERRED_STACK_BOUNDARY);
1464 /* Function vect_get_new_vect_var.
1466 Returns a name for a new variable. The current naming scheme appends the
1467 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1468 the name of vectorizer generated variables, and appends that to NAME if
1469 provided. */
1471 static tree
1472 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1474 const char *prefix;
1475 int prefix_len;
1476 tree new_vect_var;
1478 if (var_kind == vect_simple_var)
1479 prefix = "vect_";
1480 else
1481 prefix = "vect_p";
1483 prefix_len = strlen (prefix);
1485 if (name)
1486 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1487 else
1488 new_vect_var = create_tmp_var (type, prefix);
1490 return new_vect_var;
1494 /* Function vect_create_index_for_vector_ref.
1496 Create (and return) an index variable, along with it's update chain in the
1497 loop. This variable will be used to access a memory location in a vector
1498 operation.
1500 Input:
1501 LOOP: The loop being vectorized.
1502 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1503 function can be added here, or in the loop pre-header.
1505 Output:
1506 Return an index that will be used to index a vector array. It is expected
1507 that a pointer to the first vector will be used as the base address for the
1508 indexed reference.
1510 FORNOW: we are not trying to be efficient, just creating a new index each
1511 time from scratch. At this time all vector references could use the same
1512 index.
1514 TODO: create only one index to be used by all vector references. Record
1515 the index in the LOOP_VINFO the first time this procedure is called and
1516 return it on subsequent calls. The increment of this index must be placed
1517 just before the conditional expression that ends the single block loop. */
1519 static tree
1520 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1522 tree init, step;
1523 tree indx_before_incr, indx_after_incr;
1525 /* It is assumed that the base pointer used for vectorized access contains
1526 the address of the first vector. Therefore the index used for vectorized
1527 access must be initialized to zero and incremented by 1. */
1529 init = integer_zero_node;
1530 step = integer_one_node;
1532 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1533 create_iv (init, step, NULL_TREE, loop, bsi, false,
1534 &indx_before_incr, &indx_after_incr);
1536 return indx_before_incr;
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional. If supplied, it is be added to the initial address.

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

static tree
vect_create_addr_base_for_vector_ref (tree stmt,
				      tree *new_stmt_list,
				      tree offset)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
  tree base_name = unshare_expr (DR_BASE_NAME (dr));
  tree ref = DR_REF (dr);
  tree data_ref_base_type = TREE_TYPE (data_ref_base);
  tree scalar_type = TREE_TYPE (ref);
  tree scalar_ptr_type = build_pointer_type (scalar_type);
  tree access_fn;
  tree init_val, step, init_oval;
  bool ok;
  bool is_ptr_ref, is_array_ref, is_addr_expr;
  tree array_base;
  tree vec_stmt;
  tree new_temp;
  tree array_ref;
  tree addr_base, addr_expr;
  tree dest, new_stmt;

  /* Only the access function of the last index is relevant (i_n in
     a[i_1][i_2]...[i_n]), the others correspond to loop invariants.  */
  access_fn = DR_ACCESS_FN (dr, 0);
  ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
				    true);
  if (!ok)
    /* No simple initial value could be extracted; fall back to 0.  */
    init_oval = integer_zero_node;

  /* Classify the base so we know whether it can be indexed directly or
     must first be cast to an array pointer and dereferenced.  */
  is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
	       && TREE_CODE (data_ref_base) == SSA_NAME;
  is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
  is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
		 || TREE_CODE (data_ref_base) == PLUS_EXPR
		 || TREE_CODE (data_ref_base) == MINUS_EXPR;
  gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);

  /** Create: &(base[init_val])

      if data_ref_base is an ARRAY_TYPE:
	 base = data_ref_base

      if data_ref_base is the SSA_NAME of a POINTER_TYPE:
	 base = *((scalar_array *) data_ref_base)
   **/

  if (is_array_ref)
    array_base = data_ref_base;
  else /* is_ptr_ref or is_addr_expr */
    {
      /* array_ptr = (scalar_array_ptr_type *) data_ref_base;  */
      tree scalar_array_type = build_array_type (scalar_type, 0);
      tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
      tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
      add_referenced_tmp_var (array_ptr);

      /* Gimplify the base expression; any helper stmts go on
	 NEW_STMT_LIST for the caller to insert.  */
      dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
      add_referenced_tmp_var (dest);
      data_ref_base =
	force_gimple_operand (data_ref_base, &new_stmt, false, dest);
      append_to_statement_list_force (new_stmt, new_stmt_list);

      vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
      vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
      new_temp = make_ssa_name (array_ptr, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = new_temp;
      append_to_statement_list_force (vec_stmt, new_stmt_list);

      /* (*array_ptr)  */
      array_base = build_fold_indirect_ref (new_temp);
    }

  /* Gimplify the initial index value.  */
  dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
  add_referenced_tmp_var (dest);
  init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
  append_to_statement_list_force (new_stmt, new_stmt_list);

  if (offset)
    {
      /* init_val = init_val + OFFSET.  */
      tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
      add_referenced_tmp_var (tmp);
      vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
      vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
      init_val = make_ssa_name (tmp, vec_stmt);
      TREE_OPERAND (vec_stmt, 0) = init_val;
      append_to_statement_list_force (vec_stmt, new_stmt_list);
    }

  /* &base[init_val]  */
  array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
		      NULL_TREE, NULL_TREE);
  addr_base = build_fold_addr_expr (array_ref);

  /* addr_expr = addr_base  */
  addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
				     get_name (base_name));
  add_referenced_tmp_var (addr_expr);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
  new_temp = make_ssa_name (addr_expr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  append_to_statement_list_force (vec_stmt, new_stmt_list);

  return new_temp;
}
1667 /* Function get_vectype_for_scalar_type.
1669 Returns the vector type corresponding to SCALAR_TYPE as supported
1670 by the target. */
1672 static tree
1673 get_vectype_for_scalar_type (tree scalar_type)
1675 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1676 int nbytes = GET_MODE_SIZE (inner_mode);
1677 int nunits;
1678 tree vectype;
1680 if (nbytes == 0)
1681 return NULL_TREE;
1683 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1684 is expected. */
1685 nunits = UNITS_PER_SIMD_WORD / nbytes;
1687 vectype = build_vector_type (scalar_type, nunits);
1688 if (vect_debug_details (NULL))
1690 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1691 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1694 if (!vectype)
1695 return NULL_TREE;
1697 if (vect_debug_details (NULL))
1699 fprintf (dump_file, "vectype: ");
1700 print_generic_expr (dump_file, vectype, TDF_SLIM);
1703 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1705 /* TODO: tree-complex.c sometimes can parallelize operations
1706 on generic vectors. We can vectorize the loop in that case,
1707 but then we should re-run the lowering pass. */
1708 if (vect_debug_details (NULL))
1709 fprintf (dump_file, "mode not supported by target.");
1710 return NULL_TREE;
1713 return vectype;
1717 /* Function vect_align_data_ref.
1719 Handle mislignment of a memory accesses.
1721 FORNOW: Can't handle misaligned accesses.
1722 Make sure that the dataref is aligned. */
1724 static void
1725 vect_align_data_ref (tree stmt)
1727 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1728 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1730 /* FORNOW: can't handle misaligned accesses;
1731 all accesses expected to be aligned. */
1732 gcc_assert (aligned_access_p (dr));
/* Function vect_create_data_ref_ptr.

   Create a memory reference expression for vector access, to be used in a
   vector load/store stmt. The reference is based on a new pointer to vector
   type (vp).

   Input:
   1. STMT: a stmt that references memory. Expected to be of the form
	MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
   2. BSI: block_stmt_iterator where new stmts can be added.
   3. OFFSET (optional): an offset to be added to the initial address accessed
	by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
	pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial addressed accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
	 initial_address = &a[init];
      if OFFSET is supplied:
	 initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. Create a data-reference in the loop based on the new vector pointer vp,
      and using a new index variable 'idx' as follows:

      vp' = vp + update

      where if ONLY_INIT is true:
	 update = zero
      and otherwise
	 update = idx + vector_type_size

      Return the pointer vp'.

   FORNOW: handle only aligned and consecutive accesses.  */

static tree
vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
			  tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  vuse_optype vuses = STMT_VUSE_OPS (stmt);
  int nvuses, nv_may_defs, nv_must_defs;
  int i;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  tree idx;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  tree vectype_size;
  tree ptr_update;
  tree data_ref_ptr;

  base_name = unshare_expr (DR_BASE_NAME (dr));
  if (vect_debug_details (NULL))
    {
      tree data_ref_base = base_name;
      fprintf (dump_file, "create array_ref of type: ");
      print_generic_expr (dump_file, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
	fprintf (dump_file, "vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
	fprintf (dump_file, "vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
	fprintf (dump_file, "vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
	fprintf (dump_file, "vectorizing a pointer ref: ");
      print_generic_expr (dump_file, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable:  **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
				    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);

  /** (2) Handle aliasing information of the new vector-pointer:  **/

  /* The new pointer accesses the same memory as the original data-ref,
     so it inherits the original's type memory tag.  */
  tag = STMT_VINFO_MEMTAG (stmt_info);
  gcc_assert (tag);
  get_var_ann (vect_ptr)->type_mem_tag = tag;

  /* Mark for renaming all aliased variables
     (i.e, the may-aliases of the type-mem-tag).  */
  nvuses = NUM_VUSES (vuses);
  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
  for (i = 0; i < nvuses; i++)
    {
      tree use = VUSE_OP (vuses, i);
      if (TREE_CODE (use) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
    }
  for (i = 0; i < nv_may_defs; i++)
    {
      tree def = V_MAY_DEF_RESULT (v_may_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }
  for (i = 0; i < nv_must_defs; i++)
    {
      tree def = V_MUST_DEF_RESULT (v_must_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }

  /** (3) Calculate the initial address the vector-pointer, and set
	  the vector-pointer to point to it before the loop:  **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
						   offset);
  pe = loop_preheader_edge (loop);
  /* The insertion on the preheader edge must not split it into a new
     basic block.  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);
  vect_ptr_init = TREE_OPERAND (vec_stmt, 0);

  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    return vect_ptr_init;

  idx = vect_create_index_for_vector_ref (loop, bsi);

  /* Create: update = idx * vectype_size  */
  ptr_update = create_tmp_var (integer_type_node, "update");
  add_referenced_tmp_var (ptr_update);
  vectype_size = build_int_cst (integer_type_node,
				GET_MODE_SIZE (TYPE_MODE (vectype)));
  vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
  new_temp = make_ssa_name (ptr_update, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  /* Create: data_ref_ptr = vect_ptr_init + update  */
  vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  data_ref_ptr = TREE_OPERAND (vec_stmt, 0);

  return data_ref_ptr;
}
1916 /* Function vect_create_destination_var.
1918 Create a new temporary of type VECTYPE. */
1920 static tree
1921 vect_create_destination_var (tree scalar_dest, tree vectype)
1923 tree vec_dest;
1924 const char *new_name;
1926 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1928 new_name = get_name (scalar_dest);
1929 if (!new_name)
1930 new_name = "var_";
1931 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1932 add_referenced_tmp_var (vec_dest);
1934 return vec_dest;
1938 /* Function vect_init_vector.
1940 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1941 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1942 used in the vectorization of STMT. */
1944 static tree
1945 vect_init_vector (tree stmt, tree vector_var)
1947 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1948 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1949 tree new_var;
1950 tree init_stmt;
1951 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1952 tree vec_oprnd;
1953 edge pe;
1954 tree new_temp;
1955 basic_block new_bb;
1957 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1958 add_referenced_tmp_var (new_var);
1960 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1961 new_temp = make_ssa_name (new_var, init_stmt);
1962 TREE_OPERAND (init_stmt, 0) = new_temp;
1964 pe = loop_preheader_edge (loop);
1965 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1966 gcc_assert (!new_bb);
1968 if (vect_debug_details (NULL))
1970 fprintf (dump_file, "created new init_stmt: ");
1971 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1974 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1975 return vec_oprnd;
/* Function vect_get_vec_def_for_operand.

   OP is an operand in STMT. This function returns a (vector) def that will be
   used in the vectorized stmt for STMT.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  */

static tree
vect_get_vec_def_for_operand (tree op, tree stmt)
{
  tree vec_oprnd;
  tree vec_stmt;
  tree def_stmt;
  stmt_vec_info def_stmt_info = NULL;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
  struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
  basic_block bb;
  tree vec_inv;
  tree t = NULL_TREE;
  tree def;
  int i;

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "vect_get_vec_def_for_operand: ");
      print_generic_expr (dump_file, op, TDF_SLIM);
    }

  /** ===> Case 1: operand is a constant.  **/

  if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
    {
      /* Create 'vect_cst_ = {cst,cst,...,cst}'  */

      tree vec_cst;

      /* Build a tree with vector elements.  */
      if (vect_debug_details (NULL))
	fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);

      /* Cons the constant NUNITS times onto the element list.  */
      for (i = nunits - 1; i >= 0; --i)
	{
	  t = tree_cons (NULL_TREE, op, t);
	}
      vec_cst = build_vector (vectype, t);
      return vect_init_vector (stmt, vec_cst);
    }

  gcc_assert (TREE_CODE (op) == SSA_NAME);

  /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it.  **/

  def_stmt = SSA_NAME_DEF_STMT (op);
  def_stmt_info = vinfo_for_stmt (def_stmt);

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
      print_generic_expr (dump_file, def_stmt, TDF_SLIM);
    }

  /** ==> Case 2.1: operand is defined inside the loop.  **/

  /* Only stmts inside the analyzed loop carry a stmt_vec_info, so a
     non-NULL DEF_STMT_INFO implies the def is in the loop.  */
  if (def_stmt_info)
    {
      /* Get the def from the vectorized stmt.  */

      vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
      gcc_assert (vec_stmt);
      vec_oprnd = TREE_OPERAND (vec_stmt, 0);
      return vec_oprnd;
    }

  /** ==> Case 2.2: operand is defined by the loop-header phi-node -
	 it is a reduction/induction.  **/

  bb = bb_for_stmt (def_stmt);
  if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "reduction/induction - unsupported.");
      internal_error ("no support for reduction/induction"); /* FORNOW */
    }

  /** ==> Case 2.3: operand is defined outside the loop -
	 it is a loop invariant.  */

  switch (TREE_CODE (def_stmt))
    {
    case PHI_NODE:
      def = PHI_RESULT (def_stmt);
      break;
    case MODIFY_EXPR:
      /* Operand 0 of a MODIFY_EXPR is its lhs, i.e. the def.  */
      def = TREE_OPERAND (def_stmt, 0);
      break;
    case NOP_EXPR:
      /* An empty (nop) defining stmt means OP has no real def in this
	 function (e.g. a default definition); use OP itself.  */
      def = TREE_OPERAND (def_stmt, 0);
      gcc_assert (IS_EMPTY_STMT (def_stmt));
      def = op;
      break;
    default:
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "unsupported defining stmt: ");
	  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
	}
      internal_error ("unsupported defining stmt");
    }

  /* Build a tree with vector elements.
     Create 'vec_inv = {inv,inv,..,inv}'  */

  if (vect_debug_details (NULL))
    fprintf (dump_file, "Create vector_inv.");

  for (i = nunits - 1; i >= 0; --i)
    {
      t = tree_cons (NULL_TREE, def, t);
    }

  vec_inv = build_constructor (vectype, t);
  return vect_init_vector (stmt, vec_inv);
}
2112 /* Function vect_finish_stmt_generation.
2114 Insert a new stmt. */
2116 static void
2117 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2119 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2121 if (vect_debug_details (NULL))
2123 fprintf (dump_file, "add new stmt: ");
2124 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2127 /* Make sure bsi points to the stmt that is being vectorized. */
2129 /* Assumption: any stmts created for the vectorization of stmt S were
2130 inserted before S. BSI is expected to point to S or some new stmt before S. */
2132 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2133 bsi_next (bsi);
2134 gcc_assert (stmt == bsi_stmt (*bsi));
2138 /* Function vectorizable_assignment.
2140 Check if STMT performs an assignment (copy) that can be vectorized.
2141 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2142 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2143 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2145 static bool
2146 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2148 tree vec_dest;
2149 tree scalar_dest;
2150 tree op;
2151 tree vec_oprnd;
2152 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2153 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2154 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2155 tree new_temp;
2157 /* Is vectorizable assignment? */
2159 if (TREE_CODE (stmt) != MODIFY_EXPR)
2160 return false;
2162 scalar_dest = TREE_OPERAND (stmt, 0);
2163 if (TREE_CODE (scalar_dest) != SSA_NAME)
2164 return false;
2166 op = TREE_OPERAND (stmt, 1);
2167 if (!vect_is_simple_use (op, loop, NULL))
2169 if (vect_debug_details (NULL))
2170 fprintf (dump_file, "use not simple.");
2171 return false;
2174 if (!vec_stmt) /* transformation not required. */
2176 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2177 return true;
2180 /** Trasform. **/
2181 if (vect_debug_details (NULL))
2182 fprintf (dump_file, "transform assignment.");
2184 /* Handle def. */
2185 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2187 /* Handle use. */
2188 op = TREE_OPERAND (stmt, 1);
2189 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2191 /* Arguments are ready. create the new vector stmt. */
2192 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2193 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2194 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2195 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2197 return true;
2201 /* Function vectorizable_operation.
2203 Check if STMT performs a binary or unary operation that can be vectorized.
2204 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2205 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2206 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2208 static bool
2209 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2211 tree vec_dest;
2212 tree scalar_dest;
2213 tree operation;
2214 tree op0, op1 = NULL;
2215 tree vec_oprnd0, vec_oprnd1=NULL;
2216 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2217 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2218 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2219 int i;
2220 enum tree_code code;
2221 enum machine_mode vec_mode;
2222 tree new_temp;
2223 int op_type;
2224 tree op;
2225 optab optab;
2227 /* Is STMT a vectorizable binary/unary operation? */
2228 if (TREE_CODE (stmt) != MODIFY_EXPR)
2229 return false;
2231 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2232 return false;
2234 operation = TREE_OPERAND (stmt, 1);
2235 code = TREE_CODE (operation);
2236 optab = optab_for_tree_code (code, vectype);
2238 /* Support only unary or binary operations. */
2239 op_type = TREE_CODE_LENGTH (code);
2240 if (op_type != unary_op && op_type != binary_op)
2242 if (vect_debug_details (NULL))
2243 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2244 return false;
2247 for (i = 0; i < op_type; i++)
2249 op = TREE_OPERAND (operation, i);
2250 if (!vect_is_simple_use (op, loop, NULL))
2252 if (vect_debug_details (NULL))
2253 fprintf (dump_file, "use not simple.");
2254 return false;
2258 /* Supportable by target? */
2259 if (!optab)
2261 if (vect_debug_details (NULL))
2262 fprintf (dump_file, "no optab.");
2263 return false;
2265 vec_mode = TYPE_MODE (vectype);
2266 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2268 if (vect_debug_details (NULL))
2269 fprintf (dump_file, "op not supported by target.");
2270 return false;
2273 if (!vec_stmt) /* transformation not required. */
2275 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2276 return true;
2279 /** Transform. **/
2281 if (vect_debug_details (NULL))
2282 fprintf (dump_file, "transform binary/unary operation.");
2284 /* Handle def. */
2285 scalar_dest = TREE_OPERAND (stmt, 0);
2286 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2288 /* Handle uses. */
2289 op0 = TREE_OPERAND (operation, 0);
2290 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2292 if (op_type == binary_op)
2294 op1 = TREE_OPERAND (operation, 1);
2295 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2298 /* Arguments are ready. create the new vector stmt. */
2300 if (op_type == binary_op)
2301 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2302 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2303 else
2304 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2305 build1 (code, vectype, vec_oprnd0));
2306 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2307 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2308 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2310 return true;
2314 /* Function vectorizable_store.
2316 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2317 can be vectorized.
2318 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2319 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2320 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2322 static bool
2323 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2325 tree scalar_dest;
2326 tree data_ref;
2327 tree op;
2328 tree vec_oprnd1;
2329 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2330 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2331 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2332 enum machine_mode vec_mode;
2333 tree dummy;
2335 /* Is vectorizable store? */
2337 if (TREE_CODE (stmt) != MODIFY_EXPR)
2338 return false;
2340 scalar_dest = TREE_OPERAND (stmt, 0);
2341 if (TREE_CODE (scalar_dest) != ARRAY_REF
2342 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2343 return false;
2345 op = TREE_OPERAND (stmt, 1);
2346 if (!vect_is_simple_use (op, loop, NULL))
2348 if (vect_debug_details (NULL))
2349 fprintf (dump_file, "use not simple.");
2350 return false;
2353 vec_mode = TYPE_MODE (vectype);
2354 /* FORNOW. In some cases can vectorize even if data-type not supported
2355 (e.g. - array initialization with 0). */
2356 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2357 return false;
2359 if (!STMT_VINFO_DATA_REF (stmt_info))
2360 return false;
2363 if (!vec_stmt) /* transformation not required. */
2365 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2366 return true;
2369 /** Trasform. **/
2371 if (vect_debug_details (NULL))
2372 fprintf (dump_file, "transform store");
2374 /* Handle use - get the vectorized def from the defining stmt. */
2375 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2377 /* Handle def. */
2378 /* FORNOW: make sure the data reference is aligned. */
2379 vect_align_data_ref (stmt);
2380 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2381 data_ref = build_fold_indirect_ref (data_ref);
2383 /* Arguments are ready. create the new vector stmt. */
2384 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2385 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2387 return true;
2391 /* vectorizable_load.
2393 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2394 can be vectorized.
2395 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2396 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2397 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2399 static bool
2400 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2402 tree scalar_dest;
2403 tree vec_dest = NULL;
2404 tree data_ref = NULL;
2405 tree op;
2406 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2407 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2408 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2409 tree new_temp;
2410 int mode;
2411 tree init_addr;
2412 tree new_stmt;
2413 tree dummy;
2414 basic_block new_bb;
2415 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2416 edge pe = loop_preheader_edge (loop);
2417 bool software_pipeline_loads_p = false;
2419 /* Is vectorizable load? */
2421 if (TREE_CODE (stmt) != MODIFY_EXPR)
2422 return false;
2424 scalar_dest = TREE_OPERAND (stmt, 0);
2425 if (TREE_CODE (scalar_dest) != SSA_NAME)
2426 return false;
2428 op = TREE_OPERAND (stmt, 1);
2429 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2430 return false;
2432 if (!STMT_VINFO_DATA_REF (stmt_info))
2433 return false;
2435 mode = (int) TYPE_MODE (vectype);
2437 /* FORNOW. In some cases can vectorize even if data-type not supported
2438 (e.g. - data copies). */
2439 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2441 if (vect_debug_details (loop))
2442 fprintf (dump_file, "Aligned load, but unsupported type.");
2443 return false;
2446 if (!aligned_access_p (dr))
2448 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2449 && (!targetm.vectorize.builtin_mask_for_load
2450 || targetm.vectorize.builtin_mask_for_load ()))
2451 software_pipeline_loads_p = true;
2452 else if (!targetm.vectorize.misaligned_mem_ok (mode))
2454 /* Possibly unaligned access, and can't software pipeline the loads.
2456 if (vect_debug_details (loop))
2457 fprintf (dump_file, "Arbitrary load not supported.");
2458 return false;
2462 if (!vec_stmt) /* transformation not required. */
2464 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2465 return true;
2468 /** Trasform. **/
2470 if (vect_debug_details (NULL))
2471 fprintf (dump_file, "transform load.");
2473 if (!software_pipeline_loads_p)
2475 /* Create:
2476 p = initial_addr;
2477 indx = 0;
2478 loop {
2479 vec_dest = *(p);
2480 indx = indx + 1;
2484 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2485 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2486 if (aligned_access_p (dr))
2487 data_ref = build_fold_indirect_ref (data_ref);
2488 else
2490 int mis = DR_MISALIGNMENT (dr);
2491 tree tmis = (mis == -1 ?
2492 integer_zero_node :
2493 build_int_cst (integer_type_node, mis));
2494 tmis = int_const_binop (MULT_EXPR, tmis,
2495 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
2496 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2498 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2499 new_temp = make_ssa_name (vec_dest, new_stmt);
2500 TREE_OPERAND (new_stmt, 0) = new_temp;
2501 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2503 else /* software-pipeline the loads */
2505 /* Create:
2506 p1 = initial_addr;
2507 msq_init = *(floor(p1))
2508 p2 = initial_addr + VS - 1;
2509 magic = have_builtin ? builtin_result : initial_address;
2510 indx = 0;
2511 loop {
2512 p2' = p2 + indx * vectype_size
2513 lsq = *(floor(p2'))
2514 vec_dest = realign_load (msq, lsq, magic)
2515 indx = indx + 1;
2516 msq = lsq;
2520 tree offset;
2521 tree magic;
2522 tree phi_stmt;
2523 tree msq_init;
2524 tree msq, lsq;
2525 tree dataref_ptr;
2526 tree params;
2528 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2529 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2530 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2531 &init_addr, true);
2532 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2533 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2534 new_temp = make_ssa_name (vec_dest, new_stmt);
2535 TREE_OPERAND (new_stmt, 0) = new_temp;
2536 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2537 gcc_assert (!new_bb);
2538 msq_init = TREE_OPERAND (new_stmt, 0);
2541 /* <2> Create lsq = *(floor(p2')) in the loop */
2542 offset = build_int_cst (integer_type_node,
2543 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2544 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2545 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2546 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2547 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2548 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2549 new_temp = make_ssa_name (vec_dest, new_stmt);
2550 TREE_OPERAND (new_stmt, 0) = new_temp;
2551 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2552 lsq = TREE_OPERAND (new_stmt, 0);
2555 /* <3> */
2556 if (targetm.vectorize.builtin_mask_for_load)
2558 /* Create permutation mask, if required, in loop preheader. */
2559 tree builtin_decl;
2560 params = build_tree_list (NULL_TREE, init_addr);
2561 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2562 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2563 new_stmt = build_function_call_expr (builtin_decl, params);
2564 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2565 new_temp = make_ssa_name (vec_dest, new_stmt);
2566 TREE_OPERAND (new_stmt, 0) = new_temp;
2567 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2568 gcc_assert (!new_bb);
2569 magic = TREE_OPERAND (new_stmt, 0);
2571 else
2573 /* Use current address instead of init_addr for reduced reg pressure.
2575 magic = dataref_ptr;
2579 /* <4> Create msq = phi <msq_init, lsq> in loop */
2580 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2581 msq = make_ssa_name (vec_dest, NULL_TREE);
2582 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2583 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2584 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
2585 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
2588 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2589 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2590 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2591 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2592 new_temp = make_ssa_name (vec_dest, new_stmt);
2593 TREE_OPERAND (new_stmt, 0) = new_temp;
2594 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2597 *vec_stmt = new_stmt;
2598 return true;
2602 /* Function vect_transform_stmt.
2604 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2606 static bool
2607 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2609 bool is_store = false;
2610 tree vec_stmt = NULL_TREE;
2611 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2612 bool done;
2614 switch (STMT_VINFO_TYPE (stmt_info))
2616 case op_vec_info_type:
2617 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2618 gcc_assert (done);
2619 break;
2621 case assignment_vec_info_type:
2622 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2623 gcc_assert (done);
2624 break;
2626 case load_vec_info_type:
2627 done = vectorizable_load (stmt, bsi, &vec_stmt);
2628 gcc_assert (done);
2629 break;
2631 case store_vec_info_type:
2632 done = vectorizable_store (stmt, bsi, &vec_stmt);
2633 gcc_assert (done);
2634 is_store = true;
2635 break;
2636 default:
2637 if (vect_debug_details (NULL))
2638 fprintf (dump_file, "stmt not supported.");
2639 gcc_unreachable ();
2642 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2644 return is_store;
2648 /* This function builds ni_name = number of iterations loop executes
2649 on the loop preheader. */
2651 static tree
2652 vect_build_loop_niters (loop_vec_info loop_vinfo)
2654 tree ni_name, stmt, var;
2655 edge pe;
2656 basic_block new_bb;
2657 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2658 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2660 var = create_tmp_var (TREE_TYPE (ni), "niters");
2661 add_referenced_tmp_var (var);
2662 if (TREE_CODE (ni) == INTEGER_CST)
2664 /* This case is generated when treating a known loop bound
2665 indivisible by VF. Here we cannot use force_gimple_operand. */
2666 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2667 ni_name = make_ssa_name (var, stmt);
2668 TREE_OPERAND (stmt, 0) = ni_name;
2670 else
2671 ni_name = force_gimple_operand (ni, &stmt, false, var);
2673 pe = loop_preheader_edge (loop);
2674 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2675 if (new_bb)
2676 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2678 return ni_name;
2682 /* This function generates the following statements:
2684 ni_name = number of iterations loop executes
2685 ratio = ni_name / vf
2686 ratio_mult_vf_name = ratio * vf
2688 and places them at the loop preheader edge. */
2690 static void
2691 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2692 tree *ratio_mult_vf_name_p, tree *ratio_p)
2695 edge pe;
2696 basic_block new_bb;
2697 tree stmt, ni_name;
2698 tree ratio;
2699 tree ratio_mult_vf_name, ratio_mult_vf;
2700 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2701 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2703 int vf, i;
2705 /* Generate temporary variable that contains
2706 number of iterations loop executes. */
2708 ni_name = vect_build_loop_niters (loop_vinfo);
2710 /* ratio = ni / vf.
2711 vf is power of 2; then if ratio = = n >> log2 (vf). */
2712 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2713 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2715 /* Update initial conditions of loop copy. */
2717 /* ratio_mult_vf = ratio * vf;
2718 then if ratio_mult_vf = ratio << log2 (vf). */
2720 i = exact_log2 (vf);
2721 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2722 add_referenced_tmp_var (ratio_mult_vf);
2724 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2726 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2727 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2728 ratio, build_int_cst (unsigned_type_node,
2729 i)));
2731 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2733 pe = loop_preheader_edge (loop);
2734 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2735 if (new_bb)
2736 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2738 *ni_name_p = ni_name;
2739 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2740 *ratio_p = ratio;
2742 return;
2746 /* This function generates stmt
2748 tmp = n / vf;
2750 and attaches it to preheader of LOOP. */
2752 static tree
2753 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2755 tree var, stmt, var_name;
2756 edge pe;
2757 basic_block new_bb;
2758 int i;
2760 /* create temporary variable */
2761 var = create_tmp_var (TREE_TYPE (n), "bnd");
2762 add_referenced_tmp_var (var);
2764 var_name = make_ssa_name (var, NULL_TREE);
2766 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2768 i = exact_log2 (vf);
2769 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2770 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2771 n, build_int_cst (unsigned_type_node,i)));
2773 SSA_NAME_DEF_STMT (var_name) = stmt;
2775 pe = loop_preheader_edge (loop);
2776 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2777 if (new_bb)
2778 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2779 else
2780 if (vect_debug_details (NULL))
2781 fprintf (dump_file, "New bb on preheader edge was not generated.");
2783 return var_name;
2787 /* Function vect_transform_loop_bound.
2789 Create a new exit condition for the loop. */
2791 static void
2792 vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
2794 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2795 edge exit_edge = loop->single_exit;
2796 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
2797 tree indx_before_incr, indx_after_incr;
2798 tree orig_cond_expr;
2799 HOST_WIDE_INT old_N = 0;
2800 int vf;
2801 tree cond_stmt;
2802 tree new_loop_bound;
2803 bool symbol_niters;
2804 tree cond;
2805 tree lb_type;
2807 symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);
2809 if (!symbol_niters)
2810 old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);
2812 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2814 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
2815 #ifdef ENABLE_CHECKING
2816 gcc_assert (orig_cond_expr);
2817 #endif
2818 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
2820 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
2821 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
2823 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
2824 to point to the exit condition. */
2825 bsi_next (&loop_exit_bsi);
2826 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
2828 /* new loop exit test: */
2829 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
2830 if (!symbol_niters)
2831 new_loop_bound = fold_convert (lb_type,
2832 build_int_cst (unsigned_type_node,
2833 old_N/vf));
2834 else
2835 new_loop_bound = niters;
2837 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
2838 cond = build2 (GE_EXPR, boolean_type_node,
2839 indx_after_incr, new_loop_bound);
2840 else /* 'then' edge loops back. */
2841 cond = build2 (LT_EXPR, boolean_type_node,
2842 indx_after_incr, new_loop_bound);
2844 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
2845 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
2847 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
2849 /* remove old loop exit test: */
2850 bsi_remove (&loop_exit_bsi);
2852 if (vect_debug_details (NULL))
2853 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
2857 /* Advance IVs of the loop (to be vectorized later) to correct position.
2859 When loop is vectorized, its IVs are not always advanced
2860 correctly since vectorization changes the loop count. It's ok
2861 in case epilog loop was not produced after original one before
2862 vectorization process (the vectorizer checks that there is no uses
2863 of IVs after the loop). However, in case the epilog loop was peeled,
2864 IVs from original loop are used in epilog loop and should be
2865 advanced correctly.
2867 Here we use access functions of IVs and number of
2868 iteration loop executes in order to bring IVs to correct position.
2870 Function also update phis of basic block at the exit
2871 from the loop. */
2873 static void
2874 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
2876 edge exit = loop->exit_edges[0];
2877 tree phi;
2878 edge latch = loop_latch_edge (loop);
2880 /* Generate basic block at the exit from the loop. */
2881 basic_block new_bb = split_edge (exit);
2882 add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
2884 loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
2886 for (phi = phi_nodes (loop->header); phi; phi = TREE_CHAIN (phi))
2888 tree access_fn = NULL;
2889 tree evolution_part;
2890 tree init_expr;
2891 tree step_expr;
2892 tree var, stmt, ni, ni_name;
2893 int i, j, num_elem1, num_elem2;
2894 tree phi1;
2895 block_stmt_iterator last_bsi;
2897 /* Skip virtual phi's. The data dependences that are associated with
2898 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2900 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2902 if (vect_debug_details (NULL))
2903 fprintf (dump_file, "virtual phi. skip.");
2904 continue;
2907 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2909 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2911 /* FORNOW: We do not transform initial conditions of IVs
2912 which evolution functions are a polynomial of degree >= 2 or
2913 exponential. */
2915 step_expr = evolution_part;
2916 init_expr = initial_condition (access_fn);
2918 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2919 build2 (MULT_EXPR, TREE_TYPE (niters),
2920 niters, step_expr), init_expr);
2922 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2923 add_referenced_tmp_var (var);
2925 ni_name = force_gimple_operand (ni, &stmt, false, var);
2927 /* Insert stmt into new_bb. */
2928 last_bsi = bsi_last (new_bb);
2929 bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);
2931 /* Fix phi expressions in duplicated loop. */
2932 num_elem1 = PHI_NUM_ARGS (phi);
2933 for (i = 0; i < num_elem1; i++)
2934 if (PHI_ARG_EDGE (phi, i) == latch)
2936 tree def = PHI_ARG_DEF (phi, i);
2938 for (phi1 = phi_nodes (EDGE_SUCC (new_bb, 0)->dest); phi1;
2939 phi1 = TREE_CHAIN (phi1))
2941 num_elem2 = PHI_NUM_ARGS (phi1);
2942 for (j = 0; j < num_elem2; j++)
2943 if (PHI_ARG_DEF (phi1, j) == def)
2945 SET_PHI_ARG_DEF (phi1, j, ni_name);
2946 PHI_ARG_EDGE (phi1, j) = EDGE_SUCC (new_bb, 0);
2947 break;
2950 break;
2957 /* This function is the main driver of transformation
2958 to be done for loop before vectorizing it in case of
2959 unknown loop bound. */
2961 static void
2962 vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
2963 struct loops *loops)
2966 tree ni_name, ratio_mult_vf_name;
2967 #ifdef ENABLE_CHECKING
2968 int loop_num;
2969 #endif
2970 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2971 struct loop *new_loop;
2973 if (vect_debug_details (NULL))
2974 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
2976 /* Generate the following variables on the preheader of original loop:
2978 ni_name = number of iteration the original loop executes
2979 ratio = ni_name / vf
2980 ratio_mult_vf_name = ratio * vf */
2981 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2982 &ratio_mult_vf_name, ratio);
2984 /* Update loop info. */
2985 loop->pre_header = loop_preheader_edge (loop)->src;
2986 loop->pre_header_edges[0] = loop_preheader_edge (loop);
2988 #ifdef ENABLE_CHECKING
2989 loop_num = loop->num;
2990 #endif
2991 new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0],
2992 ratio_mult_vf_name, ni_name, true);
2993 #ifdef ENABLE_CHECKING
2994 gcc_assert (new_loop);
2995 gcc_assert (loop_num == loop->num);
2996 #endif
2998 /* Update IVs of original loop as if they were advanced
2999 by ratio_mult_vf_name steps. */
3001 #ifdef ENABLE_CHECKING
3002 /* Check existence of intermediate bb. */
3003 gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
3004 #endif
3005 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
3007 return;
3012 /* Function vect_gen_niters_for_prolog_loop
3014 Set the number of iterations for the loop represented by LOOP_VINFO
3015 to the minimum between NITERS (the original iteration count of the loop)
3016 and the misalignment DR - the first data reference in the list
3017 LOOP_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of this
3018 loop, the data reference DR will refer to an aligned location. */
3020 static tree
3021 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
3023 struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
3024 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3025 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3026 tree var, stmt;
3027 tree iters, iters_name;
3028 edge pe;
3029 basic_block new_bb;
3030 tree dr_stmt = DR_STMT (dr);
3031 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3032 tree start_addr, byte_miss_align, elem_miss_align;
3033 int vec_type_align =
3034 GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3035 / BITS_PER_UNIT;
3036 tree tmp1, tmp2;
3037 tree new_stmt_list = NULL_TREE;
3039 start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3040 &new_stmt_list, NULL_TREE);
3042 pe = loop_preheader_edge (loop);
3043 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
3044 if (new_bb)
3045 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3047 byte_miss_align =
3048 build (BIT_AND_EXPR, integer_type_node, start_addr,
3049 build (MINUS_EXPR, integer_type_node,
3050 build_int_cst (unsigned_type_node,
3051 vec_type_align), integer_one_node));
3052 tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
3053 elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
3054 byte_miss_align, tmp1);
3056 tmp2 =
3057 build (BIT_AND_EXPR, integer_type_node,
3058 build (MINUS_EXPR, integer_type_node,
3059 build_int_cst (unsigned_type_node, vf), elem_miss_align),
3060 build (MINUS_EXPR, integer_type_node,
3061 build_int_cst (unsigned_type_node, vf), integer_one_node));
3063 iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
3064 var = create_tmp_var (TREE_TYPE (iters), "iters");
3065 add_referenced_tmp_var (var);
3066 iters_name = force_gimple_operand (iters, &stmt, false, var);
3068 /* Insert stmt on loop preheader edge. */
3069 pe = loop_preheader_edge (loop);
3070 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3071 if (new_bb)
3072 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
3074 return iters_name;
3078 /* Function vect_update_niters_after_peeling
3080 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3081 The new number of iterations is therefore original_niters - NITERS.
3082 Record the new number of iterations in LOOP_VINFO. */
3084 static void
3085 vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
3087 tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3088 LOOP_VINFO_NITERS (loop_vinfo) =
3089 build (MINUS_EXPR, integer_type_node, n_iters, niters);
3093 /* Function vect_update_inits_of_dr
3095 NITERS iterations were peeled from LOOP. DR represents a data reference
3096 in LOOP. This function updates the information recorded in DR to
3097 account for the fact that the first NITERS iterations had already been
3098 executed. Specifically, it updates the initial_condition of the
3099 access_function of DR. */
3101 static void
3102 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3103 tree niters)
3105 tree access_fn = DR_ACCESS_FN (dr, 0);
3106 tree init, init_new, step;
3108 step = evolution_part_in_loop_num (access_fn, loop->num);
3109 init = initial_condition (access_fn);
3111 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3112 build (MULT_EXPR, TREE_TYPE (niters),
3113 niters, step), init);
3114 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3116 return;
3120 /* Function vect_update_inits_of_drs
3122 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3123 This function updates the information recorded for the data references in
3124 the loop to account for the fact that the first NITERS iterations had
3125 already been executed. Specifically, it updates the initial_condition of the
3126 access_function of all the data_references in the loop. */
3128 static void
3129 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3131 unsigned int i;
3132 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3133 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3134 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3136 if (dump_file && (dump_flags & TDF_DETAILS))
3137 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
3139 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3141 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3142 vect_update_inits_of_dr (dr, loop, niters);
3145 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3147 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3148 vect_update_inits_of_dr (dr, loop, niters);
3149 DR_MISALIGNMENT (dr) = -1;
3154 /* Function vect_do_peeling_for_alignment
3156 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3157 'niters' is set to the misalignment of one of the data references in the
3158 loop, thereby forcing it to refer to an aligned location at the beginning
3159 of the execution of this loop. The data reference for which we are
3160 peeling is chosen from LOOP_UNALIGNED_DR. */
3162 static void
3163 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3165 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3166 tree niters_of_prolog_loop, ni_name;
3167 struct data_reference *dr = LOOP_UNALIGNED_DR (loop_vinfo, 0);
3169 if (vect_debug_details (NULL))
3170 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3172 ni_name = vect_build_loop_niters (loop_vinfo);
3173 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3176 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3177 tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop),
3178 niters_of_prolog_loop, ni_name, false);
3181 /* Update stmt info of dr according to which we peeled. */
3182 DR_MISALIGNMENT (dr) = 0;
3184 /* Update number of times loop executes. */
3185 vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
3187 /* Update all inits of access functions of all data refs. */
3188 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3190 /* After peeling we have to reset scalar evolution analyzer. */
3191 scev_reset ();
3193 return;
3197 /* Function vect_transform_loop.
3199 The analysis phase has determined that the loop is vectorizable.
3200 Vectorize the loop - created vectorized stmts to replace the scalar
3201 stmts in the loop, and update the loop exit condition. */
3203 static void
3204 vect_transform_loop (loop_vec_info loop_vinfo,
3205 struct loops *loops ATTRIBUTE_UNUSED)
3207 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3208 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3209 int nbbs = loop->num_nodes;
3210 block_stmt_iterator si;
3211 int i;
3212 tree ratio = NULL;
3213 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3215 if (vect_debug_details (NULL))
3216 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3219 /* Peel the loop if there are data refs with unknown alignment.
3220 Only one data ref with unknown store is allowed. */
3223 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3224 vect_do_peeling_for_alignment (loop_vinfo, loops);
3226 /* If the loop has a symbolic number of iterations 'n'
3227 (i.e. it's not a compile time constant),
3228 then an epilog loop needs to be created. We therefore duplicate
3229 the initial loop. The original loop will be vectorized, and will compute
3230 the first (n/VF) iterations. The second copy of the loop will remain
3231 serial and will compute the remaining (n%VF) iterations.
3232 (VF is the vectorization factor). */
3234 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3235 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3237 /* FORNOW: we'll treat the case where niters is constant and
3239 niters % vf != 0
3241 in the way similar to one with symbolic niters.
3242 For this we'll generate variable which value is equal to niters. */
3244 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3245 && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3246 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3249 /* 1) Make sure the loop header has exactly two entries
3250 2) Make sure we have a preheader basic block. */
3252 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3254 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3257 /* FORNOW: the vectorizer supports only loops which body consist
3258 of one basic block (header + empty latch). When the vectorizer will
3259 support more involved loop forms, the order by which the BBs are
3260 traversed need to be reconsidered. */
3262 for (i = 0; i < nbbs; i++)
3264 basic_block bb = bbs[i];
3266 for (si = bsi_start (bb); !bsi_end_p (si);)
3268 tree stmt = bsi_stmt (si);
3269 stmt_vec_info stmt_info;
3270 bool is_store;
3272 if (vect_debug_details (NULL))
3274 fprintf (dump_file, "------>vectorizing statement: ");
3275 print_generic_expr (dump_file, stmt, TDF_SLIM);
3277 stmt_info = vinfo_for_stmt (stmt);
3278 gcc_assert (stmt_info);
3279 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3281 bsi_next (&si);
3282 continue;
3284 #ifdef ENABLE_CHECKING
3285 /* FORNOW: Verify that all stmts operate on the same number of
3286 units and no inner unrolling is necessary. */
3287 gcc_assert (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3288 == vectorization_factor);
3289 #endif
3290 /* -------- vectorize statement ------------ */
3291 if (vect_debug_details (NULL))
3292 fprintf (dump_file, "transform statement.");
3294 is_store = vect_transform_stmt (stmt, &si);
3295 if (is_store)
3297 /* free the attached stmt_vec_info and remove the stmt. */
3298 stmt_ann_t ann = stmt_ann (stmt);
3299 free (stmt_info);
3300 set_stmt_info (ann, NULL);
3301 bsi_remove (&si);
3302 continue;
3305 bsi_next (&si);
3306 } /* stmts in BB */
3307 } /* BBs in loop */
3309 vect_transform_loop_bound (loop_vinfo, ratio);
3311 if (vect_debug_details (loop))
3312 fprintf (dump_file,"Success! loop vectorized.");
3313 if (vect_debug_stats (loop))
3314 fprintf (dump_file, "LOOP VECTORIZED.");
3318 /* Function vect_is_simple_use.
3320 Input:
3321 LOOP - the loop that is being vectorized.
3322 OPERAND - operand of a stmt in LOOP.
3323 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3325 Returns whether a stmt with OPERAND can be vectorized.
3326 Supportable operands are constants, loop invariants, and operands that are
3327 defined by the current iteration of the loop. Unsupportable operands are
3328 those that are defined by a previous iteration of the loop (as is the case
3329 in reduction/induction computations). */
3331 static bool
3332 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3334 tree def_stmt;
3335 basic_block bb;
3337 if (def)
3338 *def = NULL_TREE;
3340 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3341 return true;
3343 if (TREE_CODE (operand) != SSA_NAME)
3344 return false;
3346 def_stmt = SSA_NAME_DEF_STMT (operand);
3347 if (def_stmt == NULL_TREE )
3349 if (vect_debug_details (NULL))
3350 fprintf (dump_file, "no def_stmt.");
3351 return false;
3354 /* empty stmt is expected only in case of a function argument.
3355 (Otherwise - we expect a phi_node or a modify_expr). */
3356 if (IS_EMPTY_STMT (def_stmt))
3358 tree arg = TREE_OPERAND (def_stmt, 0);
3359 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3360 return true;
3361 if (vect_debug_details (NULL))
3363 fprintf (dump_file, "Unexpected empty stmt: ");
3364 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3366 return false;
3369 /* phi_node inside the loop indicates an induction/reduction pattern.
3370 This is not supported yet. */
3371 bb = bb_for_stmt (def_stmt);
3372 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3374 if (vect_debug_details (NULL))
3375 fprintf (dump_file, "reduction/induction - unsupported.");
3376 return false; /* FORNOW: not supported yet. */
3379 /* Expecting a modify_expr or a phi_node. */
3380 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3381 || TREE_CODE (def_stmt) == PHI_NODE)
3383 if (def)
3384 *def = def_stmt;
3385 return true;
3388 return false;
3392 /* Function vect_analyze_operations.
3394 Scan the loop stmts and make sure they are all vectorizable. */
3396 static bool
3397 vect_analyze_operations (loop_vec_info loop_vinfo)
3399 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3400 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3401 int nbbs = loop->num_nodes;
3402 block_stmt_iterator si;
3403 int vectorization_factor = 0;
3404 int i;
3405 bool ok;
3406 tree scalar_type;
3408 if (vect_debug_details (NULL))
3409 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3411 for (i = 0; i < nbbs; i++)
3413 basic_block bb = bbs[i];
3415 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3417 tree stmt = bsi_stmt (si);
3418 int nunits;
3419 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3420 tree vectype;
3422 if (vect_debug_details (NULL))
3424 fprintf (dump_file, "==> examining statement: ");
3425 print_generic_expr (dump_file, stmt, TDF_SLIM);
3428 gcc_assert (stmt_info);
3430 /* skip stmts which do not need to be vectorized.
3431 this is expected to include:
3432 - the COND_EXPR which is the loop exit condition
3433 - any LABEL_EXPRs in the loop
3434 - computations that are used only for array indexing or loop
3435 control */
3437 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3439 if (vect_debug_details (NULL))
3440 fprintf (dump_file, "irrelevant.");
3441 continue;
3444 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3446 if (vect_debug_stats (loop) || vect_debug_details (loop))
3448 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3449 print_generic_expr (dump_file, stmt, TDF_SLIM);
3451 return false;
3454 if (STMT_VINFO_DATA_REF (stmt_info))
3455 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3456 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3457 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3458 else
3459 scalar_type = TREE_TYPE (stmt);
3461 if (vect_debug_details (NULL))
3463 fprintf (dump_file, "get vectype for scalar type: ");
3464 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3467 vectype = get_vectype_for_scalar_type (scalar_type);
3468 if (!vectype)
3470 if (vect_debug_stats (loop) || vect_debug_details (loop))
3472 fprintf (dump_file, "not vectorized: unsupported data-type ");
3473 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3475 return false;
3478 if (vect_debug_details (NULL))
3480 fprintf (dump_file, "vectype: ");
3481 print_generic_expr (dump_file, vectype, TDF_SLIM);
3483 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3485 ok = (vectorizable_operation (stmt, NULL, NULL)
3486 || vectorizable_assignment (stmt, NULL, NULL)
3487 || vectorizable_load (stmt, NULL, NULL)
3488 || vectorizable_store (stmt, NULL, NULL));
3490 if (!ok)
3492 if (vect_debug_stats (loop) || vect_debug_details (loop))
3494 fprintf (dump_file, "not vectorized: stmt not supported: ");
3495 print_generic_expr (dump_file, stmt, TDF_SLIM);
3497 return false;
3500 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3501 if (vect_debug_details (NULL))
3502 fprintf (dump_file, "nunits = %d", nunits);
3504 if (vectorization_factor)
3506 /* FORNOW: don't allow mixed units.
3507 This restriction will be relaxed in the future. */
3508 if (nunits != vectorization_factor)
3510 if (vect_debug_stats (loop) || vect_debug_details (loop))
3511 fprintf (dump_file, "not vectorized: mixed data-types");
3512 return false;
3515 else
3516 vectorization_factor = nunits;
3518 #ifdef ENABLE_CHECKING
3519 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3520 * vectorization_factor == UNITS_PER_SIMD_WORD);
3521 #endif
3525 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3527 if (vectorization_factor <= 1)
3529 if (vect_debug_stats (loop) || vect_debug_details (loop))
3530 fprintf (dump_file, "not vectorized: unsupported data-type");
3531 return false;
3533 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3536 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3537 && vect_debug_details (NULL))
3538 fprintf (dump_file,
3539 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3540 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3542 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3543 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3545 /* In this case we have to generate epilog loop, that
3546 can be done only for loops with one entry edge. */
3547 if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
3548 || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
3550 if (vect_debug_stats (loop) || vect_debug_details (loop))
3551 fprintf (dump_file, "not vectorized: more than one entry.");
3552 return false;
3556 return true;
3560 /* Function exist_non_indexing_operands_for_use_p
3562 USE is one of the uses attached to STMT. Check if USE is
3563 used in STMT for anything other than indexing an array. */
3565 static bool
3566 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3568 tree operand;
3569 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3571 /* USE corresponds to some operand in STMT. If there is no data
3572 reference in STMT, then any operand that corresponds to USE
3573 is not indexing an array. */
3574 if (!STMT_VINFO_DATA_REF (stmt_info))
3575 return true;
3577 /* STMT has a data_ref. FORNOW this means that its of one of
3578 the following forms:
3579 -1- ARRAY_REF = var
3580 -2- var = ARRAY_REF
3581 (This should have been verified in analyze_data_refs).
3583 'var' in the second case corresponds to a def, not a use,
3584 so USE cannot correspond to any operands that are not used
3585 for array indexing.
3587 Therefore, all we need to check is if STMT falls into the
3588 first case, and whether var corresponds to USE. */
3590 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3591 return false;
3593 operand = TREE_OPERAND (stmt, 1);
3595 if (TREE_CODE (operand) != SSA_NAME)
3596 return false;
3598 if (operand == use)
3599 return true;
3601 return false;
3605 /* Function vect_is_simple_iv_evolution.
3607 FORNOW: A simple evolution of an induction variables in the loop is
3608 considered a polynomial evolution with constant step. */
3610 static bool
3611 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3612 tree * step, bool strict)
3614 tree init_expr;
3615 tree step_expr;
3617 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3619 /* When there is no evolution in this loop, the evolution function
3620 is not "simple". */
3621 if (evolution_part == NULL_TREE)
3622 return false;
3624 /* When the evolution is a polynomial of degree >= 2
3625 the evolution function is not "simple". */
3626 if (tree_is_chrec (evolution_part))
3627 return false;
3629 step_expr = evolution_part;
3630 init_expr = unshare_expr (initial_condition (access_fn));
3632 if (vect_debug_details (NULL))
3634 fprintf (dump_file, "step: ");
3635 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3636 fprintf (dump_file, ", init: ");
3637 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3640 *init = init_expr;
3641 *step = step_expr;
3643 if (TREE_CODE (step_expr) != INTEGER_CST)
3645 if (vect_debug_details (NULL))
3646 fprintf (dump_file, "step unknown.");
3647 return false;
3650 if (strict)
3651 if (!integer_onep (step_expr))
3653 if (vect_debug_details (NULL))
3654 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3655 return false;
3658 return true;
3662 /* Function vect_analyze_scalar_cycles.
3664 Examine the cross iteration def-use cycles of scalar variables, by
3665 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3666 cycles that they represent do not impede vectorization.
3668 FORNOW: Reduction as in the following loop, is not supported yet:
3669 loop1:
3670 for (i=0; i<N; i++)
3671 sum += a[i];
3672 The cross-iteration cycle corresponding to variable 'sum' will be
3673 considered too complicated and will impede vectorization.
3675 FORNOW: Induction as in the following loop, is not supported yet:
3676 loop2:
3677 for (i=0; i<N; i++)
3678 a[i] = i;
3680 However, the following loop *is* vectorizable:
3681 loop3:
3682 for (i=0; i<N; i++)
3683 a[i] = b[i];
3685 In both loops there exists a def-use cycle for the variable i:
3686 loop: i_2 = PHI (i_0, i_1)
3687 a[i_2] = ...;
3688 i_1 = i_2 + 1;
3689 GOTO loop;
3691 The evolution of the above cycle is considered simple enough,
3692 however, we also check that the cycle does not need to be
3693 vectorized, i.e - we check that the variable that this cycle
3694 defines is only used for array indexing or in stmts that do not
3695 need to be vectorized. This is not the case in loop2, but it
3696 *is* the case in loop3. */
3698 static bool
3699 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3701 tree phi;
3702 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3703 basic_block bb = loop->header;
3704 tree dummy;
3706 if (vect_debug_details (NULL))
3707 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3709 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
3711 tree access_fn = NULL;
3713 if (vect_debug_details (NULL))
3715 fprintf (dump_file, "Analyze phi: ");
3716 print_generic_expr (dump_file, phi, TDF_SLIM);
3719 /* Skip virtual phi's. The data dependences that are associated with
3720 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3722 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3724 if (vect_debug_details (NULL))
3725 fprintf (dump_file, "virtual phi. skip.");
3726 continue;
3729 /* Analyze the evolution function. */
3731 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3732 those of loop induction variables; This property is verified here.
3734 Furthermore, if that induction variable is used in an operation
3735 that needs to be vectorized (i.e, is not solely used to index
3736 arrays and check the exit condition) - we do not support its
3737 vectorization yet. This property is verified in vect_is_simple_use,
3738 during vect_analyze_operations. */
3740 access_fn = /* instantiate_parameters
3741 (loop,*/
3742 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3744 if (!access_fn)
3746 if (vect_debug_stats (loop) || vect_debug_details (loop))
3747 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3748 return false;
3751 if (vect_debug_details (NULL))
3753 fprintf (dump_file, "Access function of PHI: ");
3754 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3757 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3758 &dummy, false))
3760 if (vect_debug_stats (loop) || vect_debug_details (loop))
3761 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3762 return false;
3766 return true;
3770 /* Function vect_analyze_data_ref_dependence.
3772 Return TRUE if there (might) exist a dependence between a memory-reference
3773 DRA and a memory-reference DRB. */
3775 static bool
3776 vect_analyze_data_ref_dependence (struct data_reference *dra,
3777 struct data_reference *drb,
3778 struct loop *loop)
3780 bool differ_p;
3781 struct data_dependence_relation *ddr;
3783 if (!array_base_name_differ_p (dra, drb, &differ_p))
3785 if (vect_debug_stats (loop) || vect_debug_details (loop))
3787 fprintf (dump_file,
3788 "not vectorized: can't determine dependence between: ");
3789 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3790 fprintf (dump_file, " and ");
3791 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3793 return true;
3796 if (differ_p)
3797 return false;
3799 ddr = initialize_data_dependence_relation (dra, drb);
3800 compute_affine_dependence (ddr);
3802 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3803 return false;
3805 if (vect_debug_stats (loop) || vect_debug_details (loop))
3807 fprintf (dump_file,
3808 "not vectorized: possible dependence between data-refs ");
3809 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3810 fprintf (dump_file, " and ");
3811 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3814 return true;
3818 /* Function vect_analyze_data_ref_dependences.
3820 Examine all the data references in the loop, and make sure there do not
3821 exist any data dependences between them.
3823 TODO: dependences which distance is greater than the vectorization factor
3824 can be ignored. */
3826 static bool
3827 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3829 unsigned int i, j;
3830 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3831 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3832 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3834 /* Examine store-store (output) dependences. */
3836 if (vect_debug_details (NULL))
3837 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3839 if (vect_debug_details (NULL))
3840 fprintf (dump_file, "compare all store-store pairs.");
3842 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3844 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3846 struct data_reference *dra =
3847 VARRAY_GENERIC_PTR (loop_write_refs, i);
3848 struct data_reference *drb =
3849 VARRAY_GENERIC_PTR (loop_write_refs, j);
3850 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3851 return false;
3855 /* Examine load-store (true/anti) dependences. */
3857 if (vect_debug_details (NULL))
3858 fprintf (dump_file, "compare all load-store pairs.");
3860 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3862 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3864 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3865 struct data_reference *drb =
3866 VARRAY_GENERIC_PTR (loop_write_refs, j);
3867 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3868 return false;
3872 return true;
3876 /* Function vect_get_first_index.
3878 REF is a data reference.
3879 If it is an ARRAY_REF: if its lower bound is simple enough,
3880 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3881 If it is not an ARRAY_REF: REF has no "first index";
3882 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3884 static bool
3885 vect_get_first_index (tree ref, tree *array_first_index)
3887 tree array_start;
3889 if (TREE_CODE (ref) != ARRAY_REF)
3890 *array_first_index = size_zero_node;
3891 else
3893 array_start = array_ref_low_bound (ref);
3894 if (!host_integerp (array_start,0))
3896 if (vect_debug_details (NULL))
3898 fprintf (dump_file, "array min val not simple integer cst.");
3899 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3901 return false;
3903 *array_first_index = array_start;
3906 return true;
3910 /* Function vect_compute_array_base_alignment.
3911 A utility function of vect_compute_array_ref_alignment.
3913 Compute the misalignment of ARRAY in bits.
3915 Input:
3916 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3917 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3918 if NULL: don't compute misalignment, just return the base of ARRAY.
3919 PREV_DIMENSIONS - initialized to one.
3920 MISALIGNMENT - the computed misalignment in bits.
3922 Output:
3923 If VECTYPE is not NULL:
3924 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3925 the base of the array, and put the computed misalignment in MISALIGNMENT.
3926 If VECTYPE is NULL:
3927 Return the base of the array.
3929 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3930 a[idx_N]...[idx_2][idx_1] is
3931 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3932 ... + idx_N * dim_0 * ... * dim_N-1}.
3933 (The misalignment of &a is not checked here).
3934 Note, that every term contains dim_0, therefore, if dim_0 is a
3935 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3936 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3937 NUINTS, we can say that the misalignment of the sum is equal to
3938 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3939 we can't determine this array misalignment, and we return
3940 false.
3941 We proceed recursively in this manner, accumulating total misalignment
3942 and the multiplication of previous dimensions for correct misalignment
3943 calculation. */
3945 static tree
3946 vect_compute_array_base_alignment (tree array,
3947 tree vectype,
3948 tree *prev_dimensions,
3949 tree *misalignment)
3951 tree index;
3952 tree domain;
3953 tree dimension_size;
3954 tree mis;
3955 tree bits_per_vectype;
3956 tree bits_per_vectype_unit;
3958 /* The 'stop condition' of the recursion. */
3959 if (TREE_CODE (array) != ARRAY_REF)
3960 return array;
3962 if (!vectype)
3963 /* Just get the base decl. */
3964 return vect_compute_array_base_alignment
3965 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3967 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
3968 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
3969 return NULL_TREE;
3971 domain = TYPE_DOMAIN (TREE_TYPE (array));
3972 dimension_size =
3973 int_const_binop (PLUS_EXPR,
3974 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
3975 TYPE_MIN_VALUE (domain), 1),
3976 size_one_node, 1);
3978 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
3979 is a multiple of NUNITS:
3981 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
3983 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
3984 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
3985 if (integer_zerop (mis))
3986 /* This array is aligned. Continue just in order to get the base decl. */
3987 return vect_compute_array_base_alignment
3988 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
3990 index = TREE_OPERAND (array, 1);
3991 if (!host_integerp (index, 1))
3992 /* The current index is not constant. */
3993 return NULL_TREE;
3995 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
3997 bits_per_vectype = fold_convert (unsigned_type_node,
3998 build_int_cst (NULL_TREE, BITS_PER_UNIT *
3999 GET_MODE_SIZE (TYPE_MODE (vectype))));
4000 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4001 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4002 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4004 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4005 earlier:
4007 *misalignment =
4008 (*misalignment + index_val * dimension_size * *prev_dimensions)
4009 % vectype_nunits;
4012 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4013 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4014 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4015 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4016 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4019 *prev_dimensions = int_const_binop (MULT_EXPR,
4020 *prev_dimensions, dimension_size, 1);
4022 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4023 prev_dimensions,
4024 misalignment);
4028 /* Function vect_compute_data_ref_alignment
4030 Compute the misalignment of the data reference DR.
4032 Output:
4033 1. If during the misalignment computation it is found that the data reference
4034 cannot be vectorized then false is returned.
4035 2. DR_MISALIGNMENT (DR) is defined.
4037 FOR NOW: No analysis is actually performed. Misalignment is calculated
4038 only for trivial cases. TODO. */
4040 static bool
4041 vect_compute_data_ref_alignment (struct data_reference *dr,
4042 loop_vec_info loop_vinfo)
4044 tree stmt = DR_STMT (dr);
4045 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4046 tree ref = DR_REF (dr);
4047 tree vectype;
4048 tree scalar_type;
4049 tree offset = size_zero_node;
4050 tree base, bit_offset, alignment;
4051 tree unit_bits = fold_convert (unsigned_type_node,
4052 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4053 tree dr_base;
4054 bool base_aligned_p;
4056 if (vect_debug_details (NULL))
4057 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4059 /* Initialize misalignment to unknown. */
4060 DR_MISALIGNMENT (dr) = -1;
4062 scalar_type = TREE_TYPE (ref);
4063 vectype = get_vectype_for_scalar_type (scalar_type);
4064 if (!vectype)
4066 if (vect_debug_details (NULL))
4068 fprintf (dump_file, "no vectype for stmt: ");
4069 print_generic_expr (dump_file, stmt, TDF_SLIM);
4070 fprintf (dump_file, " scalar_type: ");
4071 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4073 /* It is not possible to vectorize this data reference. */
4074 return false;
4076 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4078 if (TREE_CODE (ref) == ARRAY_REF)
4079 dr_base = ref;
4080 else
4081 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4083 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4084 loop_vinfo, &bit_offset, &base_aligned_p);
4085 if (!base)
4087 if (vect_debug_details (NULL))
4089 fprintf (dump_file, "Unknown alignment for access: ");
4090 print_generic_expr (dump_file,
4091 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4093 return true;
4096 if (!base_aligned_p)
4098 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4100 if (vect_debug_details (NULL))
4102 fprintf (dump_file, "can't force alignment of ref: ");
4103 print_generic_expr (dump_file, ref, TDF_SLIM);
4105 return true;
4108 /* Force the alignment of the decl.
4109 NOTE: This is the only change to the code we make during
4110 the analysis phase, before deciding to vectorize the loop. */
4111 if (vect_debug_details (NULL))
4112 fprintf (dump_file, "force alignment");
4113 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4114 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4117 /* At this point we assume that the base is aligned, and the offset from it
4118 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4119 gcc_assert (base_aligned_p
4120 || (TREE_CODE (base) == VAR_DECL
4121 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4123 /* Convert into bytes. */
4124 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4125 /* Check that there is no remainder in bits. */
4126 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4127 if (!integer_zerop (bit_offset))
4129 if (vect_debug_details (NULL))
4131 fprintf (dump_file, "bit offset alignment: ");
4132 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4134 return false;
4137 /* Alignment required, in bytes: */
4138 alignment = fold_convert (unsigned_type_node,
4139 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4141 /* Modulo alignment. */
4142 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4143 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4145 if (vect_debug_details (NULL))
4146 fprintf (dump_file, "unexpected misalign value");
4147 return false;
4150 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4152 if (vect_debug_details (NULL))
4153 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4155 return true;
4159 /* Function vect_compute_array_ref_alignment
4161 Compute the alignment of an array-ref.
4162 The alignment we compute here is relative to
4163 TYPE_ALIGN(VECTYPE) boundary.
4165 Output:
4166 OFFSET - the alignment in bits
4167 Return value - the base of the array-ref. E.g,
4168 if the array-ref is a.b[k].c[i][j] the returned
4169 base is a.b[k].c
4172 static tree
4173 vect_compute_array_ref_alignment (struct data_reference *dr,
4174 loop_vec_info loop_vinfo,
4175 tree vectype,
4176 tree *offset)
4178 tree array_first_index = size_zero_node;
4179 tree init;
4180 tree ref = DR_REF (dr);
4181 tree scalar_type = TREE_TYPE (ref);
4182 tree oprnd0 = TREE_OPERAND (ref, 0);
4183 tree dims = size_one_node;
4184 tree misalign = size_zero_node;
4185 tree next_ref, this_offset = size_zero_node;
4186 tree nunits;
4187 tree nbits;
4189 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4190 /* The reference is an array without its last index. */
4191 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4192 &misalign);
4193 else
4194 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4195 &misalign);
4196 if (!vectype)
4197 /* Alignment is not requested. Just return the base. */
4198 return next_ref;
4200 /* Compute alignment. */
4201 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4202 return NULL_TREE;
4203 this_offset = misalign;
4205 /* Check the first index accessed. */
4206 if (!vect_get_first_index (ref, &array_first_index))
4208 if (vect_debug_details (NULL))
4209 fprintf (dump_file, "no first_index for array.");
4210 return NULL_TREE;
4213 /* Check the index of the array_ref. */
4214 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4215 LOOP_VINFO_LOOP (loop_vinfo)->num);
4217 /* FORNOW: In order to simplify the handling of alignment, we make sure
4218 that the first location at which the array is accessed ('init') is on an
4219 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4220 This is too conservative, since we require that
4221 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4222 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4223 This should be relaxed in the future. */
4225 if (!init || !host_integerp (init, 0))
4227 if (vect_debug_details (NULL))
4228 fprintf (dump_file, "non constant init. ");
4229 return NULL_TREE;
4232 /* bytes per scalar element: */
4233 nunits = fold_convert (unsigned_type_node,
4234 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4235 nbits = int_const_binop (MULT_EXPR, nunits,
4236 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4238 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4239 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4240 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4241 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4243 /* TODO: allow negative misalign values. */
4244 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4246 if (vect_debug_details (NULL))
4247 fprintf (dump_file, "unexpected misalign value");
4248 return NULL_TREE;
4250 *offset = misalign;
4251 return next_ref;
4255 /* Function vect_compute_data_refs_alignment
4257 Compute the misalignment of data references in the loop.
4258 This pass may take place at function granularity instead of at loop
4259 granularity.
4261 FOR NOW: No analysis is actually performed. Misalignment is calculated
4262 only for trivial cases. TODO. */
4264 static void
4265 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4267 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4268 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4269 unsigned int i;
4271 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4273 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4274 vect_compute_data_ref_alignment (dr, loop_vinfo);
4277 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4279 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4280 vect_compute_data_ref_alignment (dr, loop_vinfo);
4285 /* Function vect_enhance_data_refs_alignment
4287 This pass will use loop versioning and loop peeling in order to enhance
4288 the alignment of data references in the loop.
4290 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4291 original loop is to be vectorized; Any other loops that are created by
4292 the transformations performed in this pass - are not supposed to be
4293 vectorized. This restriction will be relaxed.
4295 FOR NOW: No transformation is actually performed. TODO. */
4297 static void
4298 vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
4301 This pass will require a cost model to guide it whether to apply peeling
4302 or versioning or a combination of the two. For example, the scheme that
4303 intel uses when given a loop with several memory accesses, is as follows:
4304 choose one memory access ('p') which alignment you want to force by doing
4305 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4306 other accesses are not necessarily aligned, or (2) use loop versioning to
4307 generate one loop in which all accesses are aligned, and another loop in
4308 which only 'p' is necessarily aligned.
4310 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4311 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4312 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4314 Devising a cost model is the most critical aspect of this work. It will
4315 guide us on which access to peel for, whether to use loop versioning, how
4316 many versions to create, etc. The cost model will probably consist of
4317 generic considerations as well as target specific considerations (on
4318 powerpc for example, misaligned stores are more painful than misaligned
4319 loads).
4321 Here is the general steps involved in alignment enhancements:
4323 -- original loop, before alignment analysis:
4324 for (i=0; i<N; i++){
4325 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4326 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4329 -- After vect_compute_data_refs_alignment:
4330 for (i=0; i<N; i++){
4331 x = q[i]; # DR_MISALIGNMENT(q) = 3
4332 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4335 -- Possibility 1: we do loop versioning:
4336 if (p is aligned) {
4337 for (i=0; i<N; i++){ # loop 1A
4338 x = q[i]; # DR_MISALIGNMENT(q) = 3
4339 p[i] = y; # DR_MISALIGNMENT(p) = 0
4342 else {
4343 for (i=0; i<N; i++){ # loop 1B
4344 x = q[i]; # DR_MISALIGNMENT(q) = 3
4345 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4349 -- Possibility 2: we do loop peeling:
4350 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4351 x = q[i];
4352 p[i] = y;
4354 for (i = 3; i < N; i++){ # loop 2A
4355 x = q[i]; # DR_MISALIGNMENT(q) = 0
4356 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4359 -- Possibility 3: combination of loop peeling and versioning:
4360 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4361 x = q[i];
4362 p[i] = y;
4364 if (p is aligned) {
4365 for (i = 3; i<N; i++){ # loop 3A
4366 x = q[i]; # DR_MISALIGNMENT(q) = 0
4367 p[i] = y; # DR_MISALIGNMENT(p) = 0
4370 else {
4371 for (i = 3; i<N; i++){ # loop 3B
4372 x = q[i]; # DR_MISALIGNMENT(q) = 0
4373 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4377 These loops are later passed to loop_transform to be vectorized. The
4378 vectorizer will use the alignment information to guide the transformation
4379 (whether to generate regular loads/stores, or with special handling for
4380 misalignment).
4385 /* Function vect_analyze_data_refs_alignment
4387 Analyze the alignment of the data-references in the loop.
4388 FOR NOW: Until support for misliagned accesses is in place, only if all
4389 accesses are aligned can the loop be vectorized. This restriction will be
4390 relaxed. */
4392 static bool
4393 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4395 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4396 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4397 /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
4399 unsigned int i;
4400 unsigned int decide_peeling_count = 0;
4402 if (vect_debug_details (NULL))
4403 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4406 /* This pass may take place at function granularity instead of at loop
4407 granularity. */
4409 vect_compute_data_refs_alignment (loop_vinfo);
4412 /* This pass will use loop versioning and loop peeling in order to enhance
4413 the alignment of data references in the loop.
4414 FOR NOW: we assume that whatever versioning/peeling took place, the
4415 original loop is to be vectorized. Any other loops that were created by
4416 the transformations performed in this pass - are not supposed to be
4417 vectorized. This restriction will be relaxed. */
4419 vect_enhance_data_refs_alignment (loop_vinfo);
4422 /* Finally, check that loop can be vectorized.
4423 FOR NOW: Until support for misaligned stores is in place, only if all
4424 stores are aligned can the loop be vectorized. This restriction will be
4425 relaxed. In the meantime, we can force the alignment of on of the
4426 data-references in the loop using peeling. We currently use a heuristic
4427 that peels the first misaligned store, but we plan to develop a
4428 better cost model to guide the decision on which data-access to peel for.
4431 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4433 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4434 if (!aligned_access_p (dr))
4436 /* Decide here whether we need peeling for alignment. */
4437 decide_peeling_count++;
4438 if (decide_peeling_count > MAX_NUMBER_OF_UNALIGNED_DATA_REFS)
4440 if (vect_debug_stats (loop) || vect_debug_details (loop))
4441 fprintf (dump_file,
4442 "not vectorized: multiple misaligned stores.");
4443 return false;
4445 else
4447 LOOP_UNALIGNED_DR (loop_vinfo, decide_peeling_count - 1) = dr;
4448 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4453 /* The vectorizer now supports misaligned loads, so we don't fail anymore
4454 in the presence of a misaligned read dataref. For some targets however
4455 it may be preferable not to vectorize in such a case as misaligned
4456 accesses are very costly. This should be considered in the future. */
4458 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4460 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4461 if (!aligned_access_p (dr))
4463 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4464 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4465 fprintf (dump_file, "not vectorized: unaligned load.");
4466 return false;
4471 return true;
4475 /* Function vect_analyze_data_ref_access.
4477 Analyze the access pattern of the data-reference DR. For now, a data access
4478 has to consecutive and aligned to be considered vectorizable. */
4480 static bool
4481 vect_analyze_data_ref_access (struct data_reference *dr)
4483 varray_type access_fns = DR_ACCESS_FNS (dr);
4484 tree access_fn;
4485 tree init, step;
4486 unsigned int dimensions, i;
4488 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4489 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4490 access is contiguous). */
4491 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4493 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4495 access_fn = DR_ACCESS_FN (dr, i);
4497 if (evolution_part_in_loop_num (access_fn,
4498 loop_containing_stmt (DR_STMT (dr))->num))
4500 /* Evolution part is not NULL in this loop (it is neither constant
4501 nor invariant). */
4502 if (vect_debug_details (NULL))
4504 fprintf (dump_file,
4505 "not vectorized: complicated multidim. array access.");
4506 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4508 return false;
4512 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4513 if (!evolution_function_is_constant_p (access_fn)
4514 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4515 access_fn, &init, &step, true))
4517 if (vect_debug_details (NULL))
4519 fprintf (dump_file, "not vectorized: complicated access function.");
4520 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4522 return false;
4525 return true;
4529 /* Function vect_analyze_data_ref_accesses.
4531 Analyze the access pattern of all the data references in the loop.
4533 FORNOW: the only access pattern that is considered vectorizable is a
4534 simple step 1 (consecutive) access.
4536 FORNOW: handle only arrays and pointer accesses. */
4538 static bool
4539 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4541 unsigned int i;
4542 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4543 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4545 if (vect_debug_details (NULL))
4546 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4548 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4550 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4551 bool ok = vect_analyze_data_ref_access (dr);
4552 if (!ok)
4554 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4555 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4556 fprintf (dump_file, "not vectorized: complicated access pattern.");
4557 return false;
4561 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4563 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4564 bool ok = vect_analyze_data_ref_access (dr);
4565 if (!ok)
4567 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4568 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4569 fprintf (dump_file, "not vectorized: complicated access pattern.");
4570 return false;
4574 return true;
4578 /* Function vect_analyze_pointer_ref_access.
4580 Input:
4581 STMT - a stmt that contains a data-ref
4582 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4584 If the data-ref access is vectorizable, return a data_reference structure
4585 that represents it (DR). Otherwise - return NULL. */
4587 static struct data_reference *
4588 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4590 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4591 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4592 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4593 tree init, step;
4594 int step_val;
4595 tree reftype, innertype;
4596 enum machine_mode innermode;
4597 tree indx_access_fn;
4598 int loopnum = loop->num;
4599 struct data_reference *dr;
4601 if (!access_fn)
4603 if (vect_debug_stats (loop) || vect_debug_details (loop))
4604 fprintf (dump_file, "not vectorized: complicated pointer access.");
4605 return NULL;
4608 if (vect_debug_details (NULL))
4610 fprintf (dump_file, "Access function of ptr: ");
4611 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4614 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4616 if (vect_debug_stats (loop) || vect_debug_details (loop))
4617 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4618 return NULL;
4621 STRIP_NOPS (init);
4623 if (!host_integerp (step,0))
4625 if (vect_debug_stats (loop) || vect_debug_details (loop))
4626 fprintf (dump_file,
4627 "not vectorized: non constant step for pointer access.");
4628 return NULL;
4631 step_val = TREE_INT_CST_LOW (step);
4633 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4634 if (TREE_CODE (reftype) != POINTER_TYPE)
4636 if (vect_debug_stats (loop) || vect_debug_details (loop))
4637 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4638 return NULL;
4641 reftype = TREE_TYPE (init);
4642 if (TREE_CODE (reftype) != POINTER_TYPE)
4644 if (vect_debug_stats (loop) || vect_debug_details (loop))
4645 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4646 return NULL;
4649 innertype = TREE_TYPE (reftype);
4650 innermode = TYPE_MODE (innertype);
4651 if (GET_MODE_SIZE (innermode) != step_val)
4653 /* FORNOW: support only consecutive access */
4654 if (vect_debug_stats (loop) || vect_debug_details (loop))
4655 fprintf (dump_file, "not vectorized: non consecutive access.");
4656 return NULL;
4659 indx_access_fn =
4660 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4661 if (vect_debug_details (NULL))
4663 fprintf (dump_file, "Access function of ptr indx: ");
4664 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4666 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4667 return dr;
4671 /* Function vect_get_symbl_and_dr.
4673 The function returns SYMBL - the relevant variable for
4674 memory tag (for aliasing purposes).
4675 Also data reference structure DR is created.
4677 Input:
4678 MEMREF - data reference in STMT
4679 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4681 Output:
4682 DR - data_reference struct for MEMREF
4683 return value - the relevant variable for memory tag (for aliasing purposes).
4687 static tree
4688 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4689 loop_vec_info loop_vinfo, struct data_reference **dr)
4691 tree symbl, oprnd0, oprnd1;
4692 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4693 tree offset;
4694 tree array_base, base;
4695 struct data_reference *new_dr;
4696 bool base_aligned_p;
4698 *dr = NULL;
4699 switch (TREE_CODE (memref))
4701 case INDIRECT_REF:
4702 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4703 if (! new_dr)
4704 return NULL_TREE;
4705 *dr = new_dr;
4706 symbl = DR_BASE_NAME (new_dr);
4707 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4709 switch (TREE_CODE (symbl))
4711 case PLUS_EXPR:
4712 case MINUS_EXPR:
4713 oprnd0 = TREE_OPERAND (symbl, 0);
4714 oprnd1 = TREE_OPERAND (symbl, 1);
4716 STRIP_NOPS(oprnd1);
4717 /* Only {address_base + offset} expressions are supported,
4718 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4719 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4720 TODO: swap operands if {offset + address_base}. */
4721 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4722 && TREE_CODE (oprnd1) != INTEGER_CST)
4723 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4724 return NULL_TREE;
4726 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4727 symbl = oprnd0;
4728 else
4729 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4730 loop_vinfo, &new_dr);
4732 case SSA_NAME:
4733 case ADDR_EXPR:
4734 /* symbl remains unchanged. */
4735 break;
4737 default:
4738 if (vect_debug_details (NULL))
4740 fprintf (dump_file, "unhandled data ref: ");
4741 print_generic_expr (dump_file, memref, TDF_SLIM);
4742 fprintf (dump_file, " (symbl ");
4743 print_generic_expr (dump_file, symbl, TDF_SLIM);
4744 fprintf (dump_file, ") in stmt ");
4745 print_generic_expr (dump_file, stmt, TDF_SLIM);
4747 return NULL_TREE;
4749 break;
4751 case ARRAY_REF:
4752 offset = size_zero_node;
4754 /* Store the array base in the stmt info.
4755 For one dimensional array ref a[i], the base is a,
4756 for multidimensional a[i1][i2]..[iN], the base is
4757 a[i1][i2]..[iN-1]. */
4758 array_base = TREE_OPERAND (memref, 0);
4759 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4761 new_dr = analyze_array (stmt, memref, is_read);
4762 *dr = new_dr;
4764 /* Find the relevant symbol for aliasing purposes. */
4765 base = DR_BASE_NAME (new_dr);
4766 switch (TREE_CODE (base))
4768 case VAR_DECL:
4769 symbl = base;
4770 break;
4772 case INDIRECT_REF:
4773 symbl = TREE_OPERAND (base, 0);
4774 break;
4776 case COMPONENT_REF:
4777 /* Could have recorded more accurate information -
4778 i.e, the actual FIELD_DECL that is being referenced -
4779 but later passes expect VAR_DECL as the nmt. */
4780 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4781 loop_vinfo, &offset, &base_aligned_p);
4782 if (symbl)
4783 break;
4784 /* fall through */
4785 default:
4786 if (vect_debug_details (NULL))
4788 fprintf (dump_file, "unhandled struct/class field access ");
4789 print_generic_expr (dump_file, stmt, TDF_SLIM);
4791 return NULL_TREE;
4793 break;
4795 default:
4796 if (vect_debug_details (NULL))
4798 fprintf (dump_file, "unhandled data ref: ");
4799 print_generic_expr (dump_file, memref, TDF_SLIM);
4800 fprintf (dump_file, " in stmt ");
4801 print_generic_expr (dump_file, stmt, TDF_SLIM);
4803 return NULL_TREE;
4805 return symbl;
4809 /* Function vect_analyze_data_refs.
4811 Find all the data references in the loop.
4813 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4814 which base is really an array (not a pointer) and which alignment
4815 can be forced. This restriction will be relaxed. */
4817 static bool
4818 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4820 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4821 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4822 int nbbs = loop->num_nodes;
4823 block_stmt_iterator si;
4824 int j;
4825 struct data_reference *dr;
4826 tree tag;
4827 tree address_base;
4828 bool base_aligned_p;
4829 tree offset;
4831 if (vect_debug_details (NULL))
4832 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4834 for (j = 0; j < nbbs; j++)
4836 basic_block bb = bbs[j];
4837 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4839 bool is_read = false;
4840 tree stmt = bsi_stmt (si);
4841 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4842 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4843 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4844 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4845 varray_type *datarefs = NULL;
4846 int nvuses, nv_may_defs, nv_must_defs;
4847 tree memref = NULL;
4848 tree symbl;
4850 /* Assumption: there exists a data-ref in stmt, if and only if
4851 it has vuses/vdefs. */
4853 if (!vuses && !v_may_defs && !v_must_defs)
4854 continue;
4856 nvuses = NUM_VUSES (vuses);
4857 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4858 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4860 if (nvuses && (nv_may_defs || nv_must_defs))
4862 if (vect_debug_details (NULL))
4864 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4865 print_generic_expr (dump_file, stmt, TDF_SLIM);
4867 return false;
4870 if (TREE_CODE (stmt) != MODIFY_EXPR)
4872 if (vect_debug_details (NULL))
4874 fprintf (dump_file, "unexpected vops in stmt: ");
4875 print_generic_expr (dump_file, stmt, TDF_SLIM);
4877 return false;
4880 if (vuses)
4882 memref = TREE_OPERAND (stmt, 1);
4883 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4884 is_read = true;
4886 else /* vdefs */
4888 memref = TREE_OPERAND (stmt, 0);
4889 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4890 is_read = false;
4893 /* Analyze MEMREF. If it is of a supported form, build data_reference
4894 struct for it (DR) and find the relevant symbol for aliasing
4895 purposes. */
4896 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
4897 &dr);
4898 if (!symbl)
4900 if (vect_debug_stats (loop) || vect_debug_details (loop))
4902 fprintf (dump_file, "not vectorized: unhandled data ref: ");
4903 print_generic_expr (dump_file, stmt, TDF_SLIM);
4905 return false;
4908 /* Find and record the memtag assigned to this data-ref. */
4909 switch (TREE_CODE (symbl))
4911 case VAR_DECL:
4912 STMT_VINFO_MEMTAG (stmt_info) = symbl;
4913 break;
4915 case SSA_NAME:
4916 symbl = SSA_NAME_VAR (symbl);
4917 tag = get_var_ann (symbl)->type_mem_tag;
4918 if (!tag)
4920 tree ptr = TREE_OPERAND (memref, 0);
4921 if (TREE_CODE (ptr) == SSA_NAME)
4922 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4924 if (!tag)
4926 if (vect_debug_stats (loop) || vect_debug_details (loop))
4927 fprintf (dump_file, "not vectorized: no memtag for ref.");
4928 return false;
4930 STMT_VINFO_MEMTAG (stmt_info) = tag;
4931 break;
4933 case ADDR_EXPR:
4934 address_base = TREE_OPERAND (symbl, 0);
4936 switch (TREE_CODE (address_base))
4938 case ARRAY_REF:
4939 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
4940 DR_IS_READ(dr));
4941 STMT_VINFO_MEMTAG (stmt_info) =
4942 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
4943 loop_vinfo, &offset,
4944 &base_aligned_p);
4945 break;
4947 case VAR_DECL:
4948 STMT_VINFO_MEMTAG (stmt_info) = address_base;
4949 break;
4951 default:
4952 if (vect_debug_stats (loop) || vect_debug_details (loop))
4954 fprintf (dump_file,
4955 "not vectorized: unhandled address expr: ");
4956 print_generic_expr (dump_file, stmt, TDF_SLIM);
4958 return false;
4960 break;
4962 default:
4963 if (vect_debug_stats (loop) || vect_debug_details (loop))
4965 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
4966 print_generic_expr (dump_file, memref, TDF_SLIM);
4968 return false;
4971 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
4972 STMT_VINFO_DATA_REF (stmt_info) = dr;
4976 return true;
4980 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
4982 /* Function vect_mark_relevant.
4984 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
4986 static void
4987 vect_mark_relevant (varray_type worklist, tree stmt)
4989 stmt_vec_info stmt_info;
4991 if (vect_debug_details (NULL))
4992 fprintf (dump_file, "mark relevant.");
4994 if (TREE_CODE (stmt) == PHI_NODE)
4996 VARRAY_PUSH_TREE (worklist, stmt);
4997 return;
5000 stmt_info = vinfo_for_stmt (stmt);
5002 if (!stmt_info)
5004 if (vect_debug_details (NULL))
5006 fprintf (dump_file, "mark relevant: no stmt info!!.");
5007 print_generic_expr (dump_file, stmt, TDF_SLIM);
5009 return;
5012 if (STMT_VINFO_RELEVANT_P (stmt_info))
5014 if (vect_debug_details (NULL))
5015 fprintf (dump_file, "already marked relevant.");
5016 return;
5019 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5020 VARRAY_PUSH_TREE (worklist, stmt);
5024 /* Function vect_stmt_relevant_p.
5026 Return true if STMT in loop that is represented by LOOP_VINFO is
5027 "relevant for vectorization".
5029 A stmt is considered "relevant for vectorization" if:
5030 - it has uses outside the loop.
5031 - it has vdefs (it alters memory).
5032 - control stmts in the loop (except for the exit condition).
5034 CHECKME: what other side effects would the vectorizer allow? */
5036 static bool
5037 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5039 v_may_def_optype v_may_defs;
5040 v_must_def_optype v_must_defs;
5041 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5042 int i;
5043 dataflow_t df;
5044 int num_uses;
5046 /* cond stmt other than loop exit cond. */
5047 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5048 return true;
5050 /* changing memory. */
5051 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5052 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5053 if (v_may_defs || v_must_defs)
5055 if (vect_debug_details (NULL))
5056 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5057 return true;
5060 /* uses outside the loop. */
5061 df = get_immediate_uses (stmt);
5062 num_uses = num_immediate_uses (df);
5063 for (i = 0; i < num_uses; i++)
5065 tree use = immediate_use (df, i);
5066 basic_block bb = bb_for_stmt (use);
5067 if (!flow_bb_inside_loop_p (loop, bb))
5069 if (vect_debug_details (NULL))
5070 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5071 return true;
5075 return false;
5079 /* Function vect_mark_stmts_to_be_vectorized.
5081 Not all stmts in the loop need to be vectorized. For example:
5083 for i...
5084 for j...
5085 1. T0 = i + j
5086 2. T1 = a[T0]
5088 3. j = j + 1
5090 Stmt 1 and 3 do not need to be vectorized, because loop control and
5091 addressing of vectorized data-refs are handled differently.
5093 This pass detects such stmts. */
5095 static bool
5096 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5098 varray_type worklist;
5099 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5100 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5101 unsigned int nbbs = loop->num_nodes;
5102 block_stmt_iterator si;
5103 tree stmt;
5104 stmt_ann_t ann;
5105 unsigned int i;
5106 int j;
5107 use_optype use_ops;
5108 stmt_vec_info stmt_info;
5110 if (vect_debug_details (NULL))
5111 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5113 VARRAY_TREE_INIT (worklist, 64, "work list");
5115 /* 1. Init worklist. */
5117 for (i = 0; i < nbbs; i++)
5119 basic_block bb = bbs[i];
5120 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5122 stmt = bsi_stmt (si);
5124 if (vect_debug_details (NULL))
5126 fprintf (dump_file, "init: stmt relevant? ");
5127 print_generic_expr (dump_file, stmt, TDF_SLIM);
5130 stmt_info = vinfo_for_stmt (stmt);
5131 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5133 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5134 vect_mark_relevant (worklist, stmt);
5139 /* 2. Process_worklist */
5141 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5143 stmt = VARRAY_TOP_TREE (worklist);
5144 VARRAY_POP (worklist);
5146 if (vect_debug_details (NULL))
5148 fprintf (dump_file, "worklist: examine stmt: ");
5149 print_generic_expr (dump_file, stmt, TDF_SLIM);
5152 /* Examine the USES in this statement. Mark all the statements which
5153 feed this statement's uses as "relevant", unless the USE is used as
5154 an array index. */
5156 if (TREE_CODE (stmt) == PHI_NODE)
5158 /* follow the def-use chain inside the loop. */
5159 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5161 tree arg = PHI_ARG_DEF (stmt, j);
5162 tree def_stmt = NULL_TREE;
5163 basic_block bb;
5164 if (!vect_is_simple_use (arg, loop, &def_stmt))
5166 if (vect_debug_details (NULL))
5167 fprintf (dump_file, "worklist: unsupported use.");
5168 varray_clear (worklist);
5169 return false;
5171 if (!def_stmt)
5172 continue;
5174 if (vect_debug_details (NULL))
5176 fprintf (dump_file, "worklist: def_stmt: ");
5177 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5180 bb = bb_for_stmt (def_stmt);
5181 if (flow_bb_inside_loop_p (loop, bb))
5182 vect_mark_relevant (worklist, def_stmt);
5186 ann = stmt_ann (stmt);
5187 use_ops = USE_OPS (ann);
5189 for (i = 0; i < NUM_USES (use_ops); i++)
5191 tree use = USE_OP (use_ops, i);
5193 /* We are only interested in uses that need to be vectorized. Uses
5194 that are used for address computation are not considered relevant.
5196 if (exist_non_indexing_operands_for_use_p (use, stmt))
5198 tree def_stmt = NULL_TREE;
5199 basic_block bb;
5200 if (!vect_is_simple_use (use, loop, &def_stmt))
5202 if (vect_debug_details (NULL))
5203 fprintf (dump_file, "worklist: unsupported use.");
5204 varray_clear (worklist);
5205 return false;
5208 if (!def_stmt)
5209 continue;
5211 if (vect_debug_details (NULL))
5213 fprintf (dump_file, "worklist: examine use %d: ", i);
5214 print_generic_expr (dump_file, use, TDF_SLIM);
5217 bb = bb_for_stmt (def_stmt);
5218 if (flow_bb_inside_loop_p (loop, bb))
5219 vect_mark_relevant (worklist, def_stmt);
5222 } /* while worklist */
5224 varray_clear (worklist);
5225 return true;
5229 /* Function vect_analyze_loop_with_symbolic_num_of_iters.
5231 In case the number of iterations that LOOP iterates in unknown at compile
5232 time, an epilog loop will be generated, and the loop induction variables
5233 (IVs) will be "advanced" to the value they are supposed to take just before
5234 the epilog loop. Here we check that the access function of the loop IVs
5235 and the expression that represents the loop bound are simple enough.
5236 These restrictions will be relaxed in the future. */
5238 static bool
5239 vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
5240 struct loop *loop)
5242 basic_block bb = loop->header;
5243 tree phi;
5245 if (vect_debug_details (NULL))
5246 fprintf (dump_file,
5247 "\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n");
5249 if (chrec_contains_undetermined (niters))
5251 if (vect_debug_details (NULL))
5252 fprintf (dump_file, "Infinite number of iterations.");
5253 return false;
5256 if (!niters)
5258 if (vect_debug_details (NULL))
5259 fprintf (dump_file, "niters is NULL pointer.");
5260 return false;
5263 if (vect_debug_details (NULL))
5265 fprintf (dump_file, "Symbolic number of iterations is ");
5266 print_generic_expr (dump_file, niters, TDF_DETAILS);
5269 /* Analyze phi functions of the loop header. */
5271 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
5273 tree access_fn = NULL;
5274 tree evolution_part;
5276 if (vect_debug_details (NULL))
5278 fprintf (dump_file, "Analyze phi: ");
5279 print_generic_expr (dump_file, phi, TDF_SLIM);
5282 /* Skip virtual phi's. The data dependences that are associated with
5283 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5285 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5287 if (vect_debug_details (NULL))
5288 fprintf (dump_file, "virtual phi. skip.");
5289 continue;
5292 /* Analyze the evolution function. */
5294 access_fn = instantiate_parameters
5295 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5297 if (!access_fn)
5299 if (vect_debug_details (NULL))
5300 fprintf (dump_file, "No Access function.");
5301 return false;
5304 if (vect_debug_details (NULL))
5306 fprintf (dump_file, "Access function of PHI: ");
5307 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5310 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5312 if (evolution_part == NULL_TREE)
5313 return false;
5315 /* FORNOW: We do not transform initial conditions of IVs
5316 which evolution functions are a polynomial of degree >= 2. */
5318 if (tree_is_chrec (evolution_part))
5319 return false;
5322 return true;
5326 /* Function vect_get_loop_niters.
5328 Determine how many iterations the loop is executed. */
5330 static tree
5331 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5333 tree niters;
5335 if (vect_debug_details (NULL))
5336 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5338 niters = number_of_iterations_in_loop (loop);
5340 if (niters != NULL_TREE
5341 && niters != chrec_dont_know)
5343 *number_of_iterations = niters;
5345 if (vect_debug_details (NULL))
5347 fprintf (dump_file, "==> get_loop_niters:" );
5348 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5352 return get_loop_exit_condition (loop);
5356 /* Function vect_analyze_loop_form.
5358 Verify the following restrictions (some may be relaxed in the future):
5359 - it's an inner-most loop
5360 - number of BBs = 2 (which are the loop header and the latch)
5361 - the loop has a pre-header
5362 - the loop has a single entry and exit
5363 - the loop exit condition is simple enough, and the number of iterations
5364 can be analyzed (a countable loop). */
5366 static loop_vec_info
5367 vect_analyze_loop_form (struct loop *loop)
5369 loop_vec_info loop_vinfo;
5370 tree loop_cond;
5371 tree number_of_iterations = NULL;
5373 if (vect_debug_details (loop))
5374 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5376 if (loop->inner
5377 || !loop->single_exit
5378 || loop->num_nodes != 2)
5380 if (vect_debug_stats (loop) || vect_debug_details (loop))
5382 fprintf (dump_file, "not vectorized: bad loop form. ");
5383 if (loop->inner)
5384 fprintf (dump_file, "nested loop.");
5385 else if (!loop->single_exit)
5386 fprintf (dump_file, "multiple exits.");
5387 else if (loop->num_nodes != 2)
5388 fprintf (dump_file, "too many BBs in loop.");
5391 return NULL;
5394 /* We assume that the loop exit condition is at the end of the loop. i.e,
5395 that the loop is represented as a do-while (with a proper if-guard
5396 before the loop if needed), where the loop header contains all the
5397 executable statements, and the latch is empty. */
5398 if (!empty_block_p (loop->latch))
5400 if (vect_debug_stats (loop) || vect_debug_details (loop))
5401 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5402 return NULL;
5405 if (empty_block_p (loop->header))
5407 if (vect_debug_stats (loop) || vect_debug_details (loop))
5408 fprintf (dump_file, "not vectorized: empty loop.");
5409 return NULL;
5412 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5413 if (!loop_cond)
5415 if (vect_debug_stats (loop) || vect_debug_details (loop))
5416 fprintf (dump_file, "not vectorized: complicated exit condition.");
5417 return NULL;
5420 if (!number_of_iterations)
5422 if (vect_debug_stats (loop) || vect_debug_details (loop))
5423 fprintf (dump_file,
5424 "not vectorized: number of iterations cannot be computed.");
5425 return NULL;
5428 loop_vinfo = new_loop_vec_info (loop);
5429 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5430 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5432 if (vect_debug_stats (loop) || vect_debug_details (loop))
5433 fprintf (dump_file, "loop bound unknown.");
5435 /* Unknown loop bound. */
5436 if (!vect_analyze_loop_with_symbolic_num_of_iters
5437 (number_of_iterations, loop))
5439 if (vect_debug_stats (loop) || vect_debug_details (loop))
5440 fprintf (dump_file,
5441 "not vectorized: can't determine loop bound.");
5442 return NULL;
5444 else
5446 /* We need only one loop entry for unknown loop bound support. */
5447 if (loop->num_entries != 1 || !loop->pre_header)
5449 if (vect_debug_stats (loop) || vect_debug_details (loop))
5450 fprintf (dump_file,
5451 "not vectorized: more than one loop entry.");
5452 return NULL;
5456 else
5457 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5459 if (vect_debug_stats (loop) || vect_debug_details (loop))
5460 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5461 return NULL;
5464 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5466 return loop_vinfo;
5470 /* Function vect_analyze_loop.
5472 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5473 for it. The different analyses will record information in the
5474 loop_vec_info struct. */
5476 static loop_vec_info
5477 vect_analyze_loop (struct loop *loop)
5479 bool ok;
5480 loop_vec_info loop_vinfo;
5482 if (vect_debug_details (NULL))
5483 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5485 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5487 loop_vinfo = vect_analyze_loop_form (loop);
5488 if (!loop_vinfo)
5490 if (vect_debug_details (loop))
5491 fprintf (dump_file, "bad loop form.");
5492 return NULL;
5495 /* Find all data references in the loop (which correspond to vdefs/vuses)
5496 and analyze their evolution in the loop.
5498 FORNOW: Handle only simple, array references, which
5499 alignment can be forced, and aligned pointer-references. */
5501 ok = vect_analyze_data_refs (loop_vinfo);
5502 if (!ok)
5504 if (vect_debug_details (loop))
5505 fprintf (dump_file, "bad data references.");
5506 destroy_loop_vec_info (loop_vinfo);
5507 return NULL;
5510 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5512 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5513 if (!ok)
5515 if (vect_debug_details (loop))
5516 fprintf (dump_file, "unexpected pattern.");
5517 if (vect_debug_details (loop))
5518 fprintf (dump_file, "not vectorized: unexpected pattern.");
5519 destroy_loop_vec_info (loop_vinfo);
5520 return NULL;
5523 /* Check that all cross-iteration scalar data-flow cycles are OK.
5524 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5526 ok = vect_analyze_scalar_cycles (loop_vinfo);
5527 if (!ok)
5529 if (vect_debug_details (loop))
5530 fprintf (dump_file, "bad scalar cycle.");
5531 destroy_loop_vec_info (loop_vinfo);
5532 return NULL;
5535 /* Analyze data dependences between the data-refs in the loop.
5536 FORNOW: fail at the first data dependence that we encounter. */
5538 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5539 if (!ok)
5541 if (vect_debug_details (loop))
5542 fprintf (dump_file, "bad data dependence.");
5543 destroy_loop_vec_info (loop_vinfo);
5544 return NULL;
5547 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5548 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5550 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5551 if (!ok)
5553 if (vect_debug_details (loop))
5554 fprintf (dump_file, "bad data access.");
5555 destroy_loop_vec_info (loop_vinfo);
5556 return NULL;
5559 /* Analyze the alignment of the data-refs in the loop.
5560 FORNOW: Only aligned accesses are handled. */
5562 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5563 if (!ok)
5565 if (vect_debug_details (loop))
5566 fprintf (dump_file, "bad data alignment.");
5567 destroy_loop_vec_info (loop_vinfo);
5568 return NULL;
5571 /* Scan all the operations in the loop and make sure they are
5572 vectorizable. */
5574 ok = vect_analyze_operations (loop_vinfo);
5575 if (!ok)
5577 if (vect_debug_details (loop))
5578 fprintf (dump_file, "bad operation or unsupported loop bound.");
5579 destroy_loop_vec_info (loop_vinfo);
5580 return NULL;
5583 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5585 return loop_vinfo;
5589 /* Function need_imm_uses_for.
5591 Return whether we ought to include information for 'var'
5592 when calculating immediate uses. For this pass we only want use
5593 information for non-virtual variables. */
5595 static bool
5596 need_imm_uses_for (tree var)
5598 return is_gimple_reg (var);
/* NOTE(review): the lines below retain the blob viewer's own line numbers
   (5602 ...) embedded in each line, and the extraction dropped brace-only
   and blank lines.  Because the dropped braces make the exact block
   structure of the tail ambiguous, and the function's closing brace lies
   past the end of this excerpt, the code is left byte-identical and only
   annotated.  */
5602 /* Function vectorize_loops.
5604 Entry Point to loop vectorization phase. */
5606 void
5607 vectorize_loops (struct loops *loops)
/* LOOPS_NUM snapshots loops->num before transformation so that loops
   created by versioning/peeling (which get higher numbers) are skipped.  */
5609 unsigned int i, loops_num;
5610 unsigned int num_vectorized_loops = 0;
5612 /* Does the target support SIMD? */
5613 /* FORNOW: until more sophisticated machine modelling is in place. */
5614 if (!UNITS_PER_SIMD_WORD)
5616 if (vect_debug_details (NULL))
5617 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5618 return;
/* Immediate-use info is restricted to non-virtual variables via
   need_imm_uses_for (defined above).  */
5621 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5623 /* ----------- Analyze loops. ----------- */
5625 /* If some loop was duplicated, it gets bigger number
5626 than all previously defined loops. This fact allows us to run
5627 only over initial loops skipping newly generated ones. */
5628 loops_num = loops->num;
/* Loop index starts at 1: entry 0 of loops->parray is the function's
   pseudo root loop, not a real loop.  */
5629 for (i = 1; i < loops_num; i++)
5631 loop_vec_info loop_vinfo;
5632 struct loop *loop = loops->parray[i];
5634 if (!loop)
5635 continue;
/* The analysis result is stashed in loop->aux so the finalize pass
   below can free it.  */
5637 loop_vinfo = vect_analyze_loop (loop);
5638 loop->aux = loop_vinfo;
5640 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5641 continue;
5643 vect_transform_loop (loop_vinfo, loops);
5644 num_vectorized_loops++;
5647 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5648 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5649 num_vectorized_loops);
5651 /* ----------- Finalize. ----------- */
5653 free_df ();
/* Release per-loop analysis data; destroy_loop_vec_info handles a NULL
   loop_vinfo for loops whose analysis failed early.  */
5654 for (i = 1; i < loops_num; i++)
5656 struct loop *loop = loops->parray[i];
5657 loop_vec_info loop_vinfo;
5659 if (!loop)
5660 continue;
5661 loop_vinfo = loop->aux;
5662 destroy_loop_vec_info (loop_vinfo);
5663 loop->aux = NULL;
5666 rewrite_into_ssa (false);
5667 if (!bitmap_empty_p (vars_to_rename))
5669 /* The rewrite of ssa names may cause violation of loop closed ssa
5670 form invariants. TODO -- avoid these rewrites completely.
5671 Information in virtual phi nodes is sufficient for it. */
5672 rewrite_into_loop_closed_ssa ();
/* NOTE(review): lines 5672 and 5674 both call rewrite_into_loop_closed_ssa;
   with the brace line between them dropped by the extraction it is
   ambiguous whether the second call is inside the conditional -- verify
   against the original file before editing this tail.  */
5674 rewrite_into_loop_closed_ssa ();
5675 bitmap_clear (vars_to_rename);