* gcc.c-torture/execute/20041113-1.c: New test.
[official-gcc.git] / gcc / tree-vectorizer.c
blob0c29a34251e08a57d52aa8e23ef86babf16e11b2
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26      simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79      the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "toplev.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info vect_analyze_loop (struct loop *);
151 static loop_vec_info vect_analyze_loop_form (struct loop *);
152 static bool vect_analyze_data_refs (loop_vec_info);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154 static bool vect_analyze_scalar_cycles (loop_vec_info);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info);
157 static bool vect_compute_data_refs_alignment (loop_vec_info);
158 static bool vect_analyze_operations (loop_vec_info);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info, struct loops *);
162 static void vect_transform_loop_bound (loop_vec_info, tree niters);
163 static bool vect_transform_stmt (tree, block_stmt_iterator *);
164 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168 static enum dr_alignment_support vect_supportable_dr_alignment
169 (struct data_reference *);
170 static void vect_align_data_ref (tree);
171 static void vect_enhance_data_refs_alignment (loop_vec_info);
173 /* Utility functions for the analyses. */
174 static bool vect_is_simple_use (tree , struct loop *, tree *);
175 static bool exist_non_indexing_operands_for_use_p (tree, tree);
176 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
177 static void vect_mark_relevant (varray_type, tree);
178 static bool vect_stmt_relevant_p (tree, loop_vec_info);
179 static tree vect_get_loop_niters (struct loop *, tree *);
180 static bool vect_compute_data_ref_alignment
181 (struct data_reference *, loop_vec_info);
182 static bool vect_analyze_data_ref_access (struct data_reference *);
183 static bool vect_get_first_index (tree, tree *);
184 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
185 static struct data_reference * vect_analyze_pointer_ref_access
186 (tree, tree, bool);
187 static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
188 struct loop *loop);
189 static tree vect_get_base_and_bit_offset
190 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
191 static struct data_reference * vect_analyze_pointer_ref_access
192 (tree, tree, bool);
193 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
194 static tree vect_compute_array_ref_alignment
195 (struct data_reference *, loop_vec_info, tree, tree *);
196 static tree vect_get_ptr_offset (tree, tree, tree *);
197 static tree vect_get_symbl_and_dr
198 (tree, tree, bool, loop_vec_info, struct data_reference **);
200 /* Utility functions for the code transformation. */
201 static tree vect_create_destination_var (tree, tree);
202 static tree vect_create_data_ref_ptr
203 (tree, block_stmt_iterator *, tree, tree *, bool);
204 static tree vect_create_index_for_vector_ref
205 (struct loop *, block_stmt_iterator *);
206 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
207 static tree get_vectype_for_scalar_type (tree);
208 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
209 static tree vect_get_vec_def_for_operand (tree, tree);
210 static tree vect_init_vector (tree, tree);
211 static tree vect_build_symbol_bound (tree, int, struct loop *);
212 static void vect_finish_stmt_generation
213 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
215 static void vect_generate_tmps_on_preheader (loop_vec_info,
216 tree *, tree *,
217 tree *);
218 static tree vect_build_loop_niters (loop_vec_info);
219 static void vect_update_ivs_after_vectorizer (struct loop *, tree);
221 /* Loop transformations prior to vectorization. */
223 /* Loop transformations entry point function.
224 It can be used outside of the vectorizer
225 in case the loop to be manipulated answers conditions specified
226 in function documentation. */
227 struct loop *tree_duplicate_loop_to_edge (struct loop *,
228 struct loops *, edge,
229 tree, tree, bool);
231 static void allocate_new_names (bitmap);
232 static void rename_use_op (use_operand_p);
233 static void rename_def_op (def_operand_p, tree);
234 static void rename_variables_in_bb (basic_block);
235 static void free_new_names (bitmap);
236 static void rename_variables_in_loop (struct loop *);
237 static void copy_phi_nodes (struct loop *, struct loop *, bool);
238 static void update_phis_for_duplicate_loop (struct loop *,
239 struct loop *,
240 bool after);
241 static void update_phi_nodes_for_guard (edge, struct loop *);
242 static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
243 static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *,
244 struct loops *,
245 edge);
246 static edge add_loop_guard (basic_block, tree, basic_block);
247 static bool verify_loop_for_duplication (struct loop *, bool, edge);
249 /* Utilities dealing with loop peeling (not peeling itself). */
250 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
251 static void vect_update_niters_after_peeling (loop_vec_info, tree);
252 static void vect_update_inits_of_dr (struct data_reference *, struct loop *,
253 tree niters);
254 static void vect_update_inits_of_drs (loop_vec_info, tree);
255 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
257 /* Utilities for creation and deletion of vec_info structs. */
258 loop_vec_info new_loop_vec_info (struct loop *loop);
259 void destroy_loop_vec_info (loop_vec_info);
260 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
262 static bool vect_debug_stats (struct loop *loop);
263 static bool vect_debug_details (struct loop *loop);
266 /* Utilities to support loop peeling for vectorization purposes. */
269 /* For each definition in DEFINITIONS this function allocates
270 new ssa name. */
272 static void
273 allocate_new_names (bitmap definitions)
275 unsigned ver;
276 bitmap_iterator bi;
278 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
280 tree def = ssa_name (ver);
281 tree *new_name_ptr = xmalloc (sizeof (tree));
283 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
285 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
286 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
288 SSA_NAME_AUX (def) = new_name_ptr;
293 /* Renames the use *OP_P. */
295 static void
296 rename_use_op (use_operand_p op_p)
298 tree *new_name_ptr;
300 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
301 return;
303 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
305 /* Something defined outside of the loop. */
306 if (!new_name_ptr)
307 return;
309 /* An ordinary ssa name defined in the loop. */
311 SET_USE (op_p, *new_name_ptr);
315 /* Renames the def *OP_P in statement STMT. */
317 static void
318 rename_def_op (def_operand_p op_p, tree stmt)
320 tree *new_name_ptr;
322 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
323 return;
325 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
327 /* Something defined outside of the loop. */
328 if (!new_name_ptr)
329 return;
331 /* An ordinary ssa name defined in the loop. */
333 SET_DEF (op_p, *new_name_ptr);
334 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
338 /* Renames the variables in basic block BB. */
340 static void
341 rename_variables_in_bb (basic_block bb)
343 tree phi;
344 block_stmt_iterator bsi;
345 tree stmt;
346 stmt_ann_t ann;
347 use_optype uses;
348 vuse_optype vuses;
349 def_optype defs;
350 v_may_def_optype v_may_defs;
351 v_must_def_optype v_must_defs;
352 unsigned i;
353 edge e;
354 edge_iterator ei;
356 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
357 rename_def_op (PHI_RESULT_PTR (phi), phi);
359 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
361 stmt = bsi_stmt (bsi);
362 get_stmt_operands (stmt);
363 ann = stmt_ann (stmt);
365 uses = USE_OPS (ann);
366 for (i = 0; i < NUM_USES (uses); i++)
367 rename_use_op (USE_OP_PTR (uses, i));
369 defs = DEF_OPS (ann);
370 for (i = 0; i < NUM_DEFS (defs); i++)
371 rename_def_op (DEF_OP_PTR (defs, i), stmt);
373 vuses = VUSE_OPS (ann);
374 for (i = 0; i < NUM_VUSES (vuses); i++)
375 rename_use_op (VUSE_OP_PTR (vuses, i));
377 v_may_defs = V_MAY_DEF_OPS (ann);
378 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
380 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
381 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
384 v_must_defs = V_MUST_DEF_OPS (ann);
385 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
387 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
388 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
392 FOR_EACH_EDGE (e, ei, bb->succs)
393 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
394 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
398 /* Releases the structures holding the new ssa names. */
400 static void
401 free_new_names (bitmap definitions)
403 unsigned ver;
404 bitmap_iterator bi;
406 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
408 tree def = ssa_name (ver);
410 if (SSA_NAME_AUX (def))
412 free (SSA_NAME_AUX (def));
413 SSA_NAME_AUX (def) = NULL;
419 /* Renames variables in new generated LOOP. */
421 static void
422 rename_variables_in_loop (struct loop *loop)
424 unsigned i;
425 basic_block *bbs;
427 bbs = get_loop_body (loop);
429 for (i = 0; i < loop->num_nodes; i++)
430 rename_variables_in_bb (bbs[i]);
432 free (bbs);
436 /* This function copies phis from LOOP header to
437 NEW_LOOP header. AFTER is as
438 in update_phis_for_duplicate_loop function. */
440 static void
441 copy_phi_nodes (struct loop *loop, struct loop *new_loop,
442 bool after)
444 tree phi, new_phi, def;
445 edge new_e;
446 edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
448 /* Second add arguments to newly created phi nodes. */
449 for (phi = phi_nodes (loop->header),
450 new_phi = phi_nodes (new_loop->header);
451 phi;
452 phi = PHI_CHAIN (phi),
453 new_phi = PHI_CHAIN (new_phi))
455 new_e = loop_preheader_edge (new_loop);
456 def = PHI_ARG_DEF_FROM_EDGE (phi, e);
457 add_phi_arg (&new_phi, def, new_e);
462 /* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
463 executes after LOOP, and false if it executes before it. */
465 static void
466 update_phis_for_duplicate_loop (struct loop *loop,
467 struct loop *new_loop, bool after)
469 edge old_latch;
470 tree *new_name_ptr, new_ssa_name;
471 tree phi_new, phi_old, def;
472 edge orig_entry_e = loop_preheader_edge (loop);
474 /* Copy phis from loop->header to new_loop->header. */
475 copy_phi_nodes (loop, new_loop, after);
477 old_latch = loop_latch_edge (loop);
479 /* Update PHI args for the new loop latch edge, and
480 the old loop preheader edge, we know that the PHI nodes
481 are ordered appropriately in copy_phi_nodes. */
482 for (phi_new = phi_nodes (new_loop->header),
483 phi_old = phi_nodes (loop->header);
484 phi_new && phi_old;
485 phi_new = TREE_CHAIN (phi_new), phi_old = TREE_CHAIN (phi_old))
487 def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
489 if (TREE_CODE (def) != SSA_NAME)
490 continue;
492 new_name_ptr = SSA_NAME_AUX (def);
494 /* Something defined outside of the loop. */
495 if (!new_name_ptr)
496 continue;
498 /* An ordinary ssa name defined in the loop. */
499 new_ssa_name = *new_name_ptr;
501 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
503 /* Update PHI args for the original loop pre-header edge. */
504 if (! after)
505 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
506 new_ssa_name);
511 /* Update PHI nodes for a guard of the LOOP.
513 LOOP is supposed to have a preheader bb at which a guard condition is
514 located. The true edge of this condition skips the LOOP and ends
515 at the destination of the (unique) LOOP exit. The loop exit bb is supposed
516 to be an empty bb (created by this transformation) with one successor.
518 This function creates phi nodes at the LOOP exit bb. These phis need to be
519 created as a result of adding true edge coming from guard.
521 FORNOW: Only phis which have corresponding phi nodes at the header of the
522 LOOP are created. Here we use the assumption that after the LOOP there
523 are no uses of defs generated in LOOP.
525 After the phis creation, the function updates the values of phi nodes at
526 the LOOP exit successor bb:
528 Original loop:
530 bb0: loop preheader
531 goto bb1
532 bb1: loop header
533 if (exit_cond) goto bb3 else goto bb2
534 bb2: loop latch
535 goto bb1
536 bb3:
539 After guard creation (the loop before this function):
541 bb0: loop preheader
542 if (guard_condition) goto bb4 else goto bb1
543 bb1: loop header
544 if (exit_cond) goto bb4 else goto bb2
545 bb2: loop latch
546 goto bb1
547 bb4: loop exit
548 (new empty bb)
549 goto bb3
550 bb3:
552 This function updates the phi nodes in bb4 and in bb3, to account for the
553 new edge from bb0 to bb4. */
555 static void
556 update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
558 tree phi, phi1;
559 basic_block bb = loop->exit_edges[0]->dest;
561 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
563 tree new_phi;
564 tree phi_arg;
566 /* Generate new phi node. */
567 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), bb);
569 /* Add argument coming from guard true edge. */
570 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
571 add_phi_arg (&new_phi, phi_arg, guard_true_edge);
573 /* Add argument coming from loop exit edge. */
574 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
575 add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
577 /* Update all phi nodes at the loop exit successor. */
578 for (phi1 = phi_nodes (EDGE_SUCC (bb, 0)->dest);
579 phi1;
580 phi1 = TREE_CHAIN (phi1))
582 tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, EDGE_SUCC (bb, 0));
583 if (old_arg == phi_arg)
585 edge e = EDGE_SUCC (bb, 0);
587 SET_PHI_ARG_DEF (phi1,
588 phi_arg_from_edge (phi1, e),
589 PHI_RESULT (new_phi));
594 set_phi_nodes (bb, phi_reverse (phi_nodes (bb)));
598 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
599 that starts at zero, increases by one and its limit is NITERS. */
601 static void
602 make_loop_iterate_ntimes (struct loop *loop, tree niters,
603 tree begin_label, tree exit_label)
605 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
606 tree orig_cond;
607 edge exit_edge = loop->exit_edges[0];
608 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
610 /* Flow loop scan does not update loop->single_exit field. */
611 loop->single_exit = loop->exit_edges[0];
612 orig_cond = get_loop_exit_condition (loop);
613 gcc_assert (orig_cond);
614 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
615 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
617 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
618 back to the exit condition statement. */
619 bsi_next (&loop_exit_bsi);
620 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
623 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
624 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
625 else /* 'then' edge loops back. */
626 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
628 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
629 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
630 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
631 begin_label, exit_label);
632 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
634 /* Remove old loop exit test: */
635 bsi_remove (&loop_exit_bsi);
637 if (vect_debug_stats (loop) || vect_debug_details (loop))
638 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
642 /* Given LOOP this function generates a new copy of it and puts it
643 on E which is either the entry or exit of LOOP. */
645 static struct loop *
646 tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
647 edge e)
649 struct loop *new_loop;
650 basic_block *new_bbs, *bbs;
651 bool at_exit;
652 bool was_imm_dom;
653 basic_block exit_dest;
654 tree phi, phi_arg;
656 at_exit = (e == loop->exit_edges[0]);
657 if (!at_exit && e != loop_preheader_edge (loop))
659 if (dump_file && (dump_flags & TDF_DETAILS))
660 fprintf (dump_file,
661 "Edge is not an entry nor an exit edge.\n");
662 return NULL;
665 bbs = get_loop_body (loop);
667 /* Check whether duplication is possible. */
668 if (!can_copy_bbs_p (bbs, loop->num_nodes))
670 if (vect_debug_stats (loop) || vect_debug_details (loop))
671 fprintf (dump_file,
672 "Cannot copy basic blocks.\n");
673 free (bbs);
674 return NULL;
677 /* Generate new loop structure. */
678 new_loop = duplicate_loop (loops, loop, loop->outer);
679 if (!new_loop)
681 if (vect_debug_stats (loop) || vect_debug_details (loop))
682 fprintf (dump_file,
683 "The duplicate_loop returns NULL.\n");
684 free (bbs);
685 return NULL;
688 exit_dest = loop->exit_edges[0]->dest;
689 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
690 exit_dest) == loop->header ?
691 true : false);
693 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
695 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
697 /* Duplicating phi args at exit bbs as coming
698 also from exit of duplicated loop. */
699 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
701 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
702 if (phi_arg)
704 edge new_loop_exit_edge;
706 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
707 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
708 else
709 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
711 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
715 if (at_exit) /* Add the loop copy at exit. */
717 redirect_edge_and_branch_force (e, new_loop->header);
718 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
719 if (was_imm_dom)
720 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
722 else /* Add the copy at entry. */
724 edge new_exit_e;
725 edge entry_e = loop_preheader_edge (loop);
726 basic_block preheader = entry_e->src;
728 if (!flow_bb_inside_loop_p (new_loop,
729 EDGE_SUCC (new_loop->header, 0)->dest))
730 new_exit_e = EDGE_SUCC (new_loop->header, 0);
731 else
732 new_exit_e = EDGE_SUCC (new_loop->header, 1);
734 redirect_edge_and_branch_force (new_exit_e, loop->header);
735 set_immediate_dominator (CDI_DOMINATORS, loop->header,
736 new_exit_e->src);
738 /* We have to add phi args to the loop->header here as coming
739 from new_exit_e edge. */
740 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
742 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
743 if (phi_arg)
744 add_phi_arg (&phi, phi_arg, new_exit_e);
747 redirect_edge_and_branch_force (entry_e, new_loop->header);
748 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
751 flow_loop_scan (new_loop, LOOP_ALL);
752 flow_loop_scan (loop, LOOP_ALL);
753 free (new_bbs);
754 free (bbs);
756 return new_loop;
760 /* Given the condition statement COND, put it as the last statement
761 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
762 Assumes that this is the single exit of the guarded loop.
763 Returns the skip edge. */
765 static edge
766 add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
768 block_stmt_iterator bsi;
769 edge new_e, enter_e;
770 tree cond_stmt, then_label, else_label;
772 enter_e = EDGE_SUCC (guard_bb, 0);
773 enter_e->flags &= ~EDGE_FALLTHRU;
774 enter_e->flags |= EDGE_FALSE_VALUE;
775 bsi = bsi_last (guard_bb);
777 then_label = build1 (GOTO_EXPR, void_type_node,
778 tree_block_label (exit_bb));
779 else_label = build1 (GOTO_EXPR, void_type_node,
780 tree_block_label (enter_e->dest));
781 cond_stmt = build (COND_EXPR, void_type_node, cond,
782 then_label, else_label);
783 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
784 /* Add new edge to connect entry block to the second loop. */
785 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
786 set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
787 return new_e;
791 /* This function verifies that certain restrictions apply to LOOP. */
793 static bool
794 verify_loop_for_duplication (struct loop *loop,
795 bool update_first_loop_count, edge e)
797 edge exit_e = loop->exit_edges [0];
798 edge entry_e = loop_preheader_edge (loop);
800 /* We duplicate only innermost loops. */
801 if (loop->inner)
803 if (vect_debug_stats (loop) || vect_debug_details (loop))
804 fprintf (dump_file,
805 "Loop duplication failed. Loop is not innermost.\n");
806 return false;
809 /* Only loops with 1 exit. */
810 if (loop->num_exits != 1)
812 if (vect_debug_stats (loop) || vect_debug_details (loop))
813 fprintf (dump_file,
814 "More than one exit from loop.\n");
815 return false;
818 /* Only loops with 1 entry. */
819 if (loop->num_entries != 1)
821 if (vect_debug_stats (loop) || vect_debug_details (loop))
822 fprintf (dump_file,
823 "More than one exit from loop.\n");
824 return false;
827 /* All loops has outers, the only case loop->outer is NULL is for
828 the function itself. */
829 if (!loop->outer)
831 if (vect_debug_stats (loop) || vect_debug_details (loop))
832 fprintf (dump_file,
833 "Loop is outer-most loop.\n");
834 return false;
837 /* Verify that new IV can be created and loop condition
838 can be changed to make first loop iterate first_niters times. */
839 if (!update_first_loop_count)
841 tree orig_cond = get_loop_exit_condition (loop);
842 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
844 if (!orig_cond)
846 if (vect_debug_stats (loop) || vect_debug_details (loop))
847 fprintf (dump_file,
848 "Loop has no exit condition.\n");
849 return false;
851 if (orig_cond != bsi_stmt (loop_exit_bsi))
853 if (vect_debug_stats (loop) || vect_debug_details (loop))
854 fprintf (dump_file,
855 "Loop exit condition is not loop header last stmt.\n");
856 return false;
860 /* Make sure E is either an entry or an exit edge. */
861 if (e != exit_e && e != entry_e)
863 if (vect_debug_stats (loop) || vect_debug_details (loop))
864 fprintf (dump_file,
865 "E is not loop entry or exit edge.\n");
866 return false;
869 return true;
873 /* Given LOOP this function duplicates it to the edge E.
875 This transformation takes place before the loop is vectorized.
876 For now, there are two main cases when it's used
877 by the vectorizer: to support loops with unknown loop bounds
878 (or loop bounds indivisible by vectorization factor) and to force the
879 alignment of data references in the loop. In the first case, LOOP is
880 duplicated to the exit edge, producing epilog loop. In the second case, LOOP
881 is duplicated to the preheader edge thus generating prolog loop. In both
882 cases, the original loop will be vectorized after the transformation.
884 The edge E is supposed to be either preheader edge of the LOOP or
885 its exit edge. If preheader edge is specified, the LOOP copy
886 will precede the original one. Otherwise the copy will be located
887 at the exit of the LOOP.
889 FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
890 the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
891 loop will be iterated FIRST_NITERS times by introducing additional
892 induction variable and replacing loop exit condition. If
893 UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
894 the caller to tree_duplicate_loop_to_edge is responsible for updating
895 the first loop count.
897 NITERS (also SSA_NAME) parameter defines the number of iteration the
898 original loop iterated. The function generates two if-then guards:
899 one prior to the first loop and the other prior to the second loop.
900 The first guard will be:
902 if (FIRST_NITERS == 0) then skip the first loop
904 The second guard will be:
906 if (FIRST_NITERS == NITERS) then skip the second loop
908 Thus the equivalence to the original code is guaranteed by correct values
909 of NITERS and FIRST_NITERS and generation of if-then loop guards.
911 For now this function supports only loop forms that are candidate for
912 vectorization. Such types are the following:
914 (1) only innermost loops
915 (2) loops built from 2 basic blocks
916 (3) loops with one entry and one exit
917 (4) loops without function calls
918 (5) loops without defs that are used after the loop
920 (1), (3) are checked in this function; (2) - in function
921 vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
922 (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
923 when excluding induction/reduction support.
925 The function returns NULL in case one of these checks or
926 transformations failed. */
928 struct loop*
929 tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops,
930 edge e, tree first_niters,
931 tree niters, bool update_first_loop_count)
933 struct loop *new_loop = NULL, *first_loop, *second_loop;
934 edge skip_e;
935 tree pre_condition;
936 bitmap definitions;
937 basic_block first_exit_bb, second_exit_bb;
938 basic_block pre_header_bb;
939 edge exit_e = loop->exit_edges [0];
941 gcc_assert (!any_marked_for_rewrite_p ());
943 if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
944 return NULL;
946 /* We have to initialize cfg_hooks. Then, when calling
947 cfg_hooks->split_edge, the function tree_split_edge
948 is actually called and, when calling cfg_hooks->duplicate_block,
949 the function tree_duplicate_bb is called. */
950 tree_register_cfg_hooks ();
952 /* 1. Generate a copy of LOOP and put it on E (entry or exit). */
953 if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
955 if (vect_debug_stats (loop) || vect_debug_details (loop))
956 fprintf (dump_file,
957 "The tree_duplicate_loop_to_edge_cfg failed.\n");
958 return NULL;
961 definitions = marked_ssa_names ();
962 allocate_new_names (definitions);
963 update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
964 /* Here, using assumption (5), we do not propagate new names further
965 than on phis of the exit from the second loop. */
966 rename_variables_in_loop (new_loop);
967 free_new_names (definitions);
969 if (e == exit_e)
971 first_loop = loop;
972 second_loop = new_loop;
974 else
976 first_loop = new_loop;
977 second_loop = loop;
980 /* 2. Generate bb between the loops. */
981 first_exit_bb = split_edge (first_loop->exit_edges[0]);
982 add_bb_to_loop (first_exit_bb, first_loop->outer);
984 /* We need to update here first loop exit edge
985 and second loop preheader edge. */
986 flow_loop_scan (first_loop, LOOP_ALL);
987 flow_loop_scan (second_loop, LOOP_ALL);
989 /* 3. Make first loop iterate FIRST_NITERS times, if needed. */
990 if (!update_first_loop_count)
992 tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
993 tree first_loop_exit_lbl = tree_block_label (first_exit_bb);
995 make_loop_iterate_ntimes (first_loop, first_niters,
996 first_loop_latch_lbl,
997 first_loop_exit_lbl);
1000 /* 4. Add the guard before first loop:
1002 if FIRST_NITERS == 0
1003 skip first loop
1004 else
1005 enter first loop */
1007 /* 4a. Generate bb before first loop. */
1008 pre_header_bb = split_edge (loop_preheader_edge (first_loop));
1009 add_bb_to_loop (pre_header_bb, first_loop->outer);
1011 /* First loop preheader edge is changed. */
1012 flow_loop_scan (first_loop, LOOP_ALL);
1014 /* 4b. Generate guard condition. */
1015 pre_condition = build (LE_EXPR, boolean_type_node,
1016 first_niters, integer_zero_node);
1018 /* 4c. Add condition at the end of preheader bb. */
1019 skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
1021 /* 4d. Update phis at first loop exit and propagate changes
1022 to the phis of second loop. */
1023 update_phi_nodes_for_guard (skip_e, first_loop);
1025 /* 5. Add the guard before second loop:
1027 if FIRST_NITERS == NITERS SKIP
1028 skip second loop
1029 else
1030 enter second loop */
1032 /* 5a. Generate empty bb at the exit from the second loop. */
1033 second_exit_bb = split_edge (second_loop->exit_edges[0]);
1034 add_bb_to_loop (second_exit_bb, second_loop->outer);
1036 /* Second loop preheader edge is changed. */
1037 flow_loop_scan (second_loop, LOOP_ALL);
1039 /* 5b. Generate guard condition. */
1040 pre_condition = build (EQ_EXPR, boolean_type_node,
1041 first_niters, niters);
1043 /* 5c. Add condition at the end of preheader bb. */
1044 skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
1045 update_phi_nodes_for_guard (skip_e, second_loop);
1047 BITMAP_XFREE (definitions);
1048 unmark_all_for_rewrite ();
1050 return new_loop;
1055 /* Here the proper Vectorizer starts. */
1057 /* Function new_stmt_vec_info.
1059 Create and initialize a new stmt_vec_info struct for STMT. */
1061 stmt_vec_info
1062 new_stmt_vec_info (tree stmt, struct loop *loop)
1064 stmt_vec_info res;
1065 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1067 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1068 STMT_VINFO_STMT (res) = stmt;
1069 STMT_VINFO_LOOP (res) = loop;
1070 STMT_VINFO_RELEVANT_P (res) = 0;
1071 STMT_VINFO_VECTYPE (res) = NULL;
1072 STMT_VINFO_VEC_STMT (res) = NULL;
1073 STMT_VINFO_DATA_REF (res) = NULL;
1074 STMT_VINFO_MEMTAG (res) = NULL;
1075 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1077 return res;
1081 /* Function new_loop_vec_info.
1083 Create and initialize a new loop_vec_info struct for LOOP, as well as
1084 stmt_vec_info structs for all the stmts in LOOP. */
1086 loop_vec_info
1087 new_loop_vec_info (struct loop *loop)
1089 loop_vec_info res;
1090 basic_block *bbs;
1091 block_stmt_iterator si;
1092 unsigned int i;
1094 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1096 bbs = get_loop_body (loop);
1098 /* Create stmt_info for all stmts in the loop. */
1099 for (i = 0; i < loop->num_nodes; i++)
1101 basic_block bb = bbs[i];
1102 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1104 tree stmt = bsi_stmt (si);
1105 stmt_ann_t ann;
1107 get_stmt_operands (stmt);
1108 ann = stmt_ann (stmt);
1109 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1113 LOOP_VINFO_LOOP (res) = loop;
1114 LOOP_VINFO_BBS (res) = bbs;
1115 LOOP_VINFO_EXIT_COND (res) = NULL;
1116 LOOP_VINFO_NITERS (res) = NULL;
1117 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1118 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1119 LOOP_VINFO_VECT_FACTOR (res) = 0;
1120 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1121 "loop_write_datarefs");
1122 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1123 "loop_read_datarefs");
1124 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1126 return res;
1130 /* Function destroy_loop_vec_info.
1132 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1133 stmts in the loop. */
1135 void
1136 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1138 struct loop *loop;
1139 basic_block *bbs;
1140 int nbbs;
1141 block_stmt_iterator si;
1142 int j;
1144 if (!loop_vinfo)
1145 return;
1147 loop = LOOP_VINFO_LOOP (loop_vinfo);
1149 bbs = LOOP_VINFO_BBS (loop_vinfo);
1150 nbbs = loop->num_nodes;
1152 for (j = 0; j < nbbs; j++)
1154 basic_block bb = bbs[j];
1155 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1157 tree stmt = bsi_stmt (si);
1158 stmt_ann_t ann = stmt_ann (stmt);
1159 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1160 free (stmt_info);
1161 set_stmt_info (ann, NULL);
1165 free (LOOP_VINFO_BBS (loop_vinfo));
1166 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1167 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1169 free (loop_vinfo);
1173 /* Function debug_loop_stats.
1175 For vectorization statistics dumps. */
1177 static bool
1178 vect_debug_stats (struct loop *loop)
1180 basic_block bb;
1181 block_stmt_iterator si;
1182 tree node = NULL_TREE;
1184 if (!dump_file || !(dump_flags & TDF_STATS))
1185 return false;
1187 if (!loop)
1189 fprintf (dump_file, "\n");
1190 return true;
1193 if (!loop->header)
1194 return false;
1196 bb = loop->header;
1198 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1200 node = bsi_stmt (si);
1201 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1202 break;
1205 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1206 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1208 fprintf (dump_file, "\nloop at %s:%d: ",
1209 EXPR_FILENAME (node), EXPR_LINENO (node));
1210 return true;
1213 return false;
1217 /* Function debug_loop_details.
1219 For vectorization debug dumps. */
1221 static bool
1222 vect_debug_details (struct loop *loop)
1224 basic_block bb;
1225 block_stmt_iterator si;
1226 tree node = NULL_TREE;
1228 if (!dump_file || !(dump_flags & TDF_DETAILS))
1229 return false;
1231 if (!loop)
1233 fprintf (dump_file, "\n");
1234 return true;
1237 if (!loop->header)
1238 return false;
1240 bb = loop->header;
1242 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1244 node = bsi_stmt (si);
1245 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1246 break;
1249 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1250 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1252 fprintf (dump_file, "\nloop at %s:%d: ",
1253 EXPR_FILENAME (node), EXPR_LINENO (node));
1254 return true;
1257 return false;
1261 /* Function vect_get_ptr_offset
1263 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1265 static tree
1266 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1267 tree vectype ATTRIBUTE_UNUSED,
1268 tree *offset ATTRIBUTE_UNUSED)
1270 /* TODO: Use alignment information. */
1271 return NULL_TREE;
1275 /* Function vect_get_base_and_bit_offset
1277 Return the BASE of the data reference EXPR.
1278 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1279 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1280 bits of 'a.b[i] + 4B' from a.
1282 Input:
1283 EXPR - the memory reference that is being analyzed
1284 DR - the data_reference struct of the _original_ memory reference
1285 (Note: DR_REF (DR) is not necessarily EXPR)
1286 VECTYPE - the type that defines the alignment (i.e, we compute
1287 alignment relative to TYPE_ALIGN(VECTYPE))
1289 Output:
1290 BASE (returned value) - the base of the data reference EXPR.
1291 E.g, if EXPR is a.b[k].c[i][j] the returned
1292 base is a.
1293 OFFSET - offset of EXPR from BASE in bits
1294 BASE_ALIGNED_P - indicates if BASE is aligned
1296 If something unexpected is encountered (an unsupported form of data-ref),
1297 or if VECTYPE is given but OFFSET cannot be determined:
1298 then NULL_TREE is returned. */
1300 static tree
1301 vect_get_base_and_bit_offset (struct data_reference *dr,
1302 tree expr,
1303 tree vectype,
1304 loop_vec_info loop_vinfo,
1305 tree *offset,
1306 bool *base_aligned_p)
1308 tree this_offset = size_zero_node;
1309 tree base = NULL_TREE;
1310 tree next_ref;
1311 tree oprnd0, oprnd1;
1312 struct data_reference *array_dr;
1313 enum tree_code code = TREE_CODE (expr);
1315 *base_aligned_p = false;
1317 switch (code)
1319 /* These cases end the recursion: */
1320 case VAR_DECL:
1321 *offset = size_zero_node;
1322 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1323 *base_aligned_p = true;
1324 return expr;
1326 case SSA_NAME:
1327 if (!vectype)
1328 return expr;
1330 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1331 return NULL_TREE;
1333 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1335 base = vect_get_ptr_offset (expr, vectype, offset);
1336 if (base)
1337 *base_aligned_p = true;
1339 else
1341 *base_aligned_p = true;
1342 *offset = size_zero_node;
1343 base = expr;
1345 return base;
1347 case INTEGER_CST:
1348 *offset = int_const_binop (MULT_EXPR, expr,
1349 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1350 return expr;
1352 /* These cases continue the recursion: */
1353 case COMPONENT_REF:
1354 oprnd0 = TREE_OPERAND (expr, 0);
1355 oprnd1 = TREE_OPERAND (expr, 1);
1357 this_offset = bit_position (oprnd1);
1358 if (vectype && !host_integerp (this_offset, 1))
1359 return NULL_TREE;
1360 next_ref = oprnd0;
1361 break;
1363 case ADDR_EXPR:
1364 oprnd0 = TREE_OPERAND (expr, 0);
1365 next_ref = oprnd0;
1366 break;
1368 case INDIRECT_REF:
1369 oprnd0 = TREE_OPERAND (expr, 0);
1370 next_ref = oprnd0;
1371 break;
1373 case ARRAY_REF:
1374 if (DR_REF (dr) != expr)
1375 /* Build array data_reference struct if the existing DR_REF
1376 doesn't match EXPR. This happens, for example, when the
1377 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1378 contains information on the access of T, not of arr. In order
1379 to continue the analysis, we create a new DR struct that
1380 describes the access of arr.
1382 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1383 else
1384 array_dr = dr;
1386 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1387 vectype, &this_offset);
1388 if (!next_ref)
1389 return NULL_TREE;
1391 if (vectype &&
1392 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1394 *offset = this_offset;
1395 *base_aligned_p = true;
1396 return next_ref;
1398 break;
1400 case PLUS_EXPR:
1401 case MINUS_EXPR:
1402 /* In case we have a PLUS_EXPR of the form
1403 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1404 This is verified in vect_get_symbl_and_dr. */
1405 oprnd0 = TREE_OPERAND (expr, 0);
1406 oprnd1 = TREE_OPERAND (expr, 1);
1408 base = vect_get_base_and_bit_offset
1409 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1410 if (vectype && !base)
1411 return NULL_TREE;
1413 next_ref = oprnd0;
1414 break;
1416 default:
1417 return NULL_TREE;
1420 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1421 loop_vinfo, offset, base_aligned_p);
1423 if (vectype && base)
1425 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1426 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1427 return NULL_TREE;
1429 if (vect_debug_details (NULL))
1431 print_generic_expr (dump_file, expr, TDF_SLIM);
1432 fprintf (dump_file, " --> total offset for ref: ");
1433 print_generic_expr (dump_file, *offset, TDF_SLIM);
1436 return base;
1440 /* Function vect_force_dr_alignment_p.
1442 Returns whether the alignment of a DECL can be forced to be aligned
1443 on ALIGNMENT bit boundary. */
1445 static bool
1446 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1448 if (TREE_CODE (decl) != VAR_DECL)
1449 return false;
1451 if (DECL_EXTERNAL (decl))
1452 return false;
1454 if (TREE_STATIC (decl))
1455 return (alignment <= MAX_OFILE_ALIGNMENT);
1456 else
1457 /* This is not 100% correct. The absolute correct stack alignment
1458 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1459 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1460 However, until someone implements forced stack alignment, SSE
1461 isn't really usable without this. */
1462 return (alignment <= PREFERRED_STACK_BOUNDARY);
1466 /* Function vect_get_new_vect_var.
1468 Returns a name for a new variable. The current naming scheme appends the
1469 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1470 the name of vectorizer generated variables, and appends that to NAME if
1471 provided. */
1473 static tree
1474 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1476 const char *prefix;
1477 int prefix_len;
1478 tree new_vect_var;
1480 if (var_kind == vect_simple_var)
1481 prefix = "vect_";
1482 else
1483 prefix = "vect_p";
1485 prefix_len = strlen (prefix);
1487 if (name)
1488 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1489 else
1490 new_vect_var = create_tmp_var (type, prefix);
1492 return new_vect_var;
1496 /* Function vect_create_index_for_vector_ref.
1498 Create (and return) an index variable, along with it's update chain in the
1499 loop. This variable will be used to access a memory location in a vector
1500 operation.
1502 Input:
1503 LOOP: The loop being vectorized.
1504 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1505 function can be added here, or in the loop pre-header.
1507 Output:
1508 Return an index that will be used to index a vector array. It is expected
1509 that a pointer to the first vector will be used as the base address for the
1510 indexed reference.
1512 FORNOW: we are not trying to be efficient, just creating a new index each
1513 time from scratch. At this time all vector references could use the same
1514 index.
1516 TODO: create only one index to be used by all vector references. Record
1517 the index in the LOOP_VINFO the first time this procedure is called and
1518 return it on subsequent calls. The increment of this index must be placed
1519 just before the conditional expression that ends the single block loop. */
1521 static tree
1522 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1524 tree init, step;
1525 tree indx_before_incr, indx_after_incr;
1527 /* It is assumed that the base pointer used for vectorized access contains
1528 the address of the first vector. Therefore the index used for vectorized
1529 access must be initialized to zero and incremented by 1. */
1531 init = integer_zero_node;
1532 step = integer_one_node;
1534 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1535 create_iv (init, step, NULL_TREE, loop, bsi, false,
1536 &indx_before_incr, &indx_after_incr);
1538 return indx_before_incr;
1542 /* Function vect_create_addr_base_for_vector_ref.
1544 Create an expression that computes the address of the first memory location
1545 that will be accessed for a data reference.
1547 Input:
1548 STMT: The statement containing the data reference.
1549 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1550 OFFSET: Optional. If supplied, it is be added to the initial address.
1552 Output:
1553 1. Return an SSA_NAME whose value is the address of the memory location of
1554 the first vector of the data reference.
1555 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1556 these statement(s) which define the returned SSA_NAME.
1558 FORNOW: We are only handling array accesses with step 1. */
1560 static tree
1561 vect_create_addr_base_for_vector_ref (tree stmt,
1562 tree *new_stmt_list,
1563 tree offset)
1565 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1566 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1567 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1568 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1569 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1570 tree ref = DR_REF (dr);
1571 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1572 tree scalar_type = TREE_TYPE (ref);
1573 tree scalar_ptr_type = build_pointer_type (scalar_type);
1574 tree access_fn;
1575 tree init_val, step, init_oval;
1576 bool ok;
1577 bool is_ptr_ref, is_array_ref, is_addr_expr;
1578 tree array_base;
1579 tree vec_stmt;
1580 tree new_temp;
1581 tree array_ref;
1582 tree addr_base, addr_expr;
1583 tree dest, new_stmt;
1585 /* Only the access function of the last index is relevant (i_n in
1586 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1587 access_fn = DR_ACCESS_FN (dr, 0);
1588 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1589 true);
1590 if (!ok)
1591 init_oval = integer_zero_node;
1593 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1594 && TREE_CODE (data_ref_base) == SSA_NAME;
1595 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1596 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1597 || TREE_CODE (data_ref_base) == PLUS_EXPR
1598 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1599 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1601 /** Create: &(base[init_val])
1603 if data_ref_base is an ARRAY_TYPE:
1604 base = data_ref_base
1606 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1607 base = *((scalar_array *) data_ref_base)
1610 if (is_array_ref)
1611 array_base = data_ref_base;
1612 else /* is_ptr_ref or is_addr_expr */
1614 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1615 tree scalar_array_type = build_array_type (scalar_type, 0);
1616 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1617 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1618 add_referenced_tmp_var (array_ptr);
1620 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1621 add_referenced_tmp_var (dest);
1622 data_ref_base =
1623 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1624 append_to_statement_list_force (new_stmt, new_stmt_list);
1626 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1627 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1628 new_temp = make_ssa_name (array_ptr, vec_stmt);
1629 TREE_OPERAND (vec_stmt, 0) = new_temp;
1630 append_to_statement_list_force (vec_stmt, new_stmt_list);
1632 /* (*array_ptr) */
1633 array_base = build_fold_indirect_ref (new_temp);
1636 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1637 add_referenced_tmp_var (dest);
1638 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1639 append_to_statement_list_force (new_stmt, new_stmt_list);
1641 if (offset)
1643 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1644 add_referenced_tmp_var (tmp);
1645 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1646 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1647 init_val = make_ssa_name (tmp, vec_stmt);
1648 TREE_OPERAND (vec_stmt, 0) = init_val;
1649 append_to_statement_list_force (vec_stmt, new_stmt_list);
1652 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1653 NULL_TREE, NULL_TREE);
1654 addr_base = build_fold_addr_expr (array_ref);
1656 /* addr_expr = addr_base */
1657 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1658 get_name (base_name));
1659 add_referenced_tmp_var (addr_expr);
1660 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1661 new_temp = make_ssa_name (addr_expr, vec_stmt);
1662 TREE_OPERAND (vec_stmt, 0) = new_temp;
1663 append_to_statement_list_force (vec_stmt, new_stmt_list);
1665 return new_temp;
1669 /* Function get_vectype_for_scalar_type.
1671 Returns the vector type corresponding to SCALAR_TYPE as supported
1672 by the target. */
1674 static tree
1675 get_vectype_for_scalar_type (tree scalar_type)
1677 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1678 int nbytes = GET_MODE_SIZE (inner_mode);
1679 int nunits;
1680 tree vectype;
1682 if (nbytes == 0)
1683 return NULL_TREE;
1685 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1686 is expected. */
1687 nunits = UNITS_PER_SIMD_WORD / nbytes;
1689 vectype = build_vector_type (scalar_type, nunits);
1690 if (vect_debug_details (NULL))
1692 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1693 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1696 if (!vectype)
1697 return NULL_TREE;
1699 if (vect_debug_details (NULL))
1701 fprintf (dump_file, "vectype: ");
1702 print_generic_expr (dump_file, vectype, TDF_SLIM);
1705 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1707 /* TODO: tree-complex.c sometimes can parallelize operations
1708 on generic vectors. We can vectorize the loop in that case,
1709 but then we should re-run the lowering pass. */
1710 if (vect_debug_details (NULL))
1711 fprintf (dump_file, "mode not supported by target.");
1712 return NULL_TREE;
1715 return vectype;
1719 /* Function vect_align_data_ref.
1721 Handle mislignment of a memory accesses.
1723 FORNOW: Can't handle misaligned accesses.
1724 Make sure that the dataref is aligned. */
1726 static void
1727 vect_align_data_ref (tree stmt)
1729 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1730 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1732 /* FORNOW: can't handle misaligned accesses;
1733 all accesses expected to be aligned. */
1734 gcc_assert (aligned_access_p (dr));
1738 /* Function vect_create_data_ref_ptr.
1740 Create a memory reference expression for vector access, to be used in a
1741 vector load/store stmt. The reference is based on a new pointer to vector
1742 type (vp).
1744 Input:
1745 1. STMT: a stmt that references memory. Expected to be of the form
1746 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1747 2. BSI: block_stmt_iterator where new stmts can be added.
1748 3. OFFSET (optional): an offset to be added to the initial address accessed
1749 by the data-ref in STMT.
1750 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1751 pointing to the initial address.
1753 Output:
1754 1. Declare a new ptr to vector_type, and have it point to the base of the
1755 data reference (initial addressed accessed by the data reference).
1756 For example, for vector of type V8HI, the following code is generated:
1758 v8hi *vp;
1759 vp = (v8hi *)initial_address;
1761 if OFFSET is not supplied:
1762 initial_address = &a[init];
1763 if OFFSET is supplied:
1764 initial_address = &a[init + OFFSET];
1766 Return the initial_address in INITIAL_ADDRESS.
1768 2. Create a data-reference in the loop based on the new vector pointer vp,
1769 and using a new index variable 'idx' as follows:
1771 vp' = vp + update
1773 where if ONLY_INIT is true:
1774 update = zero
1775 and otherwise
1776 update = idx + vector_type_size
1778 Return the pointer vp'.
1781 FORNOW: handle only aligned and consecutive accesses. */
1783 static tree
1784 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1785 tree *initial_address, bool only_init)
1787 tree base_name;
1788 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1789 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1790 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1791 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1792 tree vect_ptr_type;
1793 tree vect_ptr;
1794 tree tag;
1795 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1796 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1797 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1798 int nvuses, nv_may_defs, nv_must_defs;
1799 int i;
1800 tree new_temp;
1801 tree vec_stmt;
1802 tree new_stmt_list = NULL_TREE;
1803 tree idx;
1804 edge pe = loop_preheader_edge (loop);
1805 basic_block new_bb;
1806 tree vect_ptr_init;
1807 tree vectype_size;
1808 tree ptr_update;
1809 tree data_ref_ptr;
1811 base_name = unshare_expr (DR_BASE_NAME (dr));
1812 if (vect_debug_details (NULL))
1814 tree data_ref_base = base_name;
1815 fprintf (dump_file, "create array_ref of type: ");
1816 print_generic_expr (dump_file, vectype, TDF_SLIM);
1817 if (TREE_CODE (data_ref_base) == VAR_DECL)
1818 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1819 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1820 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1821 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1822 fprintf (dump_file, "vectorizing a record based array ref: ");
1823 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1824 fprintf (dump_file, "vectorizing a pointer ref: ");
1825 print_generic_expr (dump_file, base_name, TDF_SLIM);
1828 /** (1) Create the new vector-pointer variable: **/
1830 vect_ptr_type = build_pointer_type (vectype);
1831 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1832 get_name (base_name));
1833 add_referenced_tmp_var (vect_ptr);
1836 /** (2) Handle aliasing information of the new vector-pointer: **/
1838 tag = STMT_VINFO_MEMTAG (stmt_info);
1839 gcc_assert (tag);
1840 get_var_ann (vect_ptr)->type_mem_tag = tag;
1842 /* Mark for renaming all aliased variables
1843 (i.e, the may-aliases of the type-mem-tag). */
1844 nvuses = NUM_VUSES (vuses);
1845 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1846 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1847 for (i = 0; i < nvuses; i++)
1849 tree use = VUSE_OP (vuses, i);
1850 if (TREE_CODE (use) == SSA_NAME)
1851 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1853 for (i = 0; i < nv_may_defs; i++)
1855 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1856 if (TREE_CODE (def) == SSA_NAME)
1857 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1859 for (i = 0; i < nv_must_defs; i++)
1861 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1862 if (TREE_CODE (def) == SSA_NAME)
1863 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1867 /** (3) Calculate the initial address the vector-pointer, and set
1868 the vector-pointer to point to it before the loop: **/
1870 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1871 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1872 offset);
1873 pe = loop_preheader_edge (loop);
1874 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1875 gcc_assert (!new_bb);
1876 *initial_address = new_temp;
1878 /* Create: p = (vectype *) initial_base */
1879 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1880 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1881 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1882 TREE_OPERAND (vec_stmt, 0) = new_temp;
1883 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1884 gcc_assert (!new_bb);
1885 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1888 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1890 if (only_init) /* No update in loop is required. */
1891 return vect_ptr_init;
1893 idx = vect_create_index_for_vector_ref (loop, bsi);
1895 /* Create: update = idx * vectype_size */
1896 ptr_update = create_tmp_var (integer_type_node, "update");
1897 add_referenced_tmp_var (ptr_update);
1898 vectype_size = build_int_cst (integer_type_node,
1899 GET_MODE_SIZE (TYPE_MODE (vectype)));
1900 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1901 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1902 new_temp = make_ssa_name (ptr_update, vec_stmt);
1903 TREE_OPERAND (vec_stmt, 0) = new_temp;
1904 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1906 /* Create: data_ref_ptr = vect_ptr_init + update */
1907 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1908 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1909 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1910 TREE_OPERAND (vec_stmt, 0) = new_temp;
1911 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1912 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1914 return data_ref_ptr;
1918 /* Function vect_create_destination_var.
1920 Create a new temporary of type VECTYPE. */
1922 static tree
1923 vect_create_destination_var (tree scalar_dest, tree vectype)
1925 tree vec_dest;
1926 const char *new_name;
1928 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1930 new_name = get_name (scalar_dest);
1931 if (!new_name)
1932 new_name = "var_";
1933 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1934 add_referenced_tmp_var (vec_dest);
1936 return vec_dest;
1940 /* Function vect_init_vector.
1942 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1943 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1944 used in the vectorization of STMT. */
1946 static tree
1947 vect_init_vector (tree stmt, tree vector_var)
1949 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1950 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1951 tree new_var;
1952 tree init_stmt;
1953 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1954 tree vec_oprnd;
1955 edge pe;
1956 tree new_temp;
1957 basic_block new_bb;
1959 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1960 add_referenced_tmp_var (new_var);
1962 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1963 new_temp = make_ssa_name (new_var, init_stmt);
1964 TREE_OPERAND (init_stmt, 0) = new_temp;
1966 pe = loop_preheader_edge (loop);
1967 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1968 gcc_assert (!new_bb);
1970 if (vect_debug_details (NULL))
1972 fprintf (dump_file, "created new init_stmt: ");
1973 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1976 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1977 return vec_oprnd;
1981 /* Function vect_get_vec_def_for_operand.
1983 OP is an operand in STMT. This function returns a (vector) def that will be
1984 used in the vectorized stmt for STMT.
1986 In the case that OP is an SSA_NAME which is defined in the loop, then
1987 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1989 In case OP is an invariant or constant, a new stmt that creates a vector def
1990 needs to be introduced. */
1992 static tree
1993 vect_get_vec_def_for_operand (tree op, tree stmt)
1995 tree vec_oprnd;
1996 tree vec_stmt;
1997 tree def_stmt;
1998 stmt_vec_info def_stmt_info = NULL;
1999 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2000 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2001 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2002 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2003 basic_block bb;
2004 tree vec_inv;
2005 tree t = NULL_TREE;
2006 tree def;
2007 int i;
2009 if (vect_debug_details (NULL))
2011 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2012 print_generic_expr (dump_file, op, TDF_SLIM);
2015 /** ===> Case 1: operand is a constant. **/
2017 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2019 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2021 tree vec_cst;
2023 /* Build a tree with vector elements. */
2024 if (vect_debug_details (NULL))
2025 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2027 for (i = nunits - 1; i >= 0; --i)
2029 t = tree_cons (NULL_TREE, op, t);
2031 vec_cst = build_vector (vectype, t);
2032 return vect_init_vector (stmt, vec_cst);
2035 gcc_assert (TREE_CODE (op) == SSA_NAME);
2037 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2039 def_stmt = SSA_NAME_DEF_STMT (op);
2040 def_stmt_info = vinfo_for_stmt (def_stmt);
2042 if (vect_debug_details (NULL))
2044 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2045 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2049 /** ==> Case 2.1: operand is defined inside the loop. **/
2051 if (def_stmt_info)
2053 /* Get the def from the vectorized stmt. */
2055 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2056 gcc_assert (vec_stmt);
2057 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2058 return vec_oprnd;
2062 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2063 it is a reduction/induction. **/
2065 bb = bb_for_stmt (def_stmt);
2066 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2068 if (vect_debug_details (NULL))
2069 fprintf (dump_file, "reduction/induction - unsupported.");
2070 internal_error ("no support for reduction/induction"); /* FORNOW */
2074 /** ==> Case 2.3: operand is defined outside the loop -
2075 it is a loop invariant. */
2077 switch (TREE_CODE (def_stmt))
2079 case PHI_NODE:
2080 def = PHI_RESULT (def_stmt);
2081 break;
2082 case MODIFY_EXPR:
2083 def = TREE_OPERAND (def_stmt, 0);
2084 break;
2085 case NOP_EXPR:
2086 def = TREE_OPERAND (def_stmt, 0);
2087 gcc_assert (IS_EMPTY_STMT (def_stmt));
2088 def = op;
2089 break;
2090 default:
2091 if (vect_debug_details (NULL))
2093 fprintf (dump_file, "unsupported defining stmt: ");
2094 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2096 internal_error ("unsupported defining stmt");
2099 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2101 if (vect_debug_details (NULL))
2102 fprintf (dump_file, "Create vector_inv.");
2104 for (i = nunits - 1; i >= 0; --i)
2106 t = tree_cons (NULL_TREE, def, t);
2109 vec_inv = build_constructor (vectype, t);
2110 return vect_init_vector (stmt, vec_inv);
2114 /* Function vect_finish_stmt_generation.
2116 Insert a new stmt. */
2118 static void
2119 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2121 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2123 if (vect_debug_details (NULL))
2125 fprintf (dump_file, "add new stmt: ");
2126 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2129 /* Make sure bsi points to the stmt that is being vectorized. */
2131 /* Assumption: any stmts created for the vectorization of stmt S were
2132 inserted before S. BSI is expected to point to S or some new stmt before S. */
2134 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2135 bsi_next (bsi);
2136 gcc_assert (stmt == bsi_stmt (*bsi));
2140 /* Function vectorizable_assignment.
2142 Check if STMT performs an assignment (copy) that can be vectorized.
2143 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2144 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2145 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2147 static bool
2148 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2150 tree vec_dest;
2151 tree scalar_dest;
2152 tree op;
2153 tree vec_oprnd;
2154 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2155 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2156 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2157 tree new_temp;
2159 /* Is vectorizable assignment? */
2161 if (TREE_CODE (stmt) != MODIFY_EXPR)
2162 return false;
2164 scalar_dest = TREE_OPERAND (stmt, 0);
2165 if (TREE_CODE (scalar_dest) != SSA_NAME)
2166 return false;
2168 op = TREE_OPERAND (stmt, 1);
2169 if (!vect_is_simple_use (op, loop, NULL))
2171 if (vect_debug_details (NULL))
2172 fprintf (dump_file, "use not simple.");
2173 return false;
2176 if (!vec_stmt) /* transformation not required. */
2178 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2179 return true;
2182 /** Trasform. **/
2183 if (vect_debug_details (NULL))
2184 fprintf (dump_file, "transform assignment.");
2186 /* Handle def. */
2187 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2189 /* Handle use. */
2190 op = TREE_OPERAND (stmt, 1);
2191 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2193 /* Arguments are ready. create the new vector stmt. */
2194 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2195 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2196 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2197 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2199 return true;
2203 /* Function vectorizable_operation.
2205 Check if STMT performs a binary or unary operation that can be vectorized.
2206 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2207 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2208 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2210 static bool
2211 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2213 tree vec_dest;
2214 tree scalar_dest;
2215 tree operation;
2216 tree op0, op1 = NULL;
2217 tree vec_oprnd0, vec_oprnd1=NULL;
2218 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2219 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2220 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2221 int i;
2222 enum tree_code code;
2223 enum machine_mode vec_mode;
2224 tree new_temp;
2225 int op_type;
2226 tree op;
2227 optab optab;
2229 /* Is STMT a vectorizable binary/unary operation? */
2230 if (TREE_CODE (stmt) != MODIFY_EXPR)
2231 return false;
2233 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2234 return false;
2236 operation = TREE_OPERAND (stmt, 1);
2237 code = TREE_CODE (operation);
2238 optab = optab_for_tree_code (code, vectype);
2240 /* Support only unary or binary operations. */
2241 op_type = TREE_CODE_LENGTH (code);
2242 if (op_type != unary_op && op_type != binary_op)
2244 if (vect_debug_details (NULL))
2245 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2246 return false;
2249 for (i = 0; i < op_type; i++)
2251 op = TREE_OPERAND (operation, i);
2252 if (!vect_is_simple_use (op, loop, NULL))
2254 if (vect_debug_details (NULL))
2255 fprintf (dump_file, "use not simple.");
2256 return false;
2260 /* Supportable by target? */
2261 if (!optab)
2263 if (vect_debug_details (NULL))
2264 fprintf (dump_file, "no optab.");
2265 return false;
2267 vec_mode = TYPE_MODE (vectype);
2268 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2270 if (vect_debug_details (NULL))
2271 fprintf (dump_file, "op not supported by target.");
2272 return false;
2275 if (!vec_stmt) /* transformation not required. */
2277 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2278 return true;
2281 /** Transform. **/
2283 if (vect_debug_details (NULL))
2284 fprintf (dump_file, "transform binary/unary operation.");
2286 /* Handle def. */
2287 scalar_dest = TREE_OPERAND (stmt, 0);
2288 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2290 /* Handle uses. */
2291 op0 = TREE_OPERAND (operation, 0);
2292 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2294 if (op_type == binary_op)
2296 op1 = TREE_OPERAND (operation, 1);
2297 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2300 /* Arguments are ready. create the new vector stmt. */
2302 if (op_type == binary_op)
2303 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2304 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2305 else
2306 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2307 build1 (code, vectype, vec_oprnd0));
2308 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2309 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2310 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2312 return true;
2316 /* Function vectorizable_store.
2318 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2319 can be vectorized.
2320 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2321 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2322 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2324 static bool
2325 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2327 tree scalar_dest;
2328 tree data_ref;
2329 tree op;
2330 tree vec_oprnd1;
2331 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2332 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2333 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2334 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2335 enum machine_mode vec_mode;
2336 tree dummy;
2337 enum dr_alignment_support alignment_support_cheme;
2339 /* Is vectorizable store? */
2341 if (TREE_CODE (stmt) != MODIFY_EXPR)
2342 return false;
2344 scalar_dest = TREE_OPERAND (stmt, 0);
2345 if (TREE_CODE (scalar_dest) != ARRAY_REF
2346 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2347 return false;
2349 op = TREE_OPERAND (stmt, 1);
2350 if (!vect_is_simple_use (op, loop, NULL))
2352 if (vect_debug_details (NULL))
2353 fprintf (dump_file, "use not simple.");
2354 return false;
2357 vec_mode = TYPE_MODE (vectype);
2358 /* FORNOW. In some cases can vectorize even if data-type not supported
2359 (e.g. - array initialization with 0). */
2360 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2361 return false;
2363 if (!STMT_VINFO_DATA_REF (stmt_info))
2364 return false;
2367 if (!vec_stmt) /* transformation not required. */
2369 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2370 return true;
2373 /** Trasform. **/
2375 if (vect_debug_details (NULL))
2376 fprintf (dump_file, "transform store");
2378 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2379 gcc_assert (alignment_support_cheme);
2380 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2382 /* Handle use - get the vectorized def from the defining stmt. */
2383 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2385 /* Handle def. */
2386 /* FORNOW: make sure the data reference is aligned. */
2387 vect_align_data_ref (stmt);
2388 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2389 data_ref = build_fold_indirect_ref (data_ref);
2391 /* Arguments are ready. create the new vector stmt. */
2392 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2393 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2395 return true;
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.

   Three alignment schemes are handled in the transform phase:
   dr_aligned / dr_unaligned_supported generate a plain (possibly
   misaligned) vector load; dr_unaligned_software_pipeline generates the
   realign-load sequence (two aligned loads combined with a permutation
   mask carried by a loop-header phi).  */

static bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load?  Must be 'ssa_name = array/indirect ref' with
     an analyzed data reference.  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (loop))
	fprintf (dump_file, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      /* Analysis phase only: record the stmt kind for the transform pass.  */
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /* Transform.  */

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* Create:
	 p = initial_addr;
	 indx = 0;
	 loop {
	   vec_dest = *(p);
	   indx = indx + 1;
	 }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
	data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  /* Misaligned but supported by the target: emit a
	     MISALIGNED_INDIRECT_REF carrying the known misalignment in
	     bits (or 0 if the misalignment is unknown, mis == -1).  */
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ?
		       integer_zero_node :
		       build_int_cst (integer_type_node, mis));
	  tmis = int_const_binop (MULT_EXPR, tmis,
			build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader.  The
	 insertion must not split the edge (preheader assumed empty-safe),
	 hence the assert on !new_bb.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);

      /* <2> Create lsq = *(floor(p2')) in the loop, offset by
	 VS - 1 elements from the initial address.  */
      offset = build_int_cst (integer_type_node,
			      GET_MODE_NUNITS (TYPE_MODE (vectype)));
      offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);

      /* <3> Compute the "magic" operand of the realign load.  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader.  */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg
	     pressure.  */
	  magic = dataref_ptr;
	}

      /* <4> Create msq = phi <msq_init, lsq> in loop: msq carries the
	 previous iteration's aligned load across iterations.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));

      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  *vec_stmt = new_stmt;
  return true;
}
2600 /* Function vect_supportable_dr_alignment
2602 Return whether the data reference DR is supported with respect to its
2603 alignment. */
2605 static enum dr_alignment_support
2606 vect_supportable_dr_alignment (struct data_reference *dr)
2608 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2609 enum machine_mode mode = (int) TYPE_MODE (vectype);
2611 if (aligned_access_p (dr))
2612 return dr_aligned;
2614 /* Possibly unaligned access. */
2616 if (DR_IS_READ (dr))
2618 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2619 && (!targetm.vectorize.builtin_mask_for_load
2620 || targetm.vectorize.builtin_mask_for_load ()))
2621 return dr_unaligned_software_pipeline;
2623 if (targetm.vectorize.misaligned_mem_ok (mode))
2624 /* Can't software pipeline the loads. */
2625 return dr_unaligned_supported;
2628 /* Unsupported. */
2629 return dr_unaligned_unsupported;
2633 /* Function vect_transform_stmt.
2635 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2637 static bool
2638 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2640 bool is_store = false;
2641 tree vec_stmt = NULL_TREE;
2642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2643 bool done;
2645 switch (STMT_VINFO_TYPE (stmt_info))
2647 case op_vec_info_type:
2648 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2649 gcc_assert (done);
2650 break;
2652 case assignment_vec_info_type:
2653 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2654 gcc_assert (done);
2655 break;
2657 case load_vec_info_type:
2658 done = vectorizable_load (stmt, bsi, &vec_stmt);
2659 gcc_assert (done);
2660 break;
2662 case store_vec_info_type:
2663 done = vectorizable_store (stmt, bsi, &vec_stmt);
2664 gcc_assert (done);
2665 is_store = true;
2666 break;
2667 default:
2668 if (vect_debug_details (NULL))
2669 fprintf (dump_file, "stmt not supported.");
2670 gcc_unreachable ();
2673 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2675 return is_store;
2679 /* This function builds ni_name = number of iterations loop executes
2680 on the loop preheader. */
2682 static tree
2683 vect_build_loop_niters (loop_vec_info loop_vinfo)
2685 tree ni_name, stmt, var;
2686 edge pe;
2687 basic_block new_bb;
2688 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2689 tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));
2691 var = create_tmp_var (TREE_TYPE (ni), "niters");
2692 add_referenced_tmp_var (var);
2693 if (TREE_CODE (ni) == INTEGER_CST)
2695 /* This case is generated when treating a known loop bound
2696 indivisible by VF. Here we cannot use force_gimple_operand. */
2697 stmt = build (MODIFY_EXPR, void_type_node, var, ni);
2698 ni_name = make_ssa_name (var, stmt);
2699 TREE_OPERAND (stmt, 0) = ni_name;
2701 else
2702 ni_name = force_gimple_operand (ni, &stmt, false, var);
2704 pe = loop_preheader_edge (loop);
2705 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2706 if (new_bb)
2707 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2709 return ni_name;
2713 /* This function generates the following statements:
2715 ni_name = number of iterations loop executes
2716 ratio = ni_name / vf
2717 ratio_mult_vf_name = ratio * vf
2719 and places them at the loop preheader edge. */
2721 static void
2722 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
2723 tree *ratio_mult_vf_name_p, tree *ratio_p)
2726 edge pe;
2727 basic_block new_bb;
2728 tree stmt, ni_name;
2729 tree ratio;
2730 tree ratio_mult_vf_name, ratio_mult_vf;
2731 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2732 tree ni = LOOP_VINFO_NITERS(loop_vinfo);
2734 int vf, i;
2736 /* Generate temporary variable that contains
2737 number of iterations loop executes. */
2739 ni_name = vect_build_loop_niters (loop_vinfo);
2741 /* ratio = ni / vf.
2742 vf is power of 2; then if ratio = = n >> log2 (vf). */
2743 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2744 ratio = vect_build_symbol_bound (ni_name, vf, loop);
2746 /* Update initial conditions of loop copy. */
2748 /* ratio_mult_vf = ratio * vf;
2749 then if ratio_mult_vf = ratio << log2 (vf). */
2751 i = exact_log2 (vf);
2752 ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2753 add_referenced_tmp_var (ratio_mult_vf);
2755 ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);
2757 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2758 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
2759 ratio, build_int_cst (unsigned_type_node,
2760 i)));
2762 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2764 pe = loop_preheader_edge (loop);
2765 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2766 if (new_bb)
2767 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2769 *ni_name_p = ni_name;
2770 *ratio_mult_vf_name_p = ratio_mult_vf_name;
2771 *ratio_p = ratio;
2773 return;
2777 /* This function generates stmt
2779 tmp = n / vf;
2781 and attaches it to preheader of LOOP. */
2783 static tree
2784 vect_build_symbol_bound (tree n, int vf, struct loop * loop)
2786 tree var, stmt, var_name;
2787 edge pe;
2788 basic_block new_bb;
2789 int i;
2791 /* create temporary variable */
2792 var = create_tmp_var (TREE_TYPE (n), "bnd");
2793 add_referenced_tmp_var (var);
2795 var_name = make_ssa_name (var, NULL_TREE);
2797 /* vf is power of 2; then n/vf = n >> log2 (vf). */
2799 i = exact_log2 (vf);
2800 stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
2801 build2 (RSHIFT_EXPR, TREE_TYPE (n),
2802 n, build_int_cst (unsigned_type_node,i)));
2804 SSA_NAME_DEF_STMT (var_name) = stmt;
2806 pe = loop_preheader_edge (loop);
2807 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2808 if (new_bb)
2809 add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
2810 else
2811 if (vect_debug_details (NULL))
2812 fprintf (dump_file, "New bb on preheader edge was not generated.");
2814 return var_name;
/* Function vect_transform_loop_bound.

   Create a new exit condition for the loop: replace the original exit
   test with a canonical counter IV (0, 1, 2, ...) compared against the
   vectorized iteration count - either the compile-time constant N/VF or
   the symbolic NITERS supplied by the caller.  */

static void
vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge exit_edge = loop->single_exit;
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree indx_before_incr, indx_after_incr;
  tree orig_cond_expr;
  HOST_WIDE_INT old_N = 0;
  int vf;
  tree cond_stmt;
  tree new_loop_bound;
  bool symbol_niters;
  tree cond;
  tree lb_type;

  /* symbol_niters: true when the iteration count is not a compile-time
     constant, in which case NITERS is used as the new bound.  */
  symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);

  if (!symbol_niters)
    old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);

  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
#ifdef ENABLE_CHECKING
  gcc_assert (orig_cond_expr);
#endif
  /* The exit condition must be the last stmt of the exit block.  */
  gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));

  /* Build a fresh counter IV, incremented by 1 each iteration, inserted
     right before the exit condition.  */
  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
     to point to the exit condition.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);

  /* new loop exit test:  */
  lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
  if (!symbol_niters)
    new_loop_bound = fold_convert (lb_type,
				   build_int_cst (unsigned_type_node,
						  old_N/vf));
  else
    new_loop_bound = niters;

  /* The comparison direction depends on which COND_EXPR arm exits.  */
  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    cond = build2 (GE_EXPR, boolean_type_node,
		   indx_after_incr, new_loop_bound);
  else /* 'then' edge loops back.  */
    cond = build2 (LT_EXPR, boolean_type_node,
		   indx_after_incr, new_loop_bound);

  /* Reuse the original COND_EXPR's arms (branch labels) unchanged.  */
  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
	     TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));

  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_details (NULL))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized. The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.

   We have:

   bb_before_loop:
	if (guard-cond) GOTO bb_before_epilog_loop
	else            GOTO loop

   loop:
	do {
	} while ...

   bb_before_epilog_loop:

   bb_before_epilog_loop has edges coming in form the loop exit and
   from bb_before_loop.  New definitions for ivs will be placed on the edge
   from loop->exit to bb_before_epilog_loop.  This also requires that we update
   the phis in bb_before_epilog_loop. (In the code this bb is denoted
   "update_bb").

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.  */

static void
vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
{
  edge exit = loop->exit_edges[0];
  tree phi, phi1;
  basic_block update_bb = exit->dest;
  edge update_e;

  /* Generate basic block at the exit from the loop.  The advanced IV
     definitions are inserted here, between the loop and update_bb.  */
  basic_block new_bb = split_edge (exit);
  add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
  loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
  update_e = EDGE_SUCC (new_bb, 0);

  /* Walk the header phis and the update_bb phis in lockstep
     (Assumption 2: same order).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      /* Skip virtual phi's. The data dependences that are associated with
	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "virtual phi. skip.");
	  continue;
	}

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));

      /* FORNOW: We do not transform initial conditions of IVs
	 which evolution functions are a polynomial of degree >= 2 or
	 exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition (access_fn));

      /* Advanced value: init + niters * step.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
		  build2 (MULT_EXPR, TREE_TYPE (niters),
		       niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_tmp_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into new_bb.  */
      last_bsi = bsi_last (new_bb);
      if (stmt)
	bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);

      /* Fix phi expressions in duplicated loop: the epilog's phi arg on
	 the edge from the loop now receives the advanced IV value.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
		  PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
      SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
    }
}
3005 /* This function is the main driver of transformation
3006 to be done for loop before vectorizing it in case of
3007 unknown loop bound. */
3009 static void
3010 vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
3011 struct loops *loops)
3014 tree ni_name, ratio_mult_vf_name;
3015 #ifdef ENABLE_CHECKING
3016 int loop_num;
3017 #endif
3018 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3019 struct loop *new_loop;
3021 if (vect_debug_details (NULL))
3022 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3024 /* Generate the following variables on the preheader of original loop:
3026 ni_name = number of iteration the original loop executes
3027 ratio = ni_name / vf
3028 ratio_mult_vf_name = ratio * vf */
3029 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3030 &ratio_mult_vf_name, ratio);
3032 /* Update loop info. */
3033 loop->pre_header = loop_preheader_edge (loop)->src;
3034 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3036 #ifdef ENABLE_CHECKING
3037 loop_num = loop->num;
3038 #endif
3039 new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0],
3040 ratio_mult_vf_name, ni_name, true);
3041 #ifdef ENABLE_CHECKING
3042 gcc_assert (new_loop);
3043 gcc_assert (loop_num == loop->num);
3044 #endif
3046 /* Update IVs of original loop as if they were advanced
3047 by ratio_mult_vf_name steps. */
3049 #ifdef ENABLE_CHECKING
3050 /* Check existence of intermediate bb. */
3051 gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
3052 #endif
3053 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
3055 return;
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between NITERS (the original iteration count of the loop)
   and the misalignment of DR - the first data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.  */

static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree start_addr, byte_miss_align, elem_miss_align;
  /* Vector type alignment in bytes.  */
  int vec_type_align =
    GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
							/ BITS_PER_UNIT;
  tree tmp1, tmp2;
  tree new_stmt_list = NULL_TREE;

  /* Compute DR's start address; any stmts needed for it go on the
     preheader.  */
  start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
						&new_stmt_list, NULL_TREE);

  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  /* byte_miss_align = start_addr & (vec_type_align - 1):
     the misalignment of the start address, in bytes.  */
  byte_miss_align =
	build (BIT_AND_EXPR, integer_type_node, start_addr,
		  build (MINUS_EXPR, integer_type_node,
			 build_int_cst (unsigned_type_node,
					vec_type_align), integer_one_node));
  /* elem_miss_align = byte_miss_align / element_size: the misalignment
     expressed in vector elements (element size == vec_type_align/vf).  */
  tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
  elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
				  byte_miss_align, tmp1);

  /* tmp2 = (vf - elem_miss_align) & (vf - 1): number of elements to peel
     so that DR becomes aligned (0 if it already is, since vf is a power
     of two).  */
  tmp2 =
	build (BIT_AND_EXPR, integer_type_node,
	       build (MINUS_EXPR, integer_type_node,
		      build_int_cst (unsigned_type_node, vf), elem_miss_align),
	       build (MINUS_EXPR, integer_type_node,
		      build_int_cst (unsigned_type_node, vf), integer_one_node));

  /* Never peel more than the loop actually iterates.  */
  iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
  var = create_tmp_var (TREE_TYPE (iters), "iters");
  add_referenced_tmp_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  return iters_name;
}
3126 /* Function vect_update_niters_after_peeling
3128 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3129 The new number of iterations is therefore original_niters - NITERS.
3130 Record the new number of iterations in LOOP_VINFO. */
3132 static void
3133 vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
3135 tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
/* NOTE(review): the difference is built in integer_type_node regardless of
   TREE_TYPE (n_iters) / TREE_TYPE (niters) -- verify the types agree.  */
3136 LOOP_VINFO_NITERS (loop_vinfo) =
3137 build (MINUS_EXPR, integer_type_node, n_iters, niters);
3141 /* Function vect_update_inits_of_dr
3143 NITERS iterations were peeled from LOOP. DR represents a data reference
3144 in LOOP. This function updates the information recorded in DR to
3145 account for the fact that the first NITERS iterations had already been
3146 executed. Specifically, it updates the initial_condition of the
3147 access_function of DR. */
3149 static void
3150 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3151 tree niters)
3153 tree access_fn = DR_ACCESS_FN (dr, 0);
3154 tree init, init_new, step;
3156 step = evolution_part_in_loop_num (access_fn, loop->num);
3157 init = initial_condition (access_fn);
/* Advance the initial condition by the peeled iterations:
   init_new = init + niters * step.  */
3159 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3160 build (MULT_EXPR, TREE_TYPE (niters),
3161 niters, step), init);
3162 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3164 return;
3168 /* Function vect_update_inits_of_drs
3170 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3171 This function updates the information recorded for the data references in
3172 the loop to account for the fact that the first NITERS iterations had
3173 already been executed. Specifically, it updates the initial_condition of the
3174 access_function of all the data_references in the loop. */
3176 static void
3177 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3179 unsigned int i;
3180 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3181 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3182 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3184 if (dump_file && (dump_flags & TDF_DETAILS))
3185 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
/* Each data reference is updated independently: first all writes, then
   all reads.  */
3187 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3189 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3190 vect_update_inits_of_dr (dr, loop, niters);
3193 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3195 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3196 vect_update_inits_of_dr (dr, loop, niters);
3201 /* Function vect_do_peeling_for_alignment
3203 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3204 'niters' is set to the misalignment of one of the data references in the
3205 loop, thereby forcing it to refer to an aligned location at the beginning
3206 of the execution of this loop. The data reference for which we are
3207 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3209 static void
3210 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3212 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3213 tree niters_of_prolog_loop, ni_name;
3215 if (vect_debug_details (NULL))
3216 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3218 ni_name = vect_build_loop_niters (loop_vinfo);
3219 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3222 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
/* NOTE(review): the copy placed on the preheader edge is presumed to become
   the scalar prolog loop -- confirm against tree_duplicate_loop_to_edge.  */
3223 tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop),
3224 niters_of_prolog_loop, ni_name, false);
3226 /* Update number of times loop executes. */
3227 vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
3229 /* Update all inits of access functions of all data refs. */
3230 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3232 /* After peeling we have to reset scalar evolution analyzer. */
3233 scev_reset ();
3235 return;
3239 /* Function vect_transform_loop.
3241 The analysis phase has determined that the loop is vectorizable.
3242 Vectorize the loop - created vectorized stmts to replace the scalar
3243 stmts in the loop, and update the loop exit condition. */
3245 static void
3246 vect_transform_loop (loop_vec_info loop_vinfo,
3247 struct loops *loops ATTRIBUTE_UNUSED)
3249 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3250 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3251 int nbbs = loop->num_nodes;
3252 block_stmt_iterator si;
3253 int i;
3254 tree ratio = NULL;
3255 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* NOTE(review): the dump label below reads "vec_transform_loop" -- it is
   missing the 't' of "vect"; harmless, but inconsistent with the
   function's name.  */
3257 if (vect_debug_details (NULL))
3258 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3261 /* Peel the loop if there are data refs with unknown alignment.
3262 Only one data ref with unknown store is allowed. */
3265 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3266 vect_do_peeling_for_alignment (loop_vinfo, loops);
3268 /* If the loop has a symbolic number of iterations 'n'
3269 (i.e. it's not a compile time constant),
3270 then an epilog loop needs to be created. We therefore duplicate
3271 the initial loop. The original loop will be vectorized, and will compute
3272 the first (n/VF) iterations. The second copy of the loop will remain
3273 serial and will compute the remaining (n%VF) iterations.
3274 (VF is the vectorization factor). */
3276 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3277 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3279 /* FORNOW: we'll treat the case where niters is constant and
3281 niters % vf != 0
3283 in the way similar to one with symbolic niters.
3284 For this we'll generate variable which value is equal to niters. */
3286 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3287 && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3288 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3291 /* 1) Make sure the loop header has exactly two entries
3292 2) Make sure we have a preheader basic block. */
3294 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3296 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3299 /* FORNOW: the vectorizer supports only loops which body consist
3300 of one basic block (header + empty latch). When the vectorizer will
3301 support more involved loop forms, the order by which the BBs are
3302 traversed need to be reconsidered. */
3304 for (i = 0; i < nbbs; i++)
3306 basic_block bb = bbs[i];
/* The iterator is advanced manually: vect_transform_stmt and bsi_remove
   both affect SI, so bsi_next is only called on the paths that need it.  */
3308 for (si = bsi_start (bb); !bsi_end_p (si);)
3310 tree stmt = bsi_stmt (si);
3311 stmt_vec_info stmt_info;
3312 bool is_store;
3314 if (vect_debug_details (NULL))
3316 fprintf (dump_file, "------>vectorizing statement: ");
3317 print_generic_expr (dump_file, stmt, TDF_SLIM);
3319 stmt_info = vinfo_for_stmt (stmt);
3320 gcc_assert (stmt_info);
3321 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3323 bsi_next (&si);
3324 continue;
3326 #ifdef ENABLE_CHECKING
3327 /* FORNOW: Verify that all stmts operate on the same number of
3328 units and no inner unrolling is necessary. */
3329 gcc_assert
3330 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3331 == vectorization_factor);
3332 #endif
3333 /* -------- vectorize statement ------------ */
3334 if (vect_debug_details (NULL))
3335 fprintf (dump_file, "transform statement.");
3337 is_store = vect_transform_stmt (stmt, &si);
3338 if (is_store)
/* A vectorized store leaves the scalar store dead: release its
   stmt_vec_info and remove the stmt (bsi_remove advances SI,
   hence the continue instead of bsi_next).  */
3340 /* free the attached stmt_vec_info and remove the stmt. */
3341 stmt_ann_t ann = stmt_ann (stmt);
3342 free (stmt_info);
3343 set_stmt_info (ann, NULL);
3344 bsi_remove (&si);
3345 continue;
3348 bsi_next (&si);
3349 } /* stmts in BB */
3350 } /* BBs in loop */
3352 vect_transform_loop_bound (loop_vinfo, ratio);
3354 if (vect_debug_details (loop))
3355 fprintf (dump_file,"Success! loop vectorized.");
3356 if (vect_debug_stats (loop))
3357 fprintf (dump_file, "LOOP VECTORIZED.");
3361 /* Function vect_is_simple_use.
3363 Input:
3364 LOOP - the loop that is being vectorized.
3365 OPERAND - operand of a stmt in LOOP.
3366 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3368 Returns whether a stmt with OPERAND can be vectorized.
3369 Supportable operands are constants, loop invariants, and operands that are
3370 defined by the current iteration of the loop. Unsupportable operands are
3371 those that are defined by a previous iteration of the loop (as is the case
3372 in reduction/induction computations). */
3374 static bool
3375 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3377 tree def_stmt;
3378 basic_block bb;
3380 if (def)
3381 *def = NULL_TREE;
/* Literal constants are always fine.  */
3383 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3384 return true;
3386 if (TREE_CODE (operand) != SSA_NAME)
3387 return false;
3389 def_stmt = SSA_NAME_DEF_STMT (operand);
3390 if (def_stmt == NULL_TREE )
3392 if (vect_debug_details (NULL))
3393 fprintf (dump_file, "no def_stmt.");
3394 return false;
3397 /* empty stmt is expected only in case of a function argument.
3398 (Otherwise - we expect a phi_node or a modify_expr). */
3399 if (IS_EMPTY_STMT (def_stmt))
/* NOTE(review): reads operand 0 of the empty def stmt; presumably a NOP
   wrapping the argument's default definition -- confirm.  */
3401 tree arg = TREE_OPERAND (def_stmt, 0);
3402 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3403 return true;
3404 if (vect_debug_details (NULL))
3406 fprintf (dump_file, "Unexpected empty stmt: ");
3407 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3409 return false;
3412 /* phi_node inside the loop indicates an induction/reduction pattern.
3413 This is not supported yet. */
3414 bb = bb_for_stmt (def_stmt);
3415 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3417 if (vect_debug_details (NULL))
3418 fprintf (dump_file, "reduction/induction - unsupported.");
3419 return false; /* FORNOW: not supported yet. */
3422 /* Expecting a modify_expr or a phi_node. */
/* A PHI reaching this point is outside LOOP, i.e. a loop invariant.  */
3423 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3424 || TREE_CODE (def_stmt) == PHI_NODE)
3426 if (def)
3427 *def = def_stmt;
3428 return true;
3431 return false;
3435 /* Function vect_analyze_operations.
3437 Scan the loop stmts and make sure they are all vectorizable. */
3439 static bool
3440 vect_analyze_operations (loop_vec_info loop_vinfo)
3442 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3443 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3444 int nbbs = loop->num_nodes;
3445 block_stmt_iterator si;
3446 int vectorization_factor = 0;
3447 int i;
3448 bool ok;
3449 tree scalar_type;
3451 if (vect_debug_details (NULL))
3452 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3454 for (i = 0; i < nbbs; i++)
3456 basic_block bb = bbs[i];
3458 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3460 tree stmt = bsi_stmt (si);
3461 int nunits;
3462 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3463 tree vectype;
3465 if (vect_debug_details (NULL))
3467 fprintf (dump_file, "==> examining statement: ");
3468 print_generic_expr (dump_file, stmt, TDF_SLIM);
3471 gcc_assert (stmt_info);
3473 /* skip stmts which do not need to be vectorized.
3474 this is expected to include:
3475 - the COND_EXPR which is the loop exit condition
3476 - any LABEL_EXPRs in the loop
3477 - computations that are used only for array indexing or loop
3478 control */
3480 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3482 if (vect_debug_details (NULL))
3483 fprintf (dump_file, "irrelevant.");
3484 continue;
/* A stmt already in vector form cannot be re-vectorized.  */
3487 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3489 if (vect_debug_stats (loop) || vect_debug_details (loop))
3491 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3492 print_generic_expr (dump_file, stmt, TDF_SLIM);
3494 return false;
/* Pick the scalar type whose vector counterpart we need: the type of
   the memory reference for loads/stores, otherwise of the LHS/stmt.  */
3497 if (STMT_VINFO_DATA_REF (stmt_info))
3498 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3499 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3500 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3501 else
3502 scalar_type = TREE_TYPE (stmt);
3504 if (vect_debug_details (NULL))
3506 fprintf (dump_file, "get vectype for scalar type: ");
3507 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3510 vectype = get_vectype_for_scalar_type (scalar_type);
3511 if (!vectype)
3513 if (vect_debug_stats (loop) || vect_debug_details (loop))
3515 fprintf (dump_file, "not vectorized: unsupported data-type ");
3516 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3518 return false;
3521 if (vect_debug_details (NULL))
3523 fprintf (dump_file, "vectype: ");
3524 print_generic_expr (dump_file, vectype, TDF_SLIM);
3526 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* NOTE(review): the NULL arguments appear to request analysis-only mode
   (no vector stmts generated) -- confirm against the vectorizable_*
   implementations.  */
3528 ok = (vectorizable_operation (stmt, NULL, NULL)
3529 || vectorizable_assignment (stmt, NULL, NULL)
3530 || vectorizable_load (stmt, NULL, NULL)
3531 || vectorizable_store (stmt, NULL, NULL));
3533 if (!ok)
3535 if (vect_debug_stats (loop) || vect_debug_details (loop))
3537 fprintf (dump_file, "not vectorized: stmt not supported: ");
3538 print_generic_expr (dump_file, stmt, TDF_SLIM);
3540 return false;
3543 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3544 if (vect_debug_details (NULL))
3545 fprintf (dump_file, "nunits = %d", nunits);
3547 if (vectorization_factor)
3549 /* FORNOW: don't allow mixed units.
3550 This restriction will be relaxed in the future. */
3551 if (nunits != vectorization_factor)
3553 if (vect_debug_stats (loop) || vect_debug_details (loop))
3554 fprintf (dump_file, "not vectorized: mixed data-types");
3555 return false;
3558 else
3559 vectorization_factor = nunits;
3561 #ifdef ENABLE_CHECKING
3562 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3563 * vectorization_factor == UNITS_PER_SIMD_WORD);
3564 #endif
3568 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3570 if (vectorization_factor <= 1)
3572 if (vect_debug_stats (loop) || vect_debug_details (loop))
3573 fprintf (dump_file, "not vectorized: unsupported data-type")
3574 return false;
3576 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3579 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3580 && vect_debug_details (NULL))
3581 fprintf (dump_file,
3582 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3583 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3585 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3586 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3588 /* In this case we have to generate epilog loop, that
3589 can be done only for loops with one entry edge. */
3590 if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
3591 || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
3593 if (vect_debug_stats (loop) || vect_debug_details (loop))
3594 fprintf (dump_file, "not vectorized: more than one entry.");
3595 return false;
3599 return true;
3603 /* Function exist_non_indexing_operands_for_use_p
3605 USE is one of the uses attached to STMT. Check if USE is
3606 used in STMT for anything other than indexing an array. */
3608 static bool
3609 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3611 tree operand;
3612 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3614 /* USE corresponds to some operand in STMT. If there is no data
3615 reference in STMT, then any operand that corresponds to USE
3616 is not indexing an array. */
3617 if (!STMT_VINFO_DATA_REF (stmt_info))
3618 return true;
3620 /* STMT has a data_ref. FORNOW this means that its of one of
3621 the following forms:
3622 -1- ARRAY_REF = var
3623 -2- var = ARRAY_REF
3624 (This should have been verified in analyze_data_refs).
3626 'var' in the second case corresponds to a def, not a use,
3627 so USE cannot correspond to any operands that are not used
3628 for array indexing.
3630 Therefore, all we need to check is if STMT falls into the
3631 first case, and whether var corresponds to USE. */
/* LHS is an SSA_NAME => case -2- (var = ARRAY_REF): USE can only appear
   as an array index on the RHS.  */
3633 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3634 return false;
3636 operand = TREE_OPERAND (stmt, 1);
3638 if (TREE_CODE (operand) != SSA_NAME)
3639 return false;
/* Case -1- (ARRAY_REF = var): USE is the stored value iff it is 'var'.  */
3641 if (operand == use)
3642 return true;
3644 return false;
3648 /* Function vect_is_simple_iv_evolution.
3650 FORNOW: A simple evolution of an induction variables in the loop is
3651 considered a polynomial evolution with constant step. */
3653 static bool
3654 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3655 tree * step, bool strict)
3657 tree init_expr;
3658 tree step_expr;
3660 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3662 /* When there is no evolution in this loop, the evolution function
3663 is not "simple". */
3664 if (evolution_part == NULL_TREE)
3665 return false;
3667 /* When the evolution is a polynomial of degree >= 2
3668 the evolution function is not "simple". */
3669 if (tree_is_chrec (evolution_part))
3670 return false;
3672 step_expr = evolution_part;
3673 init_expr = unshare_expr (initial_condition (access_fn));
3675 if (vect_debug_details (NULL))
3677 fprintf (dump_file, "step: ");
3678 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3679 fprintf (dump_file, ", init: ");
3680 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* Note: INIT and STEP are set even when false is returned below.  */
3683 *init = init_expr;
3684 *step = step_expr;
3686 if (TREE_CODE (step_expr) != INTEGER_CST)
3688 if (vect_debug_details (NULL))
3689 fprintf (dump_file, "step unknown.");
3690 return false;
/* In strict mode additionally require a unit (== 1) step.  */
3693 if (strict)
3694 if (!integer_onep (step_expr))
3696 if (vect_debug_details (NULL))
3697 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3698 return false;
3701 return true;
3705 /* Function vect_analyze_scalar_cycles.
3707 Examine the cross iteration def-use cycles of scalar variables, by
3708 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3709 cycles that they represent do not impede vectorization.
3711 FORNOW: Reduction as in the following loop, is not supported yet:
3712 loop1:
3713 for (i=0; i<N; i++)
3714 sum += a[i];
3715 The cross-iteration cycle corresponding to variable 'sum' will be
3716 considered too complicated and will impede vectorization.
3718 FORNOW: Induction as in the following loop, is not supported yet:
3719 loop2:
3720 for (i=0; i<N; i++)
3721 a[i] = i;
3723 However, the following loop *is* vectorizable:
3724 loop3:
3725 for (i=0; i<N; i++)
3726 a[i] = b[i];
3728 In both loops there exists a def-use cycle for the variable i:
3729 loop: i_2 = PHI (i_0, i_1)
3730 a[i_2] = ...;
3731 i_1 = i_2 + 1;
3732 GOTO loop;
3734 The evolution of the above cycle is considered simple enough,
3735 however, we also check that the cycle does not need to be
3736 vectorized, i.e - we check that the variable that this cycle
3737 defines is only used for array indexing or in stmts that do not
3738 need to be vectorized. This is not the case in loop2, but it
3739 *is* the case in loop3. */
3741 static bool
3742 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3744 tree phi;
3745 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3746 basic_block bb = loop->header;
3747 tree dummy;
3749 if (vect_debug_details (NULL))
3750 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3752 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3754 tree access_fn = NULL;
3756 if (vect_debug_details (NULL))
3758 fprintf (dump_file, "Analyze phi: ");
3759 print_generic_expr (dump_file, phi, TDF_SLIM);
3762 /* Skip virtual phi's. The data dependences that are associated with
3763 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3765 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3767 if (vect_debug_details (NULL))
3768 fprintf (dump_file, "virtual phi. skip.");
3769 continue;
3772 /* Analyze the evolution function. */
3774 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3775 those of loop induction variables; This property is verified here.
3777 Furthermore, if that induction variable is used in an operation
3778 that needs to be vectorized (i.e, is not solely used to index
3779 arrays and check the exit condition) - we do not support its
3780 vectorization yet. This property is verified in vect_is_simple_use,
3781 during vect_analyze_operations. */
/* NOTE(review): the instantiate_parameters call is commented out, so
   ACCESS_FN may still contain un-instantiated symbolic parameters.  */
3783 access_fn = /* instantiate_parameters
3784 (loop,*/
3785 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3787 if (!access_fn)
3789 if (vect_debug_stats (loop) || vect_debug_details (loop))
3790 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3791 return false;
3794 if (vect_debug_details (NULL))
3796 fprintf (dump_file, "Access function of PHI: ");
3797 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3800 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3801 &dummy, false))
3803 if (vect_debug_stats (loop) || vect_debug_details (loop))
3804 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3805 return false;
3809 return true;
3813 /* Function vect_analyze_data_ref_dependence.
3815 Return TRUE if there (might) exist a dependence between a memory-reference
3816 DRA and a memory-reference DRB. */
3818 static bool
3819 vect_analyze_data_ref_dependence (struct data_reference *dra,
3820 struct data_reference *drb,
3821 struct loop *loop)
3823 bool differ_p;
3824 struct data_dependence_relation *ddr;
/* Conservative: if the base objects cannot even be compared, assume a
   dependence exists.  */
3826 if (!array_base_name_differ_p (dra, drb, &differ_p))
3828 if (vect_debug_stats (loop) || vect_debug_details (loop))
3830 fprintf (dump_file,
3831 "not vectorized: can't determine dependence between: ");
3832 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3833 fprintf (dump_file, " and ");
3834 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3836 return true;
/* Distinct bases can never alias.  */
3839 if (differ_p)
3840 return false;
3842 ddr = initialize_data_dependence_relation (dra, drb);
3843 compute_affine_dependence (ddr);
/* chrec_known here means the refs were proved independent.  */
3845 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3846 return false;
3848 if (vect_debug_stats (loop) || vect_debug_details (loop))
3850 fprintf (dump_file,
3851 "not vectorized: possible dependence between data-refs ");
3852 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3853 fprintf (dump_file, " and ");
3854 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3857 return true;
3861 /* Function vect_analyze_data_ref_dependences.
3863 Examine all the data references in the loop, and make sure there do not
3864 exist any data dependences between them.
3866 TODO: dependences which distance is greater than the vectorization factor
3867 can be ignored. */
3869 static bool
3870 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3872 unsigned int i, j;
3873 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3874 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3875 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3877 /* Examine store-store (output) dependences. */
3879 if (vect_debug_details (NULL))
3880 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3882 if (vect_debug_details (NULL))
3883 fprintf (dump_file, "compare all store-store pairs.");
3885 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3887 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3889 struct data_reference *dra =
3890 VARRAY_GENERIC_PTR (loop_write_refs, i);
3891 struct data_reference *drb =
3892 VARRAY_GENERIC_PTR (loop_write_refs, j);
3893 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3894 return false;
3898 /* Examine load-store (true/anti) dependences. */
/* Load-load (input) dependences never block vectorization, so read-read
   pairs are not checked.  */
3900 if (vect_debug_details (NULL))
3901 fprintf (dump_file, "compare all load-store pairs.");
3903 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3905 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3907 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3908 struct data_reference *drb =
3909 VARRAY_GENERIC_PTR (loop_write_refs, j);
3910 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3911 return false;
3915 return true;
3919 /* Function vect_get_first_index.
3921 REF is a data reference.
3922 If it is an ARRAY_REF: if its lower bound is simple enough,
3923 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3924 If it is not an ARRAY_REF: REF has no "first index";
3925 ARRAY_FIRST_INDEX is zero, and the function returns TRUE. */
3927 static bool
3928 vect_get_first_index (tree ref, tree *array_first_index)
3930 tree array_start;
3932 if (TREE_CODE (ref) != ARRAY_REF)
3933 *array_first_index = size_zero_node;
3934 else
3936 array_start = array_ref_low_bound (ref);
/* Only a host-representable signed integer lower bound is "simple".  */
3937 if (!host_integerp (array_start,0))
3939 if (vect_debug_details (NULL))
3941 fprintf (dump_file, "array min val not simple integer cst.");
3942 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3944 return false;
3946 *array_first_index = array_start;
3949 return true;
3953 /* Function vect_compute_array_base_alignment.
3954 A utility function of vect_compute_array_ref_alignment.
3956 Compute the misalignment of ARRAY in bits.
3958 Input:
3959 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3960 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3961 if NULL: don't compute misalignment, just return the base of ARRAY.
3962 PREV_DIMENSIONS - initialized to one.
3963 MISALIGNMENT - the computed misalignment in bits.
3965 Output:
3966 If VECTYPE is not NULL:
3967 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3968 the base of the array, and put the computed misalignment in MISALIGNMENT.
3969 If VECTYPE is NULL:
3970 Return the base of the array.
3972 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3973 a[idx_N]...[idx_2][idx_1] is
3974 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3975 ... + idx_N * dim_0 * ... * dim_N-1}.
3976 (The misalignment of &a is not checked here).
3977 Note, that every term contains dim_0, therefore, if dim_0 is a
3978 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3979 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3980 NUNITS, we can say that the misalignment of the sum is equal to
3981 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3982 we can't determine this array misalignment, and we return
3983 false.
3984 We proceed recursively in this manner, accumulating total misalignment
3985 and the multiplication of previous dimensions for correct misalignment
3986 calculation. */
3988 static tree
3989 vect_compute_array_base_alignment (tree array,
3990 tree vectype,
3991 tree *prev_dimensions,
3992 tree *misalignment)
3994 tree index;
3995 tree domain;
3996 tree dimension_size;
3997 tree mis;
3998 tree bits_per_vectype;
3999 tree bits_per_vectype_unit;
4001 /* The 'stop condition' of the recursion. */
4002 if (TREE_CODE (array) != ARRAY_REF)
4003 return array;
4005 if (!vectype)
4006 /* Just get the base decl. */
4007 return vect_compute_array_base_alignment
4008 (TREE_OPERAND (array, 0), NULL, NULL, NULL)
/* Bail out once the accumulated state stops being a representable
   constant.  */
4010 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4011 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4012 return NULL_TREE;
/* dimension_size = max - min + 1 elements in this dimension.  */
4014 domain = TYPE_DOMAIN (TREE_TYPE (array));
4015 dimension_size =
4016 int_const_binop (PLUS_EXPR,
4017 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4018 TYPE_MIN_VALUE (domain), 1),
4019 size_one_node, 1);
4021 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4022 is a multiple of NUNITS:
4024 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4026 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4027 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4028 if (integer_zerop (mis))
4029 /* This array is aligned. Continue just in order to get the base decl. */
4030 return vect_compute_array_base_alignment
4031 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4033 index = TREE_OPERAND (array, 1);
4034 if (!host_integerp (index, 1))
4035 /* The current index is not constant. */
4036 return NULL_TREE;
/* Normalize the index relative to the dimension's lower bound.  */
4038 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4040 bits_per_vectype = fold_convert (unsigned_type_node,
4041 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4042 GET_MODE_SIZE (TYPE_MODE (vectype))));
4043 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4044 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4045 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4047 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4048 earlier:
4050 *misalignment =
4051 (*misalignment + index_val * dimension_size * *prev_dimensions)
4052 % vectype_nunits;
4055 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4056 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4057 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4058 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4059 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
4062 *prev_dimensions = int_const_binop (MULT_EXPR,
4063 *prev_dimensions, dimension_size, 1);
/* Recurse into the next-outer dimension with the accumulated state.  */
4065 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4066 prev_dimensions,
4067 misalignment);
4071 /* Function vect_compute_data_ref_alignment
4073 Compute the misalignment of the data reference DR.
4075 Output:
4076 1. If during the misalignment computation it is found that the data reference
4077 cannot be vectorized then false is returned.
4078 2. DR_MISALIGNMENT (DR) is defined.
4080 FOR NOW: No analysis is actually performed. Misalignment is calculated
4081 only for trivial cases. TODO. */
4083 static bool
4084 vect_compute_data_ref_alignment (struct data_reference *dr,
4085 loop_vec_info loop_vinfo)
4087 tree stmt = DR_STMT (dr);
4088 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4089 tree ref = DR_REF (dr);
4090 tree vectype;
4091 tree scalar_type;
4092 tree offset = size_zero_node;
4093 tree base, bit_offset, alignment;
4094 tree unit_bits = fold_convert (unsigned_type_node,
4095 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4096 tree dr_base;
4097 bool base_aligned_p;
4099 if (vect_debug_details (NULL))
4100 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4102 /* Initialize misalignment to unknown. */
4103 DR_MISALIGNMENT (dr) = -1;
4105 scalar_type = TREE_TYPE (ref);
4106 vectype = get_vectype_for_scalar_type (scalar_type);
4107 if (!vectype)
4109 if (vect_debug_details (NULL))
4111 fprintf (dump_file, "no vectype for stmt: ");
4112 print_generic_expr (dump_file, stmt, TDF_SLIM);
4113 fprintf (dump_file, " scalar_type: ");
4114 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4116 /* It is not possible to vectorize this data reference. */
4117 return false;
4119 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4120 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4122 if (TREE_CODE (ref) == ARRAY_REF)
4123 dr_base = ref;
4124 else
4125 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4127 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4128 loop_vinfo, &bit_offset, &base_aligned_p);
/* Unknown base is not a failure: return true and leave DR_MISALIGNMENT
   at -1 (unknown).  */
4129 if (!base)
4131 if (vect_debug_details (NULL))
4133 fprintf (dump_file, "Unknown alignment for access: ");
4134 print_generic_expr (dump_file,
4135 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4137 return true;
/* The base is not naturally aligned: try to raise its DECL_ALIGN to the
   vector alignment; if that is impossible the misalignment simply stays
   unknown (return true).  */
4140 if (!base_aligned_p)
4142 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4144 if (vect_debug_details (NULL))
4146 fprintf (dump_file, "can't force alignment of ref: ");
4147 print_generic_expr (dump_file, ref, TDF_SLIM);
4149 return true;
4152 /* Force the alignment of the decl.
4153 NOTE: This is the only change to the code we make during
4154 the analysis phase, before deciding to vectorize the loop. */
4155 if (vect_debug_details (NULL))
4156 fprintf (dump_file, "force alignment");
4157 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4158 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4161 /* At this point we assume that the base is aligned, and the offset from it
4162 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4163 gcc_assert (base_aligned_p
4164 || (TREE_CODE (base) == VAR_DECL
4165 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4167 /* Convert into bytes. */
4168 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4169 /* Check that there is no remainder in bits. */
4170 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4171 if (!integer_zerop (bit_offset))
4173 if (vect_debug_details (NULL))
4175 fprintf (dump_file, "bit offset alignment: ");
4176 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4178 return false;
4181 /* Alignment required, in bytes: */
4182 alignment = fold_convert (unsigned_type_node,
4183 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4185 /* Modulo alignment. */
4186 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4187 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4189 if (vect_debug_details (NULL))
4190 fprintf (dump_file, "unexpected misalign value");
4191 return false;
/* Record the known misalignment: byte offset modulo vector alignment.  */
4194 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4196 if (vect_debug_details (NULL))
4197 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4199 return true;
4203 /* Function vect_compute_array_ref_alignment
4205 Compute the alignment of an array-ref.
4206 The alignment we compute here is relative to
4207 TYPE_ALIGN(VECTYPE) boundary.
4209 Output:
4210 OFFSET - the alignment in bits
4211 Return value - the base of the array-ref. E.g,
4212 if the array-ref is a.b[k].c[i][j] the returned
4213 base is a.b[k].c
static tree
vect_compute_array_ref_alignment (struct data_reference *dr,
                                  loop_vec_info loop_vinfo,
                                  tree vectype,
                                  tree *offset)
{
  tree array_first_index = size_zero_node;
  tree init;
  tree ref = DR_REF (dr);
  tree scalar_type = TREE_TYPE (ref);
  tree oprnd0 = TREE_OPERAND (ref, 0);
  tree dims = size_one_node;
  tree misalign = size_zero_node;
  tree next_ref, this_offset = size_zero_node;
  tree nunits;
  tree nbits;

  if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
    /* The reference is an array without its last index.  */
    next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
                                                  &misalign);
  else
    next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
                                                  &misalign);
  if (!vectype)
    /* Alignment is not requested. Just return the base.  */
    return next_ref;

  /* Compute alignment.  Give up if the base misalignment is not a
     non-negative constant, or if no base was found.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
    return NULL_TREE;
  this_offset = misalign;

  /* Check the first index accessed.  */
  if (!vect_get_first_index (ref, &array_first_index))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "no first_index for array.");
      return NULL_TREE;
    }

  /* Check the index of the array_ref: INIT is the value of the innermost
     index at the first iteration of the loop.  */
  init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
                                        LOOP_VINFO_LOOP (loop_vinfo)->num);

  /* FORNOW: In order to simplify the handling of alignment, we make sure
     that the first location at which the array is accessed ('init') is on an
     'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
     This is too conservative, since we require that
     both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
     NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
     This should be relaxed in the future.  */

  if (!init || !host_integerp (init, 0))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "non constant init. ");
      return NULL_TREE;
    }

  /* bytes per scalar element: */
  nunits = fold_convert (unsigned_type_node,
        build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
  /* ... and the same quantity in bits.  */
  nbits = int_const_binop (MULT_EXPR, nunits,
                           build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);

  /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
  misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
  misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
  misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);

  /* TODO: allow negative misalign values.  */
  if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "unexpected misalign value");
      return NULL_TREE;
    }

  *offset = misalign;
  return next_ref;
}
4299 /* Function vect_compute_data_refs_alignment
4301 Compute the misalignment of data references in the loop.
4302 This pass may take place at function granularity instead of at loop
4303 granularity.
4305 FOR NOW: No analysis is actually performed. Misalignment is calculated
4306 only for trivial cases. TODO. */
4308 static bool
4309 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4311 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4312 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4313 unsigned int i;
4315 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4317 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4318 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4319 return false;
4322 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4324 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4325 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4326 return false;
4329 return true;
4333 /* Function vect_enhance_data_refs_alignment
4335 This pass will use loop versioning and loop peeling in order to enhance
4336 the alignment of data references in the loop.
4338 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4339 original loop is to be vectorized; Any other loops that are created by
4340 the transformations performed in this pass - are not supposed to be
4341 vectorized. This restriction will be relaxed.
4343 FOR NOW: No transformation is actually performed. TODO. */
static void
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int i;

  /*
     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two. For example, the scheme that
     intel uses when given a loop with several memory accesses, is as follows:
     choose one memory access ('p') which alignment you want to force by doing
     peeling. Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
      Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
      Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work. It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc. The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here is the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                 # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                 # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                 # DR_MISALIGNMENT(q) = 3
          p[i] = y;                 # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){       # loop 1A
          x = q[i];                 # DR_MISALIGNMENT(q) = 3
          p[i] = y;                 # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){       # loop 1B
          x = q[i];                 # DR_MISALIGNMENT(q) = 3
          p[i] = y;                 # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){      # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){      # loop 2A
        x = q[i];                   # DR_MISALIGNMENT(q) = 0
        p[i] = y;                   # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){      # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){     # loop 3A
          x = q[i];                 # DR_MISALIGNMENT(q) = 0
          p[i] = y;                 # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){     # loop 3B
          x = q[i];                 # DR_MISALIGNMENT(q) = 0
          p[i] = y;                 # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized. The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).
  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a better cost model.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr))
        {
          /* Record the first misaligned store as the peeling target.  */
          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
          LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
          break;
        }
    }

  if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "Peeling for alignment will not be applied.");
      return;
    }
  else
    if (vect_debug_details (loop))
      fprintf (dump_file, "Peeling for alignment will be applied.");

  /* (1.2) Update the alignment info according to the peeling factor.
     If the misalignment of the DR we peel for is M, then the
     peeling factor is VF - M, and the misalignment of each access DR_i
     in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
     If the misalignment of the DR we peel for is unknown, then the
     misalignment of each access DR_i in the loop is also unknown.

     FORNOW: set the misalignment of the accesses to unknown even
     if the peeling factor is known at compile time.

     TODO: - if the peeling factor is known at compile time, use that
             when updating the misalignment info of the loop DRs.
           - consider accesses that are known to have the same
             alignment, even if that alignment is unknown.  */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
        DR_MISALIGNMENT (dr) = 0;
      else
        DR_MISALIGNMENT (dr) = -1;
    }
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
        DR_MISALIGNMENT (dr) = 0;
      else
        DR_MISALIGNMENT (dr) = -1;
    }
}
4508 /* Function vect_analyze_data_refs_alignment
4510 Analyze the alignment of the data-references in the loop.
4511    FOR NOW: Until support for misaligned accesses is in place, only if all
4512 accesses are aligned can the loop be vectorized. This restriction will be
4513 relaxed. */
4515 static bool
4516 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4518 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4519 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4520 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4521 enum dr_alignment_support supportable_dr_alignment;
4522 unsigned int i;
4524 if (vect_debug_details (NULL))
4525 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4528 /* This pass may take place at function granularity instead of at loop
4529 granularity. */
4531 if (!vect_compute_data_refs_alignment (loop_vinfo))
4533 if (vect_debug_details (loop) || vect_debug_stats (loop))
4534 fprintf (dump_file,
4535 "not vectorized: can't calculate alignment for data ref.");
4536 return false;
4540 /* This pass will decide on using loop versioning and/or loop peeling in
4541 order to enhance the alignment of data references in the loop. */
4543 vect_enhance_data_refs_alignment (loop_vinfo);
4546 /* Finally, check that all the data references in the loop can be
4547 handled with respect to their alignment. */
4549 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4551 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4552 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4553 if (!supportable_dr_alignment)
4555 if (vect_debug_details (loop) || vect_debug_stats (loop))
4556 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4557 return false;
4560 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4562 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4563 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4564 if (!supportable_dr_alignment)
4566 if (vect_debug_details (loop) || vect_debug_stats (loop))
4567 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4568 return false;
4572 return true;
4576 /* Function vect_analyze_data_ref_access.
4578 Analyze the access pattern of the data-reference DR. For now, a data access
4579    has to be consecutive and aligned to be considered vectorizable.  */
4581 static bool
4582 vect_analyze_data_ref_access (struct data_reference *dr)
4584 varray_type access_fns = DR_ACCESS_FNS (dr);
4585 tree access_fn;
4586 tree init, step;
4587 unsigned int dimensions, i;
4589 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4590 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4591 access is contiguous). */
4592 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4594 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4596 access_fn = DR_ACCESS_FN (dr, i);
4598 if (evolution_part_in_loop_num (access_fn,
4599 loop_containing_stmt (DR_STMT (dr))->num))
4601 /* Evolution part is not NULL in this loop (it is neither constant
4602 nor invariant). */
4603 if (vect_debug_details (NULL))
4605 fprintf (dump_file,
4606 "not vectorized: complicated multidim. array access.");
4607 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4609 return false;
4613 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4614 if (!evolution_function_is_constant_p (access_fn)
4615 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4616 access_fn, &init, &step, true))
4618 if (vect_debug_details (NULL))
4620 fprintf (dump_file, "not vectorized: complicated access function.");
4621 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4623 return false;
4626 return true;
4630 /* Function vect_analyze_data_ref_accesses.
4632 Analyze the access pattern of all the data references in the loop.
4634 FORNOW: the only access pattern that is considered vectorizable is a
4635 simple step 1 (consecutive) access.
4637 FORNOW: handle only arrays and pointer accesses. */
4639 static bool
4640 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4642 unsigned int i;
4643 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4644 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4646 if (vect_debug_details (NULL))
4647 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4649 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4651 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4652 bool ok = vect_analyze_data_ref_access (dr);
4653 if (!ok)
4655 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4656 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4657 fprintf (dump_file, "not vectorized: complicated access pattern.");
4658 return false;
4662 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4664 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4665 bool ok = vect_analyze_data_ref_access (dr);
4666 if (!ok)
4668 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4669 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4670 fprintf (dump_file, "not vectorized: complicated access pattern.");
4671 return false;
4675 return true;
4679 /* Function vect_analyze_pointer_ref_access.
4681 Input:
4682 STMT - a stmt that contains a data-ref
4683 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4685 If the data-ref access is vectorizable, return a data_reference structure
4686 that represents it (DR). Otherwise - return NULL. */
static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  /* Evolution of the pointer itself (not of the pointed-to value).  */
  tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
  tree init, step;
  int step_val;
  tree reftype, innertype;
  enum machine_mode innermode;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!access_fn)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: complicated pointer access.");
      return NULL;
    }

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "Access function of ptr: ");
      print_generic_expr (dump_file, access_fn, TDF_SLIM);
    }

  /* The pointer must be a simple induction: INIT + i * STEP.  */
  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  /* The step must be a compile-time constant.  */
  if (!host_integerp (step,0))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file,
                 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  step_val = TREE_INT_CST_LOW (step);

  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (TREE_CODE (reftype) != POINTER_TYPE)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  /* The initial value must also be of pointer type.  */
  reftype = TREE_TYPE (init);
  if (TREE_CODE (reftype) != POINTER_TYPE)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  /* The step must equal the size of the accessed element, i.e. the
     pointer advances by exactly one element per iteration.  */
  innertype = TREE_TYPE (reftype);
  innermode = TYPE_MODE (innertype);
  if (GET_MODE_SIZE (innermode) != step_val)
    {
      /* FORNOW: support only consecutive access */
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: non consecutive access.");
      return NULL;
    }

  /* Model the access as a unit-stride index: {0, +, 1} in this loop.  */
  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "Access function of ptr indx: ");
      print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
  return dr;
}
4772 /* Function vect_get_symbl_and_dr.
4774 The function returns SYMBL - the relevant variable for
4775 memory tag (for aliasing purposes).
4776 Also data reference structure DR is created.
4778 Input:
4779 MEMREF - data reference in STMT
4780 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4782 Output:
4783 DR - data_reference struct for MEMREF
4784 return value - the relevant variable for memory tag (for aliasing purposes).
static tree
vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
                       loop_vec_info loop_vinfo, struct data_reference **dr)
{
  tree symbl, oprnd0, oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree offset;
  tree array_base, base;
  struct data_reference *new_dr;
  bool base_aligned_p;

  *dr = NULL;
  switch (TREE_CODE (memref))
    {
    case INDIRECT_REF:
      /* Pointer access: build the DR via the pointer-evolution analysis.  */
      new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
      if (! new_dr)
        return NULL_TREE;
      *dr = new_dr;
      symbl = DR_BASE_NAME (new_dr);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;

      switch (TREE_CODE (symbl))
        {
        case PLUS_EXPR:
        case MINUS_EXPR:
          oprnd0 = TREE_OPERAND (symbl, 0);
          oprnd1 = TREE_OPERAND (symbl, 1);

          STRIP_NOPS(oprnd1);
          /* Only {address_base + offset} expressions are supported,
             where address_base can be POINTER_TYPE or ARRAY_TYPE and
             offset can be anything but POINTER_TYPE or ARRAY_TYPE.
             TODO: swap operands if {offset + address_base}.  */
          if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
               && TREE_CODE (oprnd1) != INTEGER_CST)
              || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
            return NULL_TREE;

          if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
            symbl = oprnd0;
          else
            symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
                                           loop_vinfo, &new_dr);
          /* NOTE(review): no 'break' here — control falls through to the
             SSA_NAME/ADDR_EXPR cases below.  Harmless since those cases
             only 'break', but confirm the fallthrough is intentional.  */

        case SSA_NAME:
        case ADDR_EXPR:
          /* symbl remains unchanged.  */
          break;

        default:
          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "unhandled data ref: ");
              print_generic_expr (dump_file, memref, TDF_SLIM);
              fprintf (dump_file, " (symbl ");
              print_generic_expr (dump_file, symbl, TDF_SLIM);
              fprintf (dump_file, ") in stmt ");
              print_generic_expr (dump_file, stmt, TDF_SLIM);
            }
          return NULL_TREE;
        }
      break;

    case ARRAY_REF:
      offset = size_zero_node;

      /* Store the array base in the stmt info.
         For one dimensional array ref a[i], the base is a,
         for multidimensional a[i1][i2]..[iN], the base is
         a[i1][i2]..[iN-1].  */
      array_base = TREE_OPERAND (memref, 0);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;

      new_dr = analyze_array (stmt, memref, is_read);
      *dr = new_dr;

      /* Find the relevant symbol for aliasing purposes.  */
      base = DR_BASE_NAME (new_dr);
      switch (TREE_CODE (base))
        {
        case VAR_DECL:
          symbl = base;
          break;

        case INDIRECT_REF:
          symbl = TREE_OPERAND (base, 0);
          break;

        case COMPONENT_REF:
          /* Could have recorded more accurate information -
             i.e, the actual FIELD_DECL that is being referenced -
             but later passes expect VAR_DECL as the nmt.  */
          symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
                                        loop_vinfo, &offset, &base_aligned_p);
          if (symbl)
            break;
          /* fall through */
        default:
          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "unhandled struct/class field access ");
              print_generic_expr (dump_file, stmt, TDF_SLIM);
            }
          return NULL_TREE;
        }
      break;

    default:
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "unhandled data ref: ");
          print_generic_expr (dump_file, memref, TDF_SLIM);
          fprintf (dump_file, " in stmt ");
          print_generic_expr (dump_file, stmt, TDF_SLIM);
        }
      return NULL_TREE;
    }
  return symbl;
}
4910 /* Function vect_analyze_data_refs.
4912 Find all the data references in the loop.
4914 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4915 which base is really an array (not a pointer) and which alignment
4916 can be forced. This restriction will be relaxed. */
static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int j;
  struct data_reference *dr;
  tree tag;
  tree address_base;
  bool base_aligned_p;
  tree offset;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          bool is_read = false;
          tree stmt = bsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
          v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
          vuse_optype vuses = STMT_VUSE_OPS (stmt);
          varray_type *datarefs = NULL;
          int nvuses, nv_may_defs, nv_must_defs;
          tree memref = NULL;
          tree symbl;

          /* Assumption: there exists a data-ref in stmt, if and only if
             it has vuses/vdefs.  */

          if (!vuses && !v_may_defs && !v_must_defs)
            continue;

          nvuses = NUM_VUSES (vuses);
          nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
          nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);

          /* FORNOW: a stmt that both reads and writes memory is not
             handled.  */
          if (nvuses && (nv_may_defs || nv_must_defs))
            {
              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          if (TREE_CODE (stmt) != MODIFY_EXPR)
            {
              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "unexpected vops in stmt: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          /* vuses => load from the RHS; vdefs => store to the LHS.  */
          if (vuses)
            {
              memref = TREE_OPERAND (stmt, 1);
              datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
              is_read = true;
            }
          else /* vdefs */
            {
              memref = TREE_OPERAND (stmt, 0);
              datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
              is_read = false;
            }

          /* Analyze MEMREF. If it is of a supported form, build data_reference
             struct for it (DR) and find the relevant symbol for aliasing
             purposes.  */
          symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
                                         &dr);
          if (!symbl)
            {
              if (vect_debug_stats (loop) || vect_debug_details (loop))
                {
                  fprintf (dump_file, "not vectorized: unhandled data ref: ");
                  print_generic_expr (dump_file, stmt, TDF_SLIM);
                }
              return false;
            }

          /* Find and record the memtag assigned to this data-ref.  */
          switch (TREE_CODE (symbl))
            {
            case VAR_DECL:
              STMT_VINFO_MEMTAG (stmt_info) = symbl;
              break;

            case SSA_NAME:
              /* Look up the type memory tag of the underlying variable;
                 fall back to the tag of the dereferenced pointer.  */
              symbl = SSA_NAME_VAR (symbl);
              tag = get_var_ann (symbl)->type_mem_tag;
              if (!tag)
                {
                  tree ptr = TREE_OPERAND (memref, 0);
                  if (TREE_CODE (ptr) == SSA_NAME)
                    tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
                }
              if (!tag)
                {
                  if (vect_debug_stats (loop) || vect_debug_details (loop))
                    fprintf (dump_file, "not vectorized: no memtag for ref.");
                  return false;
                }
              STMT_VINFO_MEMTAG (stmt_info) = tag;
              break;

            case ADDR_EXPR:
              address_base = TREE_OPERAND (symbl, 0);

              switch (TREE_CODE (address_base))
                {
                case ARRAY_REF:
                  /* Re-analyze the array under the address-of, and use its
                     base as the memtag.  */
                  dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
                                      DR_IS_READ(dr));
                  STMT_VINFO_MEMTAG (stmt_info) =
                    vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr),
                                                  NULL_TREE, loop_vinfo,
                                                  &offset, &base_aligned_p);
                  break;

                case VAR_DECL:
                  STMT_VINFO_MEMTAG (stmt_info) = address_base;
                  break;

                default:
                  if (vect_debug_stats (loop) || vect_debug_details (loop))
                    {
                      fprintf (dump_file,
                               "not vectorized: unhandled address expr: ");
                      print_generic_expr (dump_file, stmt, TDF_SLIM);
                    }
                  return false;
                }
              break;

            default:
              if (vect_debug_stats (loop) || vect_debug_details (loop))
                {
                  fprintf (dump_file, "not vectorized: unsupported data-ref: ");
                  print_generic_expr (dump_file, memref, TDF_SLIM);
                }
              return false;
            }

          VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
          STMT_VINFO_DATA_REF (stmt_info) = dr;
        }
    }

  return true;
}
5081 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5083 /* Function vect_mark_relevant.
5085 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5087 static void
5088 vect_mark_relevant (varray_type worklist, tree stmt)
5090 stmt_vec_info stmt_info;
5092 if (vect_debug_details (NULL))
5093 fprintf (dump_file, "mark relevant.");
5095 if (TREE_CODE (stmt) == PHI_NODE)
5097 VARRAY_PUSH_TREE (worklist, stmt);
5098 return;
5101 stmt_info = vinfo_for_stmt (stmt);
5103 if (!stmt_info)
5105 if (vect_debug_details (NULL))
5107 fprintf (dump_file, "mark relevant: no stmt info!!.");
5108 print_generic_expr (dump_file, stmt, TDF_SLIM);
5110 return;
5113 if (STMT_VINFO_RELEVANT_P (stmt_info))
5115 if (vect_debug_details (NULL))
5116 fprintf (dump_file, "already marked relevant.");
5117 return;
5120 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5121 VARRAY_PUSH_TREE (worklist, stmt);
5125 /* Function vect_stmt_relevant_p.
5127 Return true if STMT in loop that is represented by LOOP_VINFO is
5128 "relevant for vectorization".
5130 A stmt is considered "relevant for vectorization" if:
5131 - it has uses outside the loop.
5132 - it has vdefs (it alters memory).
5133 - control stmts in the loop (except for the exit condition).
5135 CHECKME: what other side effects would the vectorizer allow? */
5137 static bool
5138 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5140 v_may_def_optype v_may_defs;
5141 v_must_def_optype v_must_defs;
5142 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5143 int i;
5144 dataflow_t df;
5145 int num_uses;
5147 /* cond stmt other than loop exit cond. */
5148 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5149 return true;
5151 /* changing memory. */
5152 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5153 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5154 if (v_may_defs || v_must_defs)
5156 if (vect_debug_details (NULL))
5157 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5158 return true;
5161 /* uses outside the loop. */
5162 df = get_immediate_uses (stmt);
5163 num_uses = num_immediate_uses (df);
5164 for (i = 0; i < num_uses; i++)
5166 tree use = immediate_use (df, i);
5167 basic_block bb = bb_for_stmt (use);
5168 if (!flow_bb_inside_loop_p (loop, bb))
5170 if (vect_debug_details (NULL))
5171 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5172 return true;
5176 return false;
5180 /* Function vect_mark_stmts_to_be_vectorized.
5182    Not all stmts in the loop need to be vectorized. For example:
5184      for i...
5185        for j...
5186    1.    T0 = i + j
5187    2.    T1 = a[T0]
5189    3.    j = j + 1
5191    Stmt 1 and 3 do not need to be vectorized, because loop control and
5192    addressing of vectorized data-refs are handled differently.
5194    This pass detects such stmts.
     Returns true on success.  Returns false (after clearing the worklist)
     when some use has a defining statement that vect_is_simple_use cannot
     handle.  */
5196 static bool
5197 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5199 varray_type worklist;
5200 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5201 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5202 unsigned int nbbs = loop->num_nodes;
5203 block_stmt_iterator si;
5204 tree stmt;
5205 stmt_ann_t ann;
5206 unsigned int i;
5207 int j;
5208 use_optype use_ops;
5209 stmt_vec_info stmt_info;
5211 if (vect_debug_details (NULL))
5212 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5214 VARRAY_TREE_INIT (worklist, 64, "work list");
5216 /* 1. Init worklist: scan every stmt in every BB of the loop, clear its
        relevant-flag, and seed the worklist with the stmts that are
        relevant by themselves (per vect_stmt_relevant_p).  */
5218 for (i = 0; i < nbbs; i++)
5220 basic_block bb = bbs[i];
5221 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5223 stmt = bsi_stmt (si);
5225 if (vect_debug_details (NULL))
5227 fprintf (dump_file, "init: stmt relevant? ");
5228 print_generic_expr (dump_file, stmt, TDF_SLIM);
5231 stmt_info = vinfo_for_stmt (stmt);
5232 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5234 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5235 vect_mark_relevant (worklist, stmt);
5240 /* 2. Process_worklist: transitively mark as relevant every in-loop
        stmt that feeds an already-relevant stmt (closure over def-use
        chains).  vect_mark_relevant pushes newly marked stmts, so the
        loop terminates once the closure is complete.  */
5242 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5244 stmt = VARRAY_TOP_TREE (worklist);
5245 VARRAY_POP (worklist);
5247 if (vect_debug_details (NULL))
5249 fprintf (dump_file, "worklist: examine stmt: ");
5250 print_generic_expr (dump_file, stmt, TDF_SLIM);
5253 /* Examine the USES in this statement. Mark all the statements which
5254 feed this statement's uses as "relevant", unless the USE is used as
5255 an array index. */
5257 if (TREE_CODE (stmt) == PHI_NODE)
5259 /* follow the def-use chain inside the loop: each PHI argument that is
        defined by a stmt inside the loop makes that def-stmt relevant.  */
5260 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5262 tree arg = PHI_ARG_DEF (stmt, j);
5263 tree def_stmt = NULL_TREE;
5264 basic_block bb;
5265 if (!vect_is_simple_use (arg, loop, &def_stmt))
5267 if (vect_debug_details (NULL))
5268 fprintf (dump_file, "worklist: unsupported use.")
5269 varray_clear (worklist);
5270 return false;
5272 if (!def_stmt)
5273 continue;
5275 if (vect_debug_details (NULL))
5277 fprintf (dump_file, "worklist: def_stmt: ");
5278 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5281 bb = bb_for_stmt (def_stmt);
5282 if (flow_bb_inside_loop_p (loop, bb))
5283 vect_mark_relevant (worklist, def_stmt);
     /* Not a PHI: walk the statement's explicit (non-virtual) uses.  */
5287 ann = stmt_ann (stmt);
5288 use_ops = USE_OPS (ann);
5290 for (i = 0; i < NUM_USES (use_ops); i++)
5292 tree use = USE_OP (use_ops, i);
5294 /* We are only interested in uses that need to be vectorized. Uses
5295 that are used for address computation are not considered relevant.  */
5297 if (exist_non_indexing_operands_for_use_p (use, stmt))
5299 tree def_stmt = NULL_TREE;
5300 basic_block bb;
5301 if (!vect_is_simple_use (use, loop, &def_stmt))
5303 if (vect_debug_details (NULL))
5304 fprintf (dump_file, "worklist: unsupported use.");
5305 varray_clear (worklist);
5306 return false;
5309 if (!def_stmt)
5310 continue;
5312 if (vect_debug_details (NULL))
5314 fprintf (dump_file, "worklist: examine use %d: ", i);
5315 print_generic_expr (dump_file, use, TDF_SLIM);
5318 bb = bb_for_stmt (def_stmt);
5319 if (flow_bb_inside_loop_p (loop, bb))
5320 vect_mark_relevant (worklist, def_stmt);
5323 } /* while worklist */
5325 varray_clear (worklist);
5326 return true;
5330 /* Function vect_analyze_loop_with_symbolic_num_of_iters.
5332 In case the number of iterations that LOOP iterates in unknown at compile
5333 time, an epilog loop will be generated, and the loop induction variables
5334 (IVs) will be "advanced" to the value they are supposed to take just before
5335 the epilog loop. Here we check that the access function of the loop IVs
5336 and the expression that represents the loop bound are simple enough.
5337 These restrictions will be relaxed in the future. */
5339 static bool
5340 vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
5341 struct loop *loop)
5343 basic_block bb = loop->header;
5344 tree phi;
5346 if (vect_debug_details (NULL))
5347 fprintf (dump_file,
5348 "\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n");
5350 if (chrec_contains_undetermined (niters))
5352 if (vect_debug_details (NULL))
5353 fprintf (dump_file, "Infinite number of iterations.");
5354 return false;
5357 if (!niters)
5359 if (vect_debug_details (NULL))
5360 fprintf (dump_file, "niters is NULL pointer.");
5361 return false;
5364 if (vect_debug_details (NULL))
5366 fprintf (dump_file, "Symbolic number of iterations is ");
5367 print_generic_expr (dump_file, niters, TDF_DETAILS);
5370 /* Analyze phi functions of the loop header. */
5372 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5374 tree access_fn = NULL;
5375 tree evolution_part;
5377 if (vect_debug_details (NULL))
5379 fprintf (dump_file, "Analyze phi: ");
5380 print_generic_expr (dump_file, phi, TDF_SLIM);
5383 /* Skip virtual phi's. The data dependences that are associated with
5384 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5386 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5388 if (vect_debug_details (NULL))
5389 fprintf (dump_file, "virtual phi. skip.");
5390 continue;
5393 /* Analyze the evolution function. */
5395 access_fn = instantiate_parameters
5396 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5398 if (!access_fn)
5400 if (vect_debug_details (NULL))
5401 fprintf (dump_file, "No Access function.");
5402 return false;
5405 if (vect_debug_details (NULL))
5407 fprintf (dump_file, "Access function of PHI: ");
5408 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5411 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5413 if (evolution_part == NULL_TREE)
5414 return false;
5416 /* FORNOW: We do not transform initial conditions of IVs
5417 which evolution functions are a polynomial of degree >= 2. */
5419 if (tree_is_chrec (evolution_part))
5420 return false;
5423 return true;
5427 /* Function vect_get_loop_niters.
5429 Determine how many iterations the loop is executed. */
5431 static tree
5432 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5434 tree niters;
5436 if (vect_debug_details (NULL))
5437 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5439 niters = number_of_iterations_in_loop (loop);
5441 if (niters != NULL_TREE
5442 && niters != chrec_dont_know)
5444 *number_of_iterations = niters;
5446 if (vect_debug_details (NULL))
5448 fprintf (dump_file, "==> get_loop_niters:" );
5449 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5453 return get_loop_exit_condition (loop);
5457 /* Function vect_analyze_loop_form.
5459 Verify the following restrictions (some may be relaxed in the future):
5460 - it's an inner-most loop
5461 - number of BBs = 2 (which are the loop header and the latch)
5462 - the loop has a pre-header
5463 - the loop has a single entry and exit
5464 - the loop exit condition is simple enough, and the number of iterations
5465 can be analyzed (a countable loop). */
5467 static loop_vec_info
5468 vect_analyze_loop_form (struct loop *loop)
5470 loop_vec_info loop_vinfo;
5471 tree loop_cond;
5472 tree number_of_iterations = NULL;
5474 if (vect_debug_details (loop))
5475 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5477 if (loop->inner
5478 || !loop->single_exit
5479 || loop->num_nodes != 2)
5481 if (vect_debug_stats (loop) || vect_debug_details (loop))
5483 fprintf (dump_file, "not vectorized: bad loop form. ");
5484 if (loop->inner)
5485 fprintf (dump_file, "nested loop.");
5486 else if (!loop->single_exit)
5487 fprintf (dump_file, "multiple exits.");
5488 else if (loop->num_nodes != 2)
5489 fprintf (dump_file, "too many BBs in loop.");
5492 return NULL;
5495 /* We assume that the loop exit condition is at the end of the loop. i.e,
5496 that the loop is represented as a do-while (with a proper if-guard
5497 before the loop if needed), where the loop header contains all the
5498 executable statements, and the latch is empty. */
5499 if (!empty_block_p (loop->latch))
5501 if (vect_debug_stats (loop) || vect_debug_details (loop))
5502 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5503 return NULL;
5506 if (empty_block_p (loop->header))
5508 if (vect_debug_stats (loop) || vect_debug_details (loop))
5509 fprintf (dump_file, "not vectorized: empty loop.");
5510 return NULL;
5513 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5514 if (!loop_cond)
5516 if (vect_debug_stats (loop) || vect_debug_details (loop))
5517 fprintf (dump_file, "not vectorized: complicated exit condition.");
5518 return NULL;
5521 if (!number_of_iterations)
5523 if (vect_debug_stats (loop) || vect_debug_details (loop))
5524 fprintf (dump_file,
5525 "not vectorized: number of iterations cannot be computed.");
5526 return NULL;
5529 loop_vinfo = new_loop_vec_info (loop);
5530 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5531 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5533 if (vect_debug_stats (loop) || vect_debug_details (loop))
5534 fprintf (dump_file, "loop bound unknown.");
5536 /* Unknown loop bound. */
5537 if (!vect_analyze_loop_with_symbolic_num_of_iters
5538 (number_of_iterations, loop))
5540 if (vect_debug_stats (loop) || vect_debug_details (loop))
5541 fprintf (dump_file,
5542 "not vectorized: can't determine loop bound.");
5543 return NULL;
5545 else
5547 /* We need only one loop entry for unknown loop bound support. */
5548 if (loop->num_entries != 1 || !loop->pre_header)
5550 if (vect_debug_stats (loop) || vect_debug_details (loop))
5551 fprintf (dump_file,
5552 "not vectorized: more than one loop entry.");
5553 return NULL;
5557 else
5558 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5560 if (vect_debug_stats (loop) || vect_debug_details (loop))
5561 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5562 return NULL;
5565 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5567 return loop_vinfo;
5571 /* Function vect_analyze_loop.
5573 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5574 for it. The different analyses will record information in the
5575 loop_vec_info struct. */
5577 static loop_vec_info
5578 vect_analyze_loop (struct loop *loop)
5580 bool ok;
5581 loop_vec_info loop_vinfo;
5583 if (vect_debug_details (NULL))
5584 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5586 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5588 loop_vinfo = vect_analyze_loop_form (loop);
5589 if (!loop_vinfo)
5591 if (vect_debug_details (loop))
5592 fprintf (dump_file, "bad loop form.");
5593 return NULL;
5596 /* Find all data references in the loop (which correspond to vdefs/vuses)
5597 and analyze their evolution in the loop.
5599 FORNOW: Handle only simple, array references, which
5600 alignment can be forced, and aligned pointer-references. */
5602 ok = vect_analyze_data_refs (loop_vinfo);
5603 if (!ok)
5605 if (vect_debug_details (loop))
5606 fprintf (dump_file, "bad data references.");
5607 destroy_loop_vec_info (loop_vinfo);
5608 return NULL;
5611 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5613 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5614 if (!ok)
5616 if (vect_debug_details (loop))
5617 fprintf (dump_file, "unexpected pattern.");
5618 if (vect_debug_details (loop))
5619 fprintf (dump_file, "not vectorized: unexpected pattern.");
5620 destroy_loop_vec_info (loop_vinfo);
5621 return NULL;
5624 /* Check that all cross-iteration scalar data-flow cycles are OK.
5625 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5627 ok = vect_analyze_scalar_cycles (loop_vinfo);
5628 if (!ok)
5630 if (vect_debug_details (loop))
5631 fprintf (dump_file, "bad scalar cycle.");
5632 destroy_loop_vec_info (loop_vinfo);
5633 return NULL;
5636 /* Analyze data dependences between the data-refs in the loop.
5637 FORNOW: fail at the first data dependence that we encounter. */
5639 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5640 if (!ok)
5642 if (vect_debug_details (loop))
5643 fprintf (dump_file, "bad data dependence.");
5644 destroy_loop_vec_info (loop_vinfo);
5645 return NULL;
5648 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5649 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5651 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5652 if (!ok)
5654 if (vect_debug_details (loop))
5655 fprintf (dump_file, "bad data access.");
5656 destroy_loop_vec_info (loop_vinfo);
5657 return NULL;
5660 /* Analyze the alignment of the data-refs in the loop.
5661 FORNOW: Only aligned accesses are handled. */
5663 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5664 if (!ok)
5666 if (vect_debug_details (loop))
5667 fprintf (dump_file, "bad data alignment.");
5668 destroy_loop_vec_info (loop_vinfo);
5669 return NULL;
5672 /* Scan all the operations in the loop and make sure they are
5673 vectorizable. */
5675 ok = vect_analyze_operations (loop_vinfo);
5676 if (!ok)
5678 if (vect_debug_details (loop))
5679 fprintf (dump_file, "bad operation or unsupported loop bound.");
5680 destroy_loop_vec_info (loop_vinfo);
5681 return NULL;
5684 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5686 return loop_vinfo;
5690 /* Function need_imm_uses_for.
5692 Return whether we ought to include information for 'var'
5693 when calculating immediate uses. For this pass we only want use
5694 information for non-virtual variables. */
5696 static bool
5697 need_imm_uses_for (tree var)
5699 return is_gimple_reg (var);
5703 /* Function vectorize_loops.
5705    Entry Point to loop vectorization phase.
     Analyzes every original loop in LOOPS (skipping loops created during
     this pass), transforms the vectorizable ones, then releases all
     per-loop vectorization info and rebuilds SSA form.  */
5707 void
5708 vectorize_loops (struct loops *loops)
5710 unsigned int i, loops_num;
5711 unsigned int num_vectorized_loops = 0;
5713 /* Does the target support SIMD? */
5714 /* FORNOW: until more sophisticated machine modelling is in place. */
5715 if (!UNITS_PER_SIMD_WORD)
5717 if (vect_debug_details (NULL))
5718 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5719 return;
     /* Build immediate-use info for non-virtual variables only (see
        need_imm_uses_for); vect_stmt_relevant_p depends on it.  */
5722 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5724 /* ----------- Analyze loops. ----------- */
5726 /* If some loop was duplicated, it gets bigger number
5727 than all previously defined loops. This fact allows us to run
5728 only over initial loops skipping newly generated ones. */
5729 loops_num = loops->num;
5730 for (i = 1; i < loops_num; i++)
5732 loop_vec_info loop_vinfo;
5733 struct loop *loop = loops->parray[i];
5735 if (!loop)
5736 continue;
5738 loop_vinfo = vect_analyze_loop (loop);
5739 loop->aux = loop_vinfo;
5741 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5742 continue;
5744 vect_transform_loop (loop_vinfo, loops);
5745 num_vectorized_loops++;
5748 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5749 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5750 num_vectorized_loops);
5752 /* ----------- Finalize. ----------- */
     /* Release dataflow info and the loop_vec_info stashed in loop->aux
        during the analysis pass above.  */
5754 free_df ();
5755 for (i = 1; i < loops_num; i++)
5757 struct loop *loop = loops->parray[i];
5758 loop_vec_info loop_vinfo;
5760 if (!loop)
5761 continue;
5762 loop_vinfo = loop->aux;
5763 destroy_loop_vec_info (loop_vinfo);
5764 loop->aux = NULL;
5767 rewrite_into_ssa (false);
5768 if (!bitmap_empty_p (vars_to_rename))
5770 /* The rewrite of ssa names may cause violation of loop closed ssa
5771 form invariants. TODO -- avoid these rewrites completely.
5772 Information in virtual phi nodes is sufficient for it. */
5773 rewrite_into_loop_closed_ssa ();
     /* NOTE(review): rewrite_into_loop_closed_ssa appears both inside the
        conditional above and unconditionally here — confirm against the
        upstream source whether both calls are intended.  */
5775 rewrite_into_loop_closed_ssa ();
5776 bitmap_clear (vars_to_rename);