[official-gcc.git] / gcc / tree-vectorizer.c
blob f258e4668f53156854efd3a1b7101e5a855f6007
1 /* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA. */
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
123 #include "config.h"
124 #include "system.h"
125 #include "coretypes.h"
126 #include "tm.h"
127 #include "errors.h"
128 #include "ggc.h"
129 #include "tree.h"
130 #include "target.h"
132 #include "rtl.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
137 #include "timevar.h"
138 #include "cfgloop.h"
139 #include "cfglayout.h"
140 #include "expr.h"
141 #include "optabs.h"
142 #include "toplev.h"
143 #include "tree-chrec.h"
144 #include "tree-data-ref.h"
145 #include "tree-scalar-evolution.h"
146 #include "tree-vectorizer.h"
147 #include "tree-pass.h"
149 /* Main analysis functions. */
150 static loop_vec_info vect_analyze_loop (struct loop *);
151 static loop_vec_info vect_analyze_loop_form (struct loop *);
152 static bool vect_analyze_data_refs (loop_vec_info);
153 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
154 static bool vect_analyze_scalar_cycles (loop_vec_info);
155 static bool vect_analyze_data_ref_accesses (loop_vec_info);
156 static bool vect_analyze_data_refs_alignment (loop_vec_info);
157 static bool vect_compute_data_refs_alignment (loop_vec_info);
158 static bool vect_analyze_operations (loop_vec_info);
160 /* Main code transformation functions. */
161 static void vect_transform_loop (loop_vec_info, struct loops *);
162 static void vect_transform_loop_bound (loop_vec_info, tree niters);
163 static bool vect_transform_stmt (tree, block_stmt_iterator *);
164 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
167 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
168 static enum dr_alignment_support vect_supportable_dr_alignment
169 (struct data_reference *);
170 static void vect_align_data_ref (tree);
171 static void vect_enhance_data_refs_alignment (loop_vec_info);
173 /* Utility functions for the analyses. */
174 static bool vect_is_simple_use (tree , struct loop *, tree *);
175 static bool exist_non_indexing_operands_for_use_p (tree, tree);
176 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
177 static void vect_mark_relevant (varray_type, tree);
178 static bool vect_stmt_relevant_p (tree, loop_vec_info);
179 static tree vect_get_loop_niters (struct loop *, tree *);
180 static bool vect_compute_data_ref_alignment
181 (struct data_reference *, loop_vec_info);
182 static bool vect_analyze_data_ref_access (struct data_reference *);
183 static bool vect_get_first_index (tree, tree *);
184 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
185 static struct data_reference * vect_analyze_pointer_ref_access
186 (tree, tree, bool);
187 static bool vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
188 struct loop *loop);
189 static tree vect_get_base_and_bit_offset
190 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
191 static struct data_reference * vect_analyze_pointer_ref_access
192 (tree, tree, bool);
193 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
194 static tree vect_compute_array_ref_alignment
195 (struct data_reference *, loop_vec_info, tree, tree *);
196 static tree vect_get_ptr_offset (tree, tree, tree *);
197 static tree vect_get_symbl_and_dr
198 (tree, tree, bool, loop_vec_info, struct data_reference **);
200 /* Utility functions for the code transformation. */
201 static tree vect_create_destination_var (tree, tree);
202 static tree vect_create_data_ref_ptr
203 (tree, block_stmt_iterator *, tree, tree *, bool);
204 static tree vect_create_index_for_vector_ref
205 (struct loop *, block_stmt_iterator *);
206 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
207 static tree get_vectype_for_scalar_type (tree);
208 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
209 static tree vect_get_vec_def_for_operand (tree, tree);
210 static tree vect_init_vector (tree, tree);
211 static tree vect_build_symbol_bound (tree, int, struct loop *);
212 static void vect_finish_stmt_generation
213 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
215 static void vect_generate_tmps_on_preheader (loop_vec_info,
216 tree *, tree *,
217 tree *);
218 static tree vect_build_loop_niters (loop_vec_info);
219 static void vect_update_ivs_after_vectorizer (struct loop *, tree);
221 /* Loop transformations prior to vectorization. */
223 /* Loop transformations entry point function.
224 It can be used outside of the vectorizer
225 in case the loop to be manipulated answers conditions specified
226 in function documentation. */
227 struct loop *tree_duplicate_loop_to_edge (struct loop *,
228 struct loops *, edge,
229 tree, tree, bool);
231 static void allocate_new_names (bitmap);
232 static void rename_use_op (use_operand_p);
233 static void rename_def_op (def_operand_p, tree);
234 static void rename_variables_in_bb (basic_block);
235 static void free_new_names (bitmap);
236 static void rename_variables_in_loop (struct loop *);
237 static void copy_phi_nodes (struct loop *, struct loop *, bool);
238 static void update_phis_for_duplicate_loop (struct loop *,
239 struct loop *,
240 bool after);
241 static void update_phi_nodes_for_guard (edge, struct loop *);
242 static void make_loop_iterate_ntimes (struct loop *, tree, tree, tree);
243 static struct loop *tree_duplicate_loop_to_edge_cfg (struct loop *,
244 struct loops *,
245 edge);
246 static edge add_loop_guard (basic_block, tree, basic_block);
247 static bool verify_loop_for_duplication (struct loop *, bool, edge);
249 /* Utilities dealing with loop peeling (not peeling itself). */
250 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
251 static void vect_update_niters_after_peeling (loop_vec_info, tree);
252 static void vect_update_inits_of_dr (struct data_reference *, struct loop *,
253 tree niters);
254 static void vect_update_inits_of_drs (loop_vec_info, tree);
255 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
257 /* Utilities for creation and deletion of vec_info structs. */
258 loop_vec_info new_loop_vec_info (struct loop *loop);
259 void destroy_loop_vec_info (loop_vec_info);
260 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
262 static bool vect_debug_stats (struct loop *loop);
263 static bool vect_debug_details (struct loop *loop);
266 /* Utilities to support loop peeling for vectorization purposes. */
269 /* For each definition in DEFINITIONS this function allocates
270 new ssa name. */
272 static void
273 allocate_new_names (bitmap definitions)
275 unsigned ver;
276 bitmap_iterator bi;
278 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
280 tree def = ssa_name (ver);
281 tree *new_name_ptr = xmalloc (sizeof (tree));
283 bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);
285 *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
286 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;
288 SSA_NAME_AUX (def) = new_name_ptr;
293 /* Renames the use *OP_P. */
295 static void
296 rename_use_op (use_operand_p op_p)
298 tree *new_name_ptr;
300 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
301 return;
303 new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));
305 /* Something defined outside of the loop. */
306 if (!new_name_ptr)
307 return;
309 /* An ordinary ssa name defined in the loop. */
311 SET_USE (op_p, *new_name_ptr);
315 /* Renames the def *OP_P in statement STMT. */
317 static void
318 rename_def_op (def_operand_p op_p, tree stmt)
320 tree *new_name_ptr;
322 if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
323 return;
325 new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));
327 /* Something defined outside of the loop. */
328 if (!new_name_ptr)
329 return;
331 /* An ordinary ssa name defined in the loop. */
333 SET_DEF (op_p, *new_name_ptr);
334 SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
338 /* Renames the variables in basic block BB. */
340 static void
341 rename_variables_in_bb (basic_block bb)
343 tree phi;
344 block_stmt_iterator bsi;
345 tree stmt;
346 stmt_ann_t ann;
347 use_optype uses;
348 vuse_optype vuses;
349 def_optype defs;
350 v_may_def_optype v_may_defs;
351 v_must_def_optype v_must_defs;
352 unsigned i;
353 edge e;
354 edge_iterator ei;
356 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
357 rename_def_op (PHI_RESULT_PTR (phi), phi);
359 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
361 stmt = bsi_stmt (bsi);
362 get_stmt_operands (stmt);
363 ann = stmt_ann (stmt);
365 uses = USE_OPS (ann);
366 for (i = 0; i < NUM_USES (uses); i++)
367 rename_use_op (USE_OP_PTR (uses, i));
369 defs = DEF_OPS (ann);
370 for (i = 0; i < NUM_DEFS (defs); i++)
371 rename_def_op (DEF_OP_PTR (defs, i), stmt);
373 vuses = VUSE_OPS (ann);
374 for (i = 0; i < NUM_VUSES (vuses); i++)
375 rename_use_op (VUSE_OP_PTR (vuses, i));
377 v_may_defs = V_MAY_DEF_OPS (ann);
378 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
380 rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
381 rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
384 v_must_defs = V_MUST_DEF_OPS (ann);
385 for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
387 rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
388 rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
392 FOR_EACH_EDGE (e, ei, bb->succs)
393 for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
394 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
398 /* Releases the structures holding the new ssa names. */
400 static void
401 free_new_names (bitmap definitions)
403 unsigned ver;
404 bitmap_iterator bi;
406 EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
408 tree def = ssa_name (ver);
410 if (SSA_NAME_AUX (def))
412 free (SSA_NAME_AUX (def));
413 SSA_NAME_AUX (def) = NULL;
419 /* Renames variables in new generated LOOP. */
421 static void
422 rename_variables_in_loop (struct loop *loop)
424 unsigned i;
425 basic_block *bbs;
427 bbs = get_loop_body (loop);
429 for (i = 0; i < loop->num_nodes; i++)
430 rename_variables_in_bb (bbs[i]);
432 free (bbs);
436 /* This function copies phis from LOOP header to
437 NEW_LOOP header. AFTER is as
438 in update_phis_for_duplicate_loop function. */
440 static void
441 copy_phi_nodes (struct loop *loop, struct loop *new_loop,
442 bool after)
444 tree phi, new_phi, def;
445 edge new_e;
446 edge e = (after ? loop_latch_edge (loop) : loop_preheader_edge (loop));
448 /* Second add arguments to newly created phi nodes. */
449 for (phi = phi_nodes (loop->header),
450 new_phi = phi_nodes (new_loop->header);
451 phi;
452 phi = PHI_CHAIN (phi),
453 new_phi = PHI_CHAIN (new_phi))
455 new_e = loop_preheader_edge (new_loop);
456 def = PHI_ARG_DEF_FROM_EDGE (phi, e);
457 add_phi_arg (&new_phi, def, new_e);
462 /* Update the PHI nodes of the NEW_LOOP. AFTER is true if the NEW_LOOP
463 executes after LOOP, and false if it executes before it. */
465 static void
466 update_phis_for_duplicate_loop (struct loop *loop,
467 struct loop *new_loop, bool after)
469 edge old_latch;
470 tree *new_name_ptr, new_ssa_name;
471 tree phi_new, phi_old, def;
472 edge orig_entry_e = loop_preheader_edge (loop);
474 /* Copy phis from loop->header to new_loop->header. */
475 copy_phi_nodes (loop, new_loop, after);
477 old_latch = loop_latch_edge (loop);
479 /* Update PHI args for the new loop latch edge, and
480 the old loop preheader edge, we know that the PHI nodes
481 are ordered appropriately in copy_phi_nodes. */
482 for (phi_new = phi_nodes (new_loop->header),
483 phi_old = phi_nodes (loop->header);
484 phi_new && phi_old;
485 phi_new = PHI_CHAIN (phi_new), phi_old = PHI_CHAIN (phi_old))
487 def = PHI_ARG_DEF_FROM_EDGE (phi_old, old_latch);
/* Only ssa names were renamed; constants need no update.  */
489 if (TREE_CODE (def) != SSA_NAME)
490 continue;
492 new_name_ptr = SSA_NAME_AUX (def);
494 /* Something defined outside of the loop. */
495 if (!new_name_ptr)
496 continue;
498 /* An ordinary ssa name defined in the loop. */
499 new_ssa_name = *new_name_ptr;
/* The copied loop carries the renamed value around its own latch.  */
501 add_phi_arg (&phi_new, new_ssa_name, loop_latch_edge(new_loop));
503 /* Update PHI args for the original loop pre-header edge. */
/* When the copy executes before LOOP (!after), LOOP's entry value is
   whatever the copy computed, i.e. the renamed name.  */
504 if (! after)
505 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi_old, orig_entry_e),
506 new_ssa_name);
511 /* Update PHI nodes for a guard of the LOOP.
513 LOOP is supposed to have a preheader bb at which a guard condition is
514 located. The true edge of this condition skips the LOOP and ends
515 at the destination of the (unique) LOOP exit. The loop exit bb is supposed
516 to be an empty bb (created by this transformation) with one successor.
518 This function creates phi nodes at the LOOP exit bb. These phis need to be
519 created as a result of adding true edge coming from guard.
521 FORNOW: Only phis which have corresponding phi nodes at the header of the
522 LOOP are created. Here we use the assumption that after the LOOP there
523 are no uses of defs generated in LOOP.
525 After the phis creation, the function updates the values of phi nodes at
526 the LOOP exit successor bb:
528 Original loop:
530 bb0: loop preheader
531 goto bb1
532 bb1: loop header
533 if (exit_cond) goto bb3 else goto bb2
534 bb2: loop latch
535 goto bb1
536 bb3:
539 After guard creation (the loop before this function):
541 bb0: loop preheader
542 if (guard_condition) goto bb4 else goto bb1
543 bb1: loop header
544 if (exit_cond) goto bb4 else goto bb2
545 bb2: loop latch
546 goto bb1
547 bb4: loop exit
548 (new empty bb)
549 goto bb3
550 bb3:
552 This function updates the phi nodes in bb4 and in bb3, to account for the
553 new edge from bb0 to bb4. */
555 static void
556 update_phi_nodes_for_guard (edge guard_true_edge, struct loop * loop)
558 tree phi, phi1;
/* BB is the empty loop-exit block that the guard's true edge and the
   loop's exit edge both reach (bb4 in the diagram above).  */
559 basic_block bb = loop->exit_edges[0]->dest;
561 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
563 tree new_phi;
564 tree phi_arg;
566 /* Generate new phi node. */
567 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (phi)), bb);
569 /* Add argument coming from guard true edge. */
/* If the guard is taken the loop never runs, so the incoming value is
   the loop's entry value.  */
570 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->entry_edges[0]);
571 add_phi_arg (&new_phi, phi_arg, guard_true_edge);
573 /* Add argument coming from loop exit edge. */
574 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
575 add_phi_arg (&new_phi, phi_arg, loop->exit_edges[0]);
577 /* Update all phi nodes at the loop exit successor. */
/* Reroute any successor phi that consumed the raw loop value so it
   now flows through the newly created merge phi.  */
578 for (phi1 = phi_nodes (EDGE_SUCC (bb, 0)->dest);
579 phi1;
580 phi1 = PHI_CHAIN (phi1))
582 tree old_arg = PHI_ARG_DEF_FROM_EDGE (phi1, EDGE_SUCC (bb, 0));
583 if (old_arg == phi_arg)
585 edge e = EDGE_SUCC (bb, 0);
587 SET_PHI_ARG_DEF (phi1,
588 phi_arg_from_edge (phi1, e),
589 PHI_RESULT (new_phi));
/* create_phi_node prepends to the chain; reverse to restore the order
   expected by lockstep walkers (see copy_phi_nodes).  */
594 set_phi_nodes (bb, phi_reverse (phi_nodes (bb)));
598 /* Make the LOOP iterate NITERS times. This is done by adding a new IV
599 that starts at zero, increases by one and its limit is NITERS. */
601 static void
602 make_loop_iterate_ntimes (struct loop *loop, tree niters,
603 tree begin_label, tree exit_label)
605 tree indx_before_incr, indx_after_incr, cond_stmt, cond;
606 tree orig_cond;
607 edge exit_edge = loop->exit_edges[0];
608 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
610 /* Flow loop scan does not update loop->single_exit field. */
611 loop->single_exit = loop->exit_edges[0];
612 orig_cond = get_loop_exit_condition (loop);
613 gcc_assert (orig_cond);
/* Create the counting IV 0, 1, 2, ... that drives the new exit
   test.  */
614 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
615 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
617 /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
618 back to the exit condition statement. */
619 bsi_next (&loop_exit_bsi);
620 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);
/* Build the replacement condition with the comparison sense matching
   the existing exit edge's polarity.  */
623 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
624 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
625 else /* 'then' edge loops back. */
626 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
628 begin_label = build1 (GOTO_EXPR, void_type_node, begin_label);
629 exit_label = build1 (GOTO_EXPR, void_type_node, exit_label);
630 cond_stmt = build (COND_EXPR, TREE_TYPE (orig_cond), cond,
631 begin_label, exit_label);
632 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
634 /* Remove old loop exit test: */
635 bsi_remove (&loop_exit_bsi);
637 if (vect_debug_stats (loop) || vect_debug_details (loop))
638 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
/* Record the now-known iteration count on the loop structure.  */
640 loop->nb_iterations = niters;
644 /* Given LOOP this function generates a new copy of it and puts it
645 on E which is either the entry or exit of LOOP. */
647 static struct loop *
648 tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
649 edge e)
651 struct loop *new_loop;
652 basic_block *new_bbs, *bbs;
653 bool at_exit;
654 bool was_imm_dom;
655 basic_block exit_dest;
656 tree phi, phi_arg;
/* E must be either LOOP's single exit edge or its preheader edge;
   anything else is rejected.  */
658 at_exit = (e == loop->exit_edges[0]);
659 if (!at_exit && e != loop_preheader_edge (loop))
661 if (dump_file && (dump_flags & TDF_DETAILS))
662 fprintf (dump_file,
663 "Edge is not an entry nor an exit edge.\n");
664 return NULL;
667 bbs = get_loop_body (loop);
669 /* Check whether duplication is possible. */
670 if (!can_copy_bbs_p (bbs, loop->num_nodes))
672 if (vect_debug_stats (loop) || vect_debug_details (loop))
673 fprintf (dump_file,
674 "Cannot copy basic blocks.\n");
675 free (bbs);
676 return NULL;
679 /* Generate new loop structure. */
680 new_loop = duplicate_loop (loops, loop, loop->outer);
681 if (!new_loop)
683 if (vect_debug_stats (loop) || vect_debug_details (loop))
684 fprintf (dump_file,
685 "The duplicate_loop returns NULL.\n");
686 free (bbs);
687 return NULL;
/* Remember whether LOOP's header dominated the exit destination so
   the dominator can be transferred to the copy's header below.  */
690 exit_dest = loop->exit_edges[0]->dest;
691 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
692 exit_dest) == loop->header ?
693 true : false);
695 new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);
697 copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);
699 /* Duplicating phi args at exit bbs as coming
700 also from exit of duplicated loop. */
701 for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
703 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
704 if (phi_arg)
706 edge new_loop_exit_edge;
/* The header has two successors; the exit edge is whichever one does
   not lead to the latch.  */
708 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
709 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
710 else
711 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
713 add_phi_arg (&phi, phi_arg, new_loop_exit_edge);
717 if (at_exit) /* Add the loop copy at exit. */
719 redirect_edge_and_branch_force (e, new_loop->header);
720 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
721 if (was_imm_dom)
722 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
724 else /* Add the copy at entry. */
726 edge new_exit_e;
727 edge entry_e = loop_preheader_edge (loop);
728 basic_block preheader = entry_e->src;
/* Find the copy's exit edge: the header successor that leaves the
   copied loop body.  */
730 if (!flow_bb_inside_loop_p (new_loop,
731 EDGE_SUCC (new_loop->header, 0)->dest))
732 new_exit_e = EDGE_SUCC (new_loop->header, 0);
733 else
734 new_exit_e = EDGE_SUCC (new_loop->header, 1);
/* Chain the copy in front of LOOP: copy's exit falls into LOOP's
   header.  */
736 redirect_edge_and_branch_force (new_exit_e, loop->header);
737 set_immediate_dominator (CDI_DOMINATORS, loop->header,
738 new_exit_e->src);
740 /* We have to add phi args to the loop->header here as coming
741 from new_exit_e edge. */
742 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
744 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
745 if (phi_arg)
746 add_phi_arg (&phi, phi_arg, new_exit_e);
749 redirect_edge_and_branch_force (entry_e, new_loop->header);
750 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
/* Refresh the cached entry/exit information of both loops after the
   CFG surgery above.  */
753 flow_loop_scan (new_loop, LOOP_ALL);
754 flow_loop_scan (loop, LOOP_ALL);
755 free (new_bbs);
756 free (bbs);
758 return new_loop;
762 /* Given the condition statement COND, put it as the last statement
763 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
764 Assumes that this is the single exit of the guarded loop.
765 Returns the skip edge. */
767 static edge
768 add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb)
770 block_stmt_iterator bsi;
771 edge new_e, enter_e;
772 tree cond_stmt, then_label, else_label;
774 enter_e = EDGE_SUCC (guard_bb, 0);
775 enter_e->flags &= ~EDGE_FALLTHRU;
776 enter_e->flags |= EDGE_FALSE_VALUE;
777 bsi = bsi_last (guard_bb);
779 then_label = build1 (GOTO_EXPR, void_type_node,
780 tree_block_label (exit_bb));
781 else_label = build1 (GOTO_EXPR, void_type_node,
782 tree_block_label (enter_e->dest));
783 cond_stmt = build (COND_EXPR, void_type_node, cond,
784 then_label, else_label);
785 bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
786 /* Add new edge to connect entry block to the second loop. */
787 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
788 set_immediate_dominator (CDI_DOMINATORS, exit_bb, guard_bb);
789 return new_e;
793 /* This function verifies that certain restrictions apply to LOOP. */
795 static bool
796 verify_loop_for_duplication (struct loop *loop,
797 bool update_first_loop_count, edge e)
799 edge exit_e = loop->exit_edges [0];
800 edge entry_e = loop_preheader_edge (loop);
802 /* We duplicate only innermost loops. */
803 if (loop->inner)
805 if (vect_debug_stats (loop) || vect_debug_details (loop))
806 fprintf (dump_file,
807 "Loop duplication failed. Loop is not innermost.\n");
808 return false;
811 /* Only loops with 1 exit. */
812 if (loop->num_exits != 1)
814 if (vect_debug_stats (loop) || vect_debug_details (loop))
815 fprintf (dump_file,
816 "More than one exit from loop.\n");
817 return false;
820 /* Only loops with 1 entry. */
821 if (loop->num_entries != 1)
823 if (vect_debug_stats (loop) || vect_debug_details (loop))
824 fprintf (dump_file,
825 "More than one exit from loop.\n");
826 return false;
829 /* All loops has outers, the only case loop->outer is NULL is for
830 the function itself. */
831 if (!loop->outer)
833 if (vect_debug_stats (loop) || vect_debug_details (loop))
834 fprintf (dump_file,
835 "Loop is outer-most loop.\n");
836 return false;
839 /* Verify that new IV can be created and loop condition
840 can be changed to make first loop iterate first_niters times. */
841 if (!update_first_loop_count)
843 tree orig_cond = get_loop_exit_condition (loop);
844 block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);
846 if (!orig_cond)
848 if (vect_debug_stats (loop) || vect_debug_details (loop))
849 fprintf (dump_file,
850 "Loop has no exit condition.\n");
851 return false;
853 if (orig_cond != bsi_stmt (loop_exit_bsi))
855 if (vect_debug_stats (loop) || vect_debug_details (loop))
856 fprintf (dump_file,
857 "Loop exit condition is not loop header last stmt.\n");
858 return false;
862 /* Make sure E is either an entry or an exit edge. */
863 if (e != exit_e && e != entry_e)
865 if (vect_debug_stats (loop) || vect_debug_details (loop))
866 fprintf (dump_file,
867 "E is not loop entry or exit edge.\n");
868 return false;
871 return true;
875 /* Given LOOP this function duplicates it to the edge E.
877 This transformation takes place before the loop is vectorized.
878 For now, there are two main cases when it's used
879 by the vectorizer: to support loops with unknown loop bounds
880 (or loop bounds indivisible by vectorization factor) and to force the
881 alignment of data references in the loop. In the first case, LOOP is
882 duplicated to the exit edge, producing epilog loop. In the second case, LOOP
883 is duplicated to the preheader edge thus generating prolog loop. In both
884 cases, the original loop will be vectorized after the transformation.
886 The edge E is supposed to be either preheader edge of the LOOP or
887 its exit edge. If preheader edge is specified, the LOOP copy
888 will precede the original one. Otherwise the copy will be located
889 at the exit of the LOOP.
891 FIRST_NITERS (SSA_NAME) parameter specifies how many times to iterate
892 the first loop. If UPDATE_FIRST_LOOP_COUNT parameter is false, the first
893 loop will be iterated FIRST_NITERS times by introducing additional
894 induction variable and replacing loop exit condition. If
895 UPDATE_FIRST_LOOP_COUNT is true no change to the first loop is made and
896 the caller to tree_duplicate_loop_to_edge is responsible for updating
897 the first loop count.
899 NITERS (also SSA_NAME) parameter defines the number of iteration the
900 original loop iterated. The function generates two if-then guards:
901 one prior to the first loop and the other prior to the second loop.
902 The first guard will be:
904 if (FIRST_NITERS == 0) then skip the first loop
906 The second guard will be:
908 if (FIRST_NITERS == NITERS) then skip the second loop
910 Thus the equivalence to the original code is guaranteed by correct values
911 of NITERS and FIRST_NITERS and generation of if-then loop guards.
913 For now this function supports only loop forms that are candidate for
914 vectorization. Such types are the following:
916 (1) only innermost loops
917 (2) loops built from 2 basic blocks
918 (3) loops with one entry and one exit
919 (4) loops without function calls
920 (5) loops without defs that are used after the loop
922 (1), (3) are checked in this function; (2) - in function
923 vect_analyze_loop_form; (4) - in function vect_analyze_data_refs;
924 (5) is checked as part of the function vect_mark_stmts_to_be_vectorized,
925 when excluding induction/reduction support.
927 The function returns NULL in case one of these checks or
928 transformations failed. */
930 struct loop*
931 tree_duplicate_loop_to_edge (struct loop *loop, struct loops *loops,
932 edge e, tree first_niters,
933 tree niters, bool update_first_loop_count)
935 struct loop *new_loop = NULL, *first_loop, *second_loop;
936 edge skip_e;
937 tree pre_condition;
938 bitmap definitions;
939 basic_block first_exit_bb, second_exit_bb;
940 basic_block pre_header_bb;
941 edge exit_e = loop->exit_edges [0];
943 gcc_assert (!any_marked_for_rewrite_p ());
945 if (!verify_loop_for_duplication (loop, update_first_loop_count, e))
946 return NULL;
948 /* We have to initialize cfg_hooks. Then, when calling
949 cfg_hooks->split_edge, the function tree_split_edge
950 is actually called and, when calling cfg_hooks->duplicate_block,
951 the function tree_duplicate_bb is called. */
952 tree_register_cfg_hooks ();
954 /* 1. Generate a copy of LOOP and put it on E (entry or exit). */
955 if (!(new_loop = tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
957 if (vect_debug_stats (loop) || vect_debug_details (loop))
958 fprintf (dump_file,
959 "The tree_duplicate_loop_to_edge_cfg failed.\n");
960 return NULL;
/* Give the stmts of the copied loop fresh SSA versions via
   allocate_new_names/rename_variables_in_loop.  */
963 definitions = marked_ssa_names ();
964 allocate_new_names (definitions);
965 update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
966 /* Here, using assumption (5), we do not propagate new names further
967 than on phis of the exit from the second loop. */
968 rename_variables_in_loop (new_loop);
969 free_new_names (definitions);
/* A copy placed on the exit edge runs second (epilogue); a copy placed
   on the entry edge runs first (prologue).  */
971 if (e == exit_e)
973 first_loop = loop;
974 second_loop = new_loop;
976 else
978 first_loop = new_loop;
979 second_loop = loop;
982 /* 2. Generate bb between the loops. */
983 first_exit_bb = split_edge (first_loop->exit_edges[0]);
984 add_bb_to_loop (first_exit_bb, first_loop->outer);
986 /* We need to update here first loop exit edge
987 and second loop preheader edge. */
988 flow_loop_scan (first_loop, LOOP_ALL);
989 flow_loop_scan (second_loop, LOOP_ALL);
991 /* 3. Make first loop iterate FIRST_NITERS times, if needed. */
992 if (!update_first_loop_count)
994 tree first_loop_latch_lbl = tree_block_label (first_loop->latch);
995 tree first_loop_exit_lbl = tree_block_label (first_exit_bb);
997 make_loop_iterate_ntimes (first_loop, first_niters,
998 first_loop_latch_lbl,
999 first_loop_exit_lbl);
1002 /* 4. Add the guard before first loop:
1004 if FIRST_NITERS == 0
1005 skip first loop
1006 else
1007 enter first loop */
1009 /* 4a. Generate bb before first loop. */
1010 pre_header_bb = split_edge (loop_preheader_edge (first_loop));
1011 add_bb_to_loop (pre_header_bb, first_loop->outer);
1013 /* First loop preheader edge is changed. */
1014 flow_loop_scan (first_loop, LOOP_ALL);
1016 /* 4b. Generate guard condition. */
1017 pre_condition = build (LE_EXPR, boolean_type_node,
1018 first_niters, integer_zero_node);
1020 /* 4c. Add condition at the end of preheader bb. */
1021 skip_e = add_loop_guard (pre_header_bb, pre_condition, first_exit_bb);
1023 /* 4d. Update phis at first loop exit and propagate changes
1024 to the phis of second loop. */
1025 update_phi_nodes_for_guard (skip_e, first_loop);
1027 /* 5. Add the guard before second loop:
1029 if FIRST_NITERS == NITERS SKIP
1030 skip second loop
1031 else
1032 enter second loop */
1034 /* 5a. Generate empty bb at the exit from the second loop. */
1035 second_exit_bb = split_edge (second_loop->exit_edges[0]);
1036 add_bb_to_loop (second_exit_bb, second_loop->outer);
1038 /* Second loop preheader edge is changed. */
1039 flow_loop_scan (second_loop, LOOP_ALL);
1041 /* 5b. Generate guard condition. */
1042 pre_condition = build (EQ_EXPR, boolean_type_node,
1043 first_niters, niters);
1045 /* 5c. Add condition at the end of preheader bb. */
1046 skip_e = add_loop_guard (first_exit_bb, pre_condition, second_exit_bb);
1047 update_phi_nodes_for_guard (skip_e, second_loop);
/* Both guards and the iteration counts are in place; release the SSA
   bookkeeping used for the duplication.  */
1049 BITMAP_XFREE (definitions);
1050 unmark_all_for_rewrite ();
1052 return new_loop;
1057 /* Here the proper Vectorizer starts. */
1059 /* Function new_stmt_vec_info.
1061 Create and initialize a new stmt_vec_info struct for STMT. */
1063 stmt_vec_info
1064 new_stmt_vec_info (tree stmt, struct loop *loop)
1066 stmt_vec_info res;
1067 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1069 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1070 STMT_VINFO_STMT (res) = stmt;
1071 STMT_VINFO_LOOP (res) = loop;
1072 STMT_VINFO_RELEVANT_P (res) = 0;
1073 STMT_VINFO_VECTYPE (res) = NULL;
1074 STMT_VINFO_VEC_STMT (res) = NULL;
1075 STMT_VINFO_DATA_REF (res) = NULL;
1076 STMT_VINFO_MEMTAG (res) = NULL;
1077 STMT_VINFO_VECT_DR_BASE (res) = NULL;
1079 return res;
1083 /* Function new_loop_vec_info.
1085 Create and initialize a new loop_vec_info struct for LOOP, as well as
1086 stmt_vec_info structs for all the stmts in LOOP. */
1088 loop_vec_info
1089 new_loop_vec_info (struct loop *loop)
1091 loop_vec_info res;
1092 basic_block *bbs;
1093 block_stmt_iterator si;
1094 unsigned int i;
1096 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
1098 bbs = get_loop_body (loop);
1100 /* Create stmt_info for all stmts in the loop. */
1101 for (i = 0; i < loop->num_nodes; i++)
1103 basic_block bb = bbs[i];
1104 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1106 tree stmt = bsi_stmt (si);
1107 stmt_ann_t ann;
1109 get_stmt_operands (stmt);
1110 ann = stmt_ann (stmt);
1111 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
1115 LOOP_VINFO_LOOP (res) = loop;
1116 LOOP_VINFO_BBS (res) = bbs;
1117 LOOP_VINFO_EXIT_COND (res) = NULL;
1118 LOOP_VINFO_NITERS (res) = NULL;
1119 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
1120 LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
1121 LOOP_VINFO_VECT_FACTOR (res) = 0;
1122 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
1123 "loop_write_datarefs");
1124 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
1125 "loop_read_datarefs");
1126 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
1128 return res;
1132 /* Function destroy_loop_vec_info.
1134 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1135 stmts in the loop. */
1137 void
1138 destroy_loop_vec_info (loop_vec_info loop_vinfo)
1140 struct loop *loop;
1141 basic_block *bbs;
1142 int nbbs;
1143 block_stmt_iterator si;
1144 int j;
1146 if (!loop_vinfo)
1147 return;
1149 loop = LOOP_VINFO_LOOP (loop_vinfo);
1151 bbs = LOOP_VINFO_BBS (loop_vinfo);
1152 nbbs = loop->num_nodes;
1154 for (j = 0; j < nbbs; j++)
1156 basic_block bb = bbs[j];
1157 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1159 tree stmt = bsi_stmt (si);
1160 stmt_ann_t ann = stmt_ann (stmt);
1161 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1162 free (stmt_info);
1163 set_stmt_info (ann, NULL);
1167 free (LOOP_VINFO_BBS (loop_vinfo));
1168 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1169 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
1171 free (loop_vinfo);
1175 /* Function vect_debug_stats.
1177 For vectorization statistics dumps. */
1179 static bool
1180 vect_debug_stats (struct loop *loop)
1182 basic_block bb;
1183 block_stmt_iterator si;
1184 tree node = NULL_TREE;
1186 if (!dump_file || !(dump_flags & TDF_STATS))
1187 return false;
1189 if (!loop)
1191 fprintf (dump_file, "\n");
1192 return true;
1195 if (!loop->header)
1196 return false;
1198 bb = loop->header;
1200 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1202 node = bsi_stmt (si);
1203 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1204 break;
1207 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1208 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1210 fprintf (dump_file, "\nloop at %s:%d: ",
1211 EXPR_FILENAME (node), EXPR_LINENO (node));
1212 return true;
1215 return false;
1219 /* Function vect_debug_details.
1221 For vectorization debug dumps. */
1223 static bool
1224 vect_debug_details (struct loop *loop)
1226 basic_block bb;
1227 block_stmt_iterator si;
1228 tree node = NULL_TREE;
1230 if (!dump_file || !(dump_flags & TDF_DETAILS))
1231 return false;
1233 if (!loop)
1235 fprintf (dump_file, "\n");
1236 return true;
1239 if (!loop->header)
1240 return false;
1242 bb = loop->header;
1244 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1246 node = bsi_stmt (si);
1247 if (node && EXPR_P (node) && EXPR_LOCUS (node))
1248 break;
1251 if (node && EXPR_P (node) && EXPR_LOCUS (node)
1252 && EXPR_FILENAME (node) && EXPR_LINENO (node))
1254 fprintf (dump_file, "\nloop at %s:%d: ",
1255 EXPR_FILENAME (node), EXPR_LINENO (node));
1256 return true;
1259 return false;
1263 /* Function vect_get_ptr_offset
1265 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
1267 static tree
1268 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
1269 tree vectype ATTRIBUTE_UNUSED,
1270 tree *offset ATTRIBUTE_UNUSED)
/* Not implemented yet: always returns NULL_TREE, i.e. the pointer's
   offset from the vector alignment is unknown.  */
1272 /* TODO: Use alignment information. */
1273 return NULL_TREE;
1277 /* Function vect_get_base_and_bit_offset
1279 Return the BASE of the data reference EXPR.
1280 If VECTYPE is given, also compute the OFFSET from BASE in bits.
1281 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
1282 bits of 'a.b[i] + 4B' from a.
1284 Input:
1285 EXPR - the memory reference that is being analyzed
1286 DR - the data_reference struct of the _original_ memory reference
1287 (Note: DR_REF (DR) is not necessarily EXPR)
1288 VECTYPE - the type that defines the alignment (i.e, we compute
1289 alignment relative to TYPE_ALIGN(VECTYPE))
1291 Output:
1292 BASE (returned value) - the base of the data reference EXPR.
1293 E.g, if EXPR is a.b[k].c[i][j] the returned
1294 base is a.
1295 OFFSET - offset of EXPR from BASE in bits
1296 BASE_ALIGNED_P - indicates if BASE is aligned
1298 If something unexpected is encountered (an unsupported form of data-ref),
1299 or if VECTYPE is given but OFFSET cannot be determined:
1300 then NULL_TREE is returned. */
1302 static tree
1303 vect_get_base_and_bit_offset (struct data_reference *dr,
1304 tree expr,
1305 tree vectype,
1306 loop_vec_info loop_vinfo,
1307 tree *offset,
1308 bool *base_aligned_p)
1310 tree this_offset = size_zero_node;
1311 tree base = NULL_TREE;
1312 tree next_ref;
1313 tree oprnd0, oprnd1;
1314 struct data_reference *array_dr;
1315 enum tree_code code = TREE_CODE (expr);
/* Assume unaligned until one of the cases below proves otherwise.  */
1317 *base_aligned_p = false;
1319 switch (code)
1321 /* These cases end the recursion: */
1322 case VAR_DECL:
1323 *offset = size_zero_node;
1324 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1325 *base_aligned_p = true;
1326 return expr;
1328 case SSA_NAME:
1329 if (!vectype)
1330 return expr;
1332 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1333 return NULL_TREE;
1335 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1337 base = vect_get_ptr_offset (expr, vectype, offset);
1338 if (base)
1339 *base_aligned_p = true;
1341 else
1343 *base_aligned_p = true;
1344 *offset = size_zero_node;
1345 base = expr;
1347 return base;
1349 case INTEGER_CST:
1350 *offset = int_const_binop (MULT_EXPR, expr,
1351 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
1352 return expr;
1354 /* These cases continue the recursion: */
1355 case COMPONENT_REF:
1356 oprnd0 = TREE_OPERAND (expr, 0);
1357 oprnd1 = TREE_OPERAND (expr, 1);
1359 this_offset = bit_position (oprnd1);
1360 if (vectype && !host_integerp (this_offset, 1))
1361 return NULL_TREE;
1362 next_ref = oprnd0;
1363 break;
1365 case ADDR_EXPR:
1366 oprnd0 = TREE_OPERAND (expr, 0);
1367 next_ref = oprnd0;
1368 break;
1370 case INDIRECT_REF:
1371 oprnd0 = TREE_OPERAND (expr, 0);
1372 next_ref = oprnd0;
1373 break;
1375 case ARRAY_REF:
1376 if (DR_REF (dr) != expr)
1377 /* Build array data_reference struct if the existing DR_REF
1378 doesn't match EXPR. This happens, for example, when the
1379 EXPR is *T and T is initialized to &arr[indx]. The DR struct
1380 contains information on the access of T, not of arr. In order
1381 to continue the analysis, we create a new DR struct that
1382 describes the access of arr.
1384 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
1385 else
1386 array_dr = dr;
1388 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
1389 vectype, &this_offset);
1390 if (!next_ref)
1391 return NULL_TREE;
1393 if (vectype &&
1394 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
1396 *offset = this_offset;
1397 *base_aligned_p = true;
1398 return next_ref;
1400 break;
1402 case PLUS_EXPR:
1403 case MINUS_EXPR:
1404 /* In case we have a PLUS_EXPR of the form
1405 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1406 This is verified in vect_get_symbl_and_dr. */
1407 oprnd0 = TREE_OPERAND (expr, 0);
1408 oprnd1 = TREE_OPERAND (expr, 1);
1410 base = vect_get_base_and_bit_offset
1411 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
1412 if (vectype && !base)
1413 return NULL_TREE;
1415 next_ref = oprnd0;
1416 break;
1418 default:
1419 return NULL_TREE;
/* Recurse into the remaining sub-expression; this level's offset
   (THIS_OFFSET) is accumulated into *OFFSET below.  */
1422 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
1423 loop_vinfo, offset, base_aligned_p);
1425 if (vectype && base)
1427 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
1428 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
1429 return NULL_TREE;
1431 if (vect_debug_details (NULL))
1433 print_generic_expr (dump_file, expr, TDF_SLIM);
1434 fprintf (dump_file, " --> total offset for ref: ");
1435 print_generic_expr (dump_file, *offset, TDF_SLIM);
1438 return base;
1442 /* Function vect_can_force_dr_alignment_p.
1444 Returns whether the alignment of a DECL can be forced to be aligned
1445 on ALIGNMENT bit boundary. */
1447 static bool
1448 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1450 if (TREE_CODE (decl) != VAR_DECL)
1451 return false;
1453 if (DECL_EXTERNAL (decl))
1454 return false;
1456 if (TREE_STATIC (decl))
1457 return (alignment <= MAX_OFILE_ALIGNMENT);
1458 else
1459 /* This is not 100% correct. The absolute correct stack alignment
1460 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1461 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1462 However, until someone implements forced stack alignment, SSE
1463 isn't really usable without this. */
1464 return (alignment <= PREFERRED_STACK_BOUNDARY);
1468 /* Function vect_get_new_vect_var.
1470 Returns a name for a new variable. The current naming scheme appends the
1471 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1472 the name of vectorizer generated variables, and appends that to NAME if
1473 provided. */
1475 static tree
1476 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1478 const char *prefix;
1479 int prefix_len;
1480 tree new_vect_var;
1482 if (var_kind == vect_simple_var)
1483 prefix = "vect_";
1484 else
1485 prefix = "vect_p";
1487 prefix_len = strlen (prefix);
1489 if (name)
1490 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1491 else
1492 new_vect_var = create_tmp_var (type, prefix);
1494 return new_vect_var;
1498 /* Function vect_create_index_for_vector_ref.
1500 Create (and return) an index variable, along with it's update chain in the
1501 loop. This variable will be used to access a memory location in a vector
1502 operation.
1504 Input:
1505 LOOP: The loop being vectorized.
1506 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1507 function can be added here, or in the loop pre-header.
1509 Output:
1510 Return an index that will be used to index a vector array. It is expected
1511 that a pointer to the first vector will be used as the base address for the
1512 indexed reference.
1514 FORNOW: we are not trying to be efficient, just creating a new index each
1515 time from scratch. At this time all vector references could use the same
1516 index.
1518 TODO: create only one index to be used by all vector references. Record
1519 the index in the LOOP_VINFO the first time this procedure is called and
1520 return it on subsequent calls. The increment of this index must be placed
1521 just before the conditional expression that ends the single block loop. */
1523 static tree
1524 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1526 tree init, step;
1527 tree indx_before_incr, indx_after_incr;
1529 /* It is assumed that the base pointer used for vectorized access contains
1530 the address of the first vector. Therefore the index used for vectorized
1531 access must be initialized to zero and incremented by 1. */
1533 init = integer_zero_node;
1534 step = integer_one_node;
1536 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
1537 create_iv (init, step, NULL_TREE, loop, bsi, false,
1538 &indx_before_incr, &indx_after_incr);
1540 return indx_before_incr;
1544 /* Function vect_create_addr_base_for_vector_ref.
1546 Create an expression that computes the address of the first memory location
1547 that will be accessed for a data reference.
1549 Input:
1550 STMT: The statement containing the data reference.
1551 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1552 OFFSET: Optional. If supplied, it is be added to the initial address.
1554 Output:
1555 1. Return an SSA_NAME whose value is the address of the memory location of
1556 the first vector of the data reference.
1557 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
1558 these statement(s) which define the returned SSA_NAME.
1560 FORNOW: We are only handling array accesses with step 1. */
1562 static tree
1563 vect_create_addr_base_for_vector_ref (tree stmt,
1564 tree *new_stmt_list,
1565 tree offset)
1567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1568 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1569 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1570 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1571 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1572 tree ref = DR_REF (dr);
1573 tree data_ref_base_type = TREE_TYPE (data_ref_base);
1574 tree scalar_type = TREE_TYPE (ref);
1575 tree scalar_ptr_type = build_pointer_type (scalar_type);
1576 tree access_fn;
1577 tree init_val, step, init_oval;
1578 bool ok;
1579 bool is_ptr_ref, is_array_ref, is_addr_expr;
1580 tree array_base;
1581 tree vec_stmt;
1582 tree new_temp;
1583 tree array_ref;
1584 tree addr_base, addr_expr;
1585 tree dest, new_stmt;
1587 /* Only the access function of the last index is relevant (i_n in
1588 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
1589 access_fn = DR_ACCESS_FN (dr, 0);
1590 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step,
1591 true);
/* If the access function is not a simple IV evolution, fall back to a
   zero initial index.  */
1592 if (!ok)
1593 init_oval = integer_zero_node;
1595 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
1596 && TREE_CODE (data_ref_base) == SSA_NAME;
1597 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
1598 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
1599 || TREE_CODE (data_ref_base) == PLUS_EXPR
1600 || TREE_CODE (data_ref_base) == MINUS_EXPR;
1601 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
1603 /** Create: &(base[init_val])
1605 if data_ref_base is an ARRAY_TYPE:
1606 base = data_ref_base
1608 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
1609 base = *((scalar_array *) data_ref_base)
1612 if (is_array_ref)
1613 array_base = data_ref_base;
1614 else /* is_ptr_ref or is_addr_expr */
1616 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
1617 tree scalar_array_type = build_array_type (scalar_type, 0);
1618 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1619 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1620 add_referenced_tmp_var (array_ptr);
1622 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1623 add_referenced_tmp_var (dest);
1624 data_ref_base =
1625 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1626 append_to_statement_list_force (new_stmt, new_stmt_list);
1628 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
1629 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1630 new_temp = make_ssa_name (array_ptr, vec_stmt);
1631 TREE_OPERAND (vec_stmt, 0) = new_temp;
1632 append_to_statement_list_force (vec_stmt, new_stmt_list);
1634 /* (*array_ptr) */
1635 array_base = build_fold_indirect_ref (new_temp);
/* Gimplify the initial index value; any stmts this produces go onto
   NEW_STMT_LIST for the caller to insert.  */
1638 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
1639 add_referenced_tmp_var (dest);
1640 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
1641 append_to_statement_list_force (new_stmt, new_stmt_list);
1643 if (offset)
1645 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
1646 add_referenced_tmp_var (tmp);
1647 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
1648 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
1649 init_val = make_ssa_name (tmp, vec_stmt);
1650 TREE_OPERAND (vec_stmt, 0) = init_val;
1651 append_to_statement_list_force (vec_stmt, new_stmt_list);
1654 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
1655 NULL_TREE, NULL_TREE);
1656 addr_base = build_fold_addr_expr (array_ref);
1658 /* addr_expr = addr_base */
1659 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1660 get_name (base_name));
1661 add_referenced_tmp_var (addr_expr);
1662 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1663 new_temp = make_ssa_name (addr_expr, vec_stmt);
1664 TREE_OPERAND (vec_stmt, 0) = new_temp;
1665 append_to_statement_list_force (vec_stmt, new_stmt_list);
1667 return new_temp;
1671 /* Function get_vectype_for_scalar_type.
1673 Returns the vector type corresponding to SCALAR_TYPE as supported
1674 by the target. */
1676 static tree
1677 get_vectype_for_scalar_type (tree scalar_type)
1679 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1680 int nbytes = GET_MODE_SIZE (inner_mode);
1681 int nunits;
1682 tree vectype;
1684 if (nbytes == 0)
1685 return NULL_TREE;
1687 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1688 is expected. */
1689 nunits = UNITS_PER_SIMD_WORD / nbytes;
1691 vectype = build_vector_type (scalar_type, nunits);
1692 if (vect_debug_details (NULL))
1694 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1695 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1698 if (!vectype)
1699 return NULL_TREE;
1701 if (vect_debug_details (NULL))
1703 fprintf (dump_file, "vectype: ");
1704 print_generic_expr (dump_file, vectype, TDF_SLIM);
1707 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
1709 /* TODO: tree-complex.c sometimes can parallelize operations
1710 on generic vectors. We can vectorize the loop in that case,
1711 but then we should re-run the lowering pass. */
1712 if (vect_debug_details (NULL))
1713 fprintf (dump_file, "mode not supported by target.");
1714 return NULL_TREE;
1717 return vectype;
1721 /* Function vect_align_data_ref.
1723 Handle misalignment of memory accesses.
1725 FORNOW: Can't handle misaligned accesses.
1726 Make sure that the dataref is aligned. */
1728 static void
1729 vect_align_data_ref (tree stmt)
1731 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1732 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1734 /* FORNOW: can't handle misaligned accesses;
1735 all accesses expected to be aligned. */
1736 gcc_assert (aligned_access_p (dr));
1740 /* Function vect_create_data_ref_ptr.
1742 Create a memory reference expression for vector access, to be used in a
1743 vector load/store stmt. The reference is based on a new pointer to vector
1744 type (vp).
1746 Input:
1747 1. STMT: a stmt that references memory. Expected to be of the form
1748 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
1749 2. BSI: block_stmt_iterator where new stmts can be added.
1750 3. OFFSET (optional): an offset to be added to the initial address accessed
1751 by the data-ref in STMT.
1752 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
1753 pointing to the initial address.
1755 Output:
1756 1. Declare a new ptr to vector_type, and have it point to the base of the
1757 data reference (initial addressed accessed by the data reference).
1758 For example, for vector of type V8HI, the following code is generated:
1760 v8hi *vp;
1761 vp = (v8hi *)initial_address;
1763 if OFFSET is not supplied:
1764 initial_address = &a[init];
1765 if OFFSET is supplied:
1766 initial_address = &a[init + OFFSET];
1768 Return the initial_address in INITIAL_ADDRESS.
1770 2. Create a data-reference in the loop based on the new vector pointer vp,
1771 and using a new index variable 'idx' as follows:
1773 vp' = vp + update
1775 where if ONLY_INIT is true:
1776 update = zero
1777 and otherwise
1778 update = idx + vector_type_size
1780 Return the pointer vp'.
1783 FORNOW: handle only aligned and consecutive accesses. */
1785 static tree
1786 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
1787 tree *initial_address, bool only_init)
1789 tree base_name;
1790 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1791 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1792 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1793 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1794 tree vect_ptr_type;
1795 tree vect_ptr;
1796 tree tag;
/* Virtual operands of STMT; their base variables are queued for SSA
   renaming below.  */
1797 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1798 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1799 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1800 int nvuses, nv_may_defs, nv_must_defs;
1801 int i;
1802 tree new_temp;
1803 tree vec_stmt;
1804 tree new_stmt_list = NULL_TREE;
1805 tree idx;
1806 edge pe = loop_preheader_edge (loop);
1807 basic_block new_bb;
1808 tree vect_ptr_init;
1809 tree vectype_size;
1810 tree ptr_update;
1811 tree data_ref_ptr;
1813 base_name = unshare_expr (DR_BASE_NAME (dr));
1814 if (vect_debug_details (NULL))
1816 tree data_ref_base = base_name;
1817 fprintf (dump_file, "create array_ref of type: ");
1818 print_generic_expr (dump_file, vectype, TDF_SLIM);
1819 if (TREE_CODE (data_ref_base) == VAR_DECL)
1820 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
1821 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1822 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
1823 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1824 fprintf (dump_file, "vectorizing a record based array ref: ");
1825 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1826 fprintf (dump_file, "vectorizing a pointer ref: ");
1827 print_generic_expr (dump_file, base_name, TDF_SLIM);
1830 /** (1) Create the new vector-pointer variable: **/
1832 vect_ptr_type = build_pointer_type (vectype);
1833 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1834 get_name (base_name));
1835 add_referenced_tmp_var (vect_ptr);
1838 /** (2) Handle aliasing information of the new vector-pointer: **/
1840 tag = STMT_VINFO_MEMTAG (stmt_info);
1841 gcc_assert (tag);
1842 get_var_ann (vect_ptr)->type_mem_tag = tag;
1844 /* Mark for renaming all aliased variables
1845 (i.e, the may-aliases of the type-mem-tag). */
1846 nvuses = NUM_VUSES (vuses);
1847 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1848 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1849 for (i = 0; i < nvuses; i++)
1851 tree use = VUSE_OP (vuses, i);
1852 if (TREE_CODE (use) == SSA_NAME)
1853 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1855 for (i = 0; i < nv_may_defs; i++)
1857 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1858 if (TREE_CODE (def) == SSA_NAME)
1859 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1861 for (i = 0; i < nv_must_defs; i++)
1863 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
1864 if (TREE_CODE (def) == SSA_NAME)
1865 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1869 /** (3) Calculate the initial address the vector-pointer, and set
1870 the vector-pointer to point to it before the loop: **/
1872 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1873 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1874 offset);
1875 pe = loop_preheader_edge (loop);
1876 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1877 gcc_assert (!new_bb);
1878 *initial_address = new_temp;
1880 /* Create: p = (vectype *) initial_base */
1881 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1882 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1883 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1884 TREE_OPERAND (vec_stmt, 0) = new_temp;
1885 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1886 gcc_assert (!new_bb);
1887 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1890 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1892 if (only_init) /* No update in loop is required. */
1893 return vect_ptr_init;
1895 idx = vect_create_index_for_vector_ref (loop, bsi);
/* The pointer update is in bytes: idx * sizeof (vectype).  */
1897 /* Create: update = idx * vectype_size */
1898 ptr_update = create_tmp_var (integer_type_node, "update");
1899 add_referenced_tmp_var (ptr_update);
1900 vectype_size = build_int_cst (integer_type_node,
1901 GET_MODE_SIZE (TYPE_MODE (vectype)));
1902 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1903 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1904 new_temp = make_ssa_name (ptr_update, vec_stmt);
1905 TREE_OPERAND (vec_stmt, 0) = new_temp;
1906 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1908 /* Create: data_ref_ptr = vect_ptr_init + update */
1909 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1910 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1911 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1912 TREE_OPERAND (vec_stmt, 0) = new_temp;
1913 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1914 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1916 return data_ref_ptr;
1920 /* Function vect_create_destination_var.
1922 Create a new temporary of type VECTYPE. */
1924 static tree
1925 vect_create_destination_var (tree scalar_dest, tree vectype)
1927 tree vec_dest;
1928 const char *new_name;
1930 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1932 new_name = get_name (scalar_dest);
1933 if (!new_name)
1934 new_name = "var_";
1935 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1936 add_referenced_tmp_var (vec_dest);
1938 return vec_dest;
1942 /* Function vect_init_vector.
1944 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1945 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1946 used in the vectorization of STMT. */
1948 static tree
1949 vect_init_vector (tree stmt, tree vector_var)
1951 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1952 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1953 tree new_var;
1954 tree init_stmt;
1955 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1956 tree vec_oprnd;
1957 edge pe;
1958 tree new_temp;
1959 basic_block new_bb;
1961 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1962 add_referenced_tmp_var (new_var);
1964 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1965 new_temp = make_ssa_name (new_var, init_stmt);
1966 TREE_OPERAND (init_stmt, 0) = new_temp;
1968 pe = loop_preheader_edge (loop);
1969 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1970 gcc_assert (!new_bb);
1972 if (vect_debug_details (NULL))
1974 fprintf (dump_file, "created new init_stmt: ");
1975 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1978 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1979 return vec_oprnd;
1983 /* Function vect_get_vec_def_for_operand.
1985 OP is an operand in STMT. This function returns a (vector) def that will be
1986 used in the vectorized stmt for STMT.
1988 In the case that OP is an SSA_NAME which is defined in the loop, then
1989 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1991 In case OP is an invariant or constant, a new stmt that creates a vector def
1992 needs to be introduced. */
1994 static tree
1995 vect_get_vec_def_for_operand (tree op, tree stmt)
1997 tree vec_oprnd;
1998 tree vec_stmt;
1999 tree def_stmt;
2000 stmt_vec_info def_stmt_info = NULL;
2001 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2002 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2003 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2004 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2005 basic_block bb;
2006 tree vec_inv;
2007 tree t = NULL_TREE;
2008 tree def;
2009 int i;
2011 if (vect_debug_details (NULL))
2013 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2014 print_generic_expr (dump_file, op, TDF_SLIM);
2017 /** ===> Case 1: operand is a constant. **/
2019 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2021 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2023 tree vec_cst;
2025 /* Build a tree with vector elements. */
2026 if (vect_debug_details (NULL))
2027 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2029 for (i = nunits - 1; i >= 0; --i)
2031 t = tree_cons (NULL_TREE, op, t);
2033 vec_cst = build_vector (vectype, t);
2034 return vect_init_vector (stmt, vec_cst);
2037 gcc_assert (TREE_CODE (op) == SSA_NAME);
2039 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2041 def_stmt = SSA_NAME_DEF_STMT (op);
2042 def_stmt_info = vinfo_for_stmt (def_stmt);
2044 if (vect_debug_details (NULL))
2046 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2047 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2051 /** ==> Case 2.1: operand is defined inside the loop. **/
2053 if (def_stmt_info)
2055 /* Get the def from the vectorized stmt. */
2057 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2058 gcc_assert (vec_stmt);
2059 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2060 return vec_oprnd;
2064 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2065 it is a reduction/induction. **/
2067 bb = bb_for_stmt (def_stmt);
2068 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2070 if (vect_debug_details (NULL))
2071 fprintf (dump_file, "reduction/induction - unsupported.");
2072 internal_error ("no support for reduction/induction"); /* FORNOW */
2076 /** ==> Case 2.3: operand is defined outside the loop -
2077 it is a loop invariant. */
2079 switch (TREE_CODE (def_stmt))
2081 case PHI_NODE:
2082 def = PHI_RESULT (def_stmt);
2083 break;
2084 case MODIFY_EXPR:
2085 def = TREE_OPERAND (def_stmt, 0);
2086 break;
2087 case NOP_EXPR:
/* An empty "default definition" stmt: the invariant value is OP itself.  */
2088 def = TREE_OPERAND (def_stmt, 0);
2089 gcc_assert (IS_EMPTY_STMT (def_stmt));
2090 def = op;
2091 break;
2092 default:
2093 if (vect_debug_details (NULL))
2095 fprintf (dump_file, "unsupported defining stmt: ");
2096 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2098 internal_error ("unsupported defining stmt");
2101 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2103 if (vect_debug_details (NULL))
2104 fprintf (dump_file, "Create vector_inv.");
2106 for (i = nunits - 1; i >= 0; --i)
2108 t = tree_cons (NULL_TREE, def, t);
2111 vec_inv = build_constructor (vectype, t);
2112 return vect_init_vector (stmt, vec_inv);
2116 /* Function vect_finish_stmt_generation.
2118 Insert a new stmt. */
2120 static void
2121 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2123 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2125 if (vect_debug_details (NULL))
2127 fprintf (dump_file, "add new stmt: ");
2128 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2131 /* Make sure bsi points to the stmt that is being vectorized. */
2133 /* Assumption: any stmts created for the vectorization of stmt S were
2134 inserted before S. BSI is expected to point to S or some new stmt before S. */
2136 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2137 bsi_next (bsi);
2138 gcc_assert (stmt == bsi_stmt (*bsi));
2142 /* Function vectorizable_assignment.
2144 Check if STMT performs an assignment (copy) that can be vectorized.
2145 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2146 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2147 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2149 static bool
2150 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2152 tree vec_dest;
2153 tree scalar_dest;
2154 tree op;
2155 tree vec_oprnd;
2156 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2157 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2158 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2159 tree new_temp;
2161 /* Is vectorizable assignment? */
2163 if (TREE_CODE (stmt) != MODIFY_EXPR)
2164 return false;
2166 scalar_dest = TREE_OPERAND (stmt, 0);
2167 if (TREE_CODE (scalar_dest) != SSA_NAME)
2168 return false;
2170 op = TREE_OPERAND (stmt, 1);
2171 if (!vect_is_simple_use (op, loop, NULL))
2173 if (vect_debug_details (NULL))
2174 fprintf (dump_file, "use not simple.");
2175 return false;
2178 if (!vec_stmt) /* transformation not required. */
2180 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2181 return true;
2184 /** Trasform. **/
2185 if (vect_debug_details (NULL))
2186 fprintf (dump_file, "transform assignment.");
2188 /* Handle def. */
2189 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2191 /* Handle use. */
2192 op = TREE_OPERAND (stmt, 1);
2193 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2195 /* Arguments are ready. create the new vector stmt. */
2196 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2197 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2198 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2199 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2201 return true;
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  /* Map the scalar tree code to the optab that implements it on vectors.  */
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* All operands must be "simple uses" (invariants, constants, or defs
     the vectorizer can provide vector versions of).  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop, NULL))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "op not supported by target.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      /* Analysis phase: record the stmt kind for the transform phase.  */
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
    }

  /* Arguments are ready. create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
			build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
			build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
2318 /* Function vectorizable_store.
2320 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2321 can be vectorized.
2322 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2323 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2324 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2326 static bool
2327 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2329 tree scalar_dest;
2330 tree data_ref;
2331 tree op;
2332 tree vec_oprnd1;
2333 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2334 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2335 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2336 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2337 enum machine_mode vec_mode;
2338 tree dummy;
2339 enum dr_alignment_support alignment_support_cheme;
2341 /* Is vectorizable store? */
2343 if (TREE_CODE (stmt) != MODIFY_EXPR)
2344 return false;
2346 scalar_dest = TREE_OPERAND (stmt, 0);
2347 if (TREE_CODE (scalar_dest) != ARRAY_REF
2348 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2349 return false;
2351 op = TREE_OPERAND (stmt, 1);
2352 if (!vect_is_simple_use (op, loop, NULL))
2354 if (vect_debug_details (NULL))
2355 fprintf (dump_file, "use not simple.");
2356 return false;
2359 vec_mode = TYPE_MODE (vectype);
2360 /* FORNOW. In some cases can vectorize even if data-type not supported
2361 (e.g. - array initialization with 0). */
2362 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2363 return false;
2365 if (!STMT_VINFO_DATA_REF (stmt_info))
2366 return false;
2369 if (!vec_stmt) /* transformation not required. */
2371 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2372 return true;
2375 /** Trasform. **/
2377 if (vect_debug_details (NULL))
2378 fprintf (dump_file, "transform store");
2380 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2381 gcc_assert (alignment_support_cheme);
2382 gcc_assert (alignment_support_cheme = dr_aligned); /* FORNOW */
2384 /* Handle use - get the vectorized def from the defining stmt. */
2385 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2387 /* Handle def. */
2388 /* FORNOW: make sure the data reference is aligned. */
2389 vect_align_data_ref (stmt);
2390 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2391 data_ref = build_fold_indirect_ref (data_ref);
2393 /* Arguments are ready. create the new vector stmt. */
2394 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2395 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2397 return true;
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  edge pe = loop_preheader_edge (loop);
  enum dr_alignment_support alignment_support_cheme;

  /* Is vectorizable load?  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (loop))
	fprintf (dump_file, "Aligned load, but unsupported type.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      /* Analysis phase: record the stmt kind for the transform phase.  */
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform load.");

  alignment_support_cheme = vect_supportable_dr_alignment (dr);
  gcc_assert (alignment_support_cheme);

  if (alignment_support_cheme == dr_aligned
      || alignment_support_cheme == dr_unaligned_supported)
    {
      /* Create:
	 p = initial_addr;
	 indx = 0;
	 loop {
	   vec_dest = *(p);
	   indx = indx + 1;
	 }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
	data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ?
		       integer_zero_node :
		       build_int_cst (integer_type_node, mis));
	  /* The misalignment is recorded in bytes; the
	     MISALIGNED_INDIRECT_REF operand is expressed in bits.  */
	  tmis = int_const_binop (MULT_EXPR, tmis,
			build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else if (alignment_support_cheme == dr_unaligned_software_pipeline)
    {
      /* Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      /* Insertion on the preheader edge must not have split the edge.  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);

      /* <2> Create lsq = *(floor(p2')) in the loop.  The pointer is
	 offset by VS - 1 elements so the aligned load covers the tail
	 of the unaligned vector.  */
      offset = build_int_cst (integer_type_node,
			      GET_MODE_NUNITS (TYPE_MODE (vectype)));
      offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);

      /* <3> Compute the "magic" third operand of the realign_load.  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader.  */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg
	     pressure.  */
	  magic = dataref_ptr;
	}

      /* <4> Create msq = phi <msq_init, lsq> in loop.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));

      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop.  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else
    gcc_unreachable ();

  *vec_stmt = new_stmt;
  return true;
}
2602 /* Function vect_supportable_dr_alignment
2604 Return whether the data reference DR is supported with respect to its
2605 alignment. */
2607 static enum dr_alignment_support
2608 vect_supportable_dr_alignment (struct data_reference *dr)
2610 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2611 enum machine_mode mode = (int) TYPE_MODE (vectype);
2613 if (aligned_access_p (dr))
2614 return dr_aligned;
2616 /* Possibly unaligned access. */
2618 if (DR_IS_READ (dr))
2620 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2621 && (!targetm.vectorize.builtin_mask_for_load
2622 || targetm.vectorize.builtin_mask_for_load ()))
2623 return dr_unaligned_software_pipeline;
2625 if (targetm.vectorize.misaligned_mem_ok (mode))
2626 /* Can't software pipeline the loads. */
2627 return dr_unaligned_supported;
2630 /* Unsupported. */
2631 return dr_unaligned_unsupported;
/* Function vect_transform_stmt.

   Create a vectorized stmt to replace STMT, and insert it at BSI.
   Returns true iff STMT was a store (the caller uses this to remove
   the scalar store afterwards).  */

static bool
vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
{
  bool is_store = false;
  tree vec_stmt = NULL_TREE;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  bool done;

  /* Dispatch on the stmt kind recorded during the analysis phase.  Each
     vectorizable_* routine already succeeded at analysis time, so the
     transform call is expected to succeed as well.  */
  switch (STMT_VINFO_TYPE (stmt_info))
    {
    case op_vec_info_type:
      done = vectorizable_operation (stmt, bsi, &vec_stmt);
      gcc_assert (done);
      break;

    case assignment_vec_info_type:
      done = vectorizable_assignment (stmt, bsi, &vec_stmt);
      gcc_assert (done);
      break;

    case load_vec_info_type:
      done = vectorizable_load (stmt, bsi, &vec_stmt);
      gcc_assert (done);
      break;

    case store_vec_info_type:
      done = vectorizable_store (stmt, bsi, &vec_stmt);
      gcc_assert (done);
      is_store = true;
      break;
    default:
      if (vect_debug_details (NULL))
	fprintf (dump_file, "stmt not supported.");
      gcc_unreachable ();
    }

  /* Record the vector stmt so later uses of STMT's def can find it.  */
  STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;

  return is_store;
}
/* This function builds ni_name = number of iterations loop executes
   on the loop preheader.  */

static tree
vect_build_loop_niters (loop_vec_info loop_vinfo)
{
  tree ni_name, stmt, var;
  edge pe;
  basic_block new_bb = NULL;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = unshare_expr (LOOP_VINFO_NITERS(loop_vinfo));

  var = create_tmp_var (TREE_TYPE (ni), "niters");
  add_referenced_tmp_var (var);
  if (TREE_CODE (ni) == INTEGER_CST)
    {
      /* This case is generated when treating a known loop bound
	 indivisible by VF. Here we cannot use force_gimple_operand.  */
      stmt = build (MODIFY_EXPR, void_type_node, var, ni);
      ni_name = make_ssa_name (var, stmt);
      TREE_OPERAND (stmt, 0) = ni_name;
    }
  else
    /* Gimplify the (possibly compound) iteration-count expression.  */
    ni_name = force_gimple_operand (ni, &stmt, false, var);

  /* Insert the computation on the preheader edge; if that split the
     edge, register the fresh block with the loop structure.  */
  pe = loop_preheader_edge (loop);
  if (stmt)
    new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  return ni_name;
}
/* This function generates the following statements:

   ni_name = number of iterations loop executes
   ratio = ni_name / vf
   ratio_mult_vf_name = ratio * vf

   and places them at the loop preheader edge.  */

static void
vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, tree *ni_name_p,
				 tree *ratio_mult_vf_name_p, tree *ratio_p)
{
  edge pe;
  basic_block new_bb;
  tree stmt, ni_name;
  tree ratio;
  tree ratio_mult_vf_name, ratio_mult_vf;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ni = LOOP_VINFO_NITERS(loop_vinfo);
  int vf, i;

  /* Generate temporary variable that contains
     number of iterations loop executes.  */

  ni_name = vect_build_loop_niters (loop_vinfo);

  /* ratio = ni / vf.  Since vf is a power of 2, the division is
     computed as ratio = ni >> log2 (vf).  */
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  ratio = vect_build_symbol_bound (ni_name, vf, loop);

  /* Update initial conditions of loop copy.  */

  /* ratio_mult_vf = ratio * vf;
     computed as ratio_mult_vf = ratio << log2 (vf).  */

  i = exact_log2 (vf);
  ratio_mult_vf = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
  add_referenced_tmp_var (ratio_mult_vf);

  ratio_mult_vf_name = make_ssa_name (ratio_mult_vf, NULL_TREE);

  stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
		 build2 (LSHIFT_EXPR, TREE_TYPE (ratio),
			 ratio, build_int_cst (unsigned_type_node,
					       i)));

  SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;

  /* Insert on the preheader edge; register any freshly split block.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  *ni_name_p = ni_name;
  *ratio_mult_vf_name_p = ratio_mult_vf_name;
  *ratio_p = ratio;

  return;
}
/* This function generates stmt

   tmp = n / vf;

   and attaches it to preheader of LOOP.  Returns the SSA name holding
   the result.  */

static tree
vect_build_symbol_bound (tree n, int vf, struct loop * loop)
{
  tree var, stmt, var_name;
  edge pe;
  basic_block new_bb;
  int i;

  /* create temporary variable */
  var = create_tmp_var (TREE_TYPE (n), "bnd");
  add_referenced_tmp_var (var);

  var_name = make_ssa_name (var, NULL_TREE);

  /* vf is power of 2; then n/vf = n >> log2 (vf).  */

  i = exact_log2 (vf);
  stmt = build2 (MODIFY_EXPR, void_type_node, var_name,
		 build2 (RSHIFT_EXPR, TREE_TYPE (n),
			 n, build_int_cst (unsigned_type_node,i)));

  SSA_NAME_DEF_STMT (var_name) = stmt;

  /* Insert on the preheader edge; register any freshly split block.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);
  else
    if (vect_debug_details (NULL))
      fprintf (dump_file, "New bb on preheader edge was not generated.");

  return var_name;
}
/* Function vect_transform_loop_bound.

   Create a new exit condition for the loop: the vectorized loop runs
   NITERS (= original count / VF) iterations, counted by a fresh IV.  */

static void
vect_transform_loop_bound (loop_vec_info loop_vinfo, tree niters)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge exit_edge = loop->single_exit;
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree indx_before_incr, indx_after_incr;
  tree orig_cond_expr;
  HOST_WIDE_INT old_N = 0;
  int vf;
  tree cond_stmt;
  tree new_loop_bound;
  bool symbol_niters;
  tree cond;
  tree lb_type;

  /* The iteration count may be known only symbolically.  */
  symbol_niters = !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo);

  if (!symbol_niters)
    old_N = LOOP_VINFO_INT_NITERS (loop_vinfo);

  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
#ifdef ENABLE_CHECKING
  gcc_assert (orig_cond_expr);
#endif
  gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));

  /* Create a new IV counting 0, 1, 2, ... per vectorized iteration.  */
  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
     to point to the exit condition.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);

  /* new loop exit test:  */
  lb_type = TREE_TYPE (TREE_OPERAND (COND_EXPR_COND (orig_cond_expr), 1));
  if (!symbol_niters)
    new_loop_bound = fold_convert (lb_type,
				   build_int_cst (unsigned_type_node,
						  old_N/vf));
  else
    new_loop_bound = niters;

  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    cond = build2 (GE_EXPR, boolean_type_node,
		   indx_after_incr, new_loop_bound);
  else /* 'then' edge loops back.  */
    cond = build2 (LT_EXPR, boolean_type_node,
		   indx_after_incr, new_loop_bound);

  /* Keep the original THEN/ELSE arms; only the condition changes.  */
  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
		      COND_EXPR_THEN (orig_cond_expr),
		      COND_EXPR_ELSE (orig_cond_expr));

  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_details (NULL))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);

  loop->nb_iterations = new_loop_bound;
}
/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP. This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop. Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized. The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized). i.e, the number of times the ivs should be bumped.

   We have:

        bb_before_loop:
          if (guard-cond) GOTO bb_before_epilog_loop
          else            GOTO loop

        loop:
          do {
          } while ...

        bb_before_epilog_loop:

   bb_before_epilog_loop has edges coming in form the loop exit and
   from bb_before_loop.  New definitions for ivs will be placed on the edge
   from loop->exit to bb_before_epilog_loop.  This also requires that we update
   the phis in bb_before_epilog_loop. (In the code this bb is denoted
   "update_bb").

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.  */

static void
vect_update_ivs_after_vectorizer (struct loop *loop, tree niters)
{
  edge exit = loop->exit_edges[0];
  tree phi, phi1;
  basic_block update_bb = exit->dest;
  edge update_e;

  /* Generate basic block at the exit from the loop.  */
  basic_block new_bb = split_edge (exit);

  add_bb_to_loop (new_bb, EDGE_SUCC (new_bb, 0)->dest->loop_father);
  loop->exit_edges[0] = EDGE_PRED (new_bb, 0);
  update_e = EDGE_SUCC (new_bb, 0);

  /* Walk the header phis and the update_bb phis in lock step (see
     Assumption 2 above: they are organized in the same order).  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi && phi1;
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      /* Skip virtual phi's. The data dependences that are associated with
	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "virtual phi. skip.");
	  continue;
	}

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      evolution_part =
	unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));

      /* FORNOW: We do not transform initial conditions of IVs
	 which evolution functions are a polynomial of degree >= 2 or
	 exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition (access_fn));

      /* ni = init + niters * step: the value the IV reaches after NITERS
	 iterations of the original loop.  */
      ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
		   build2 (MULT_EXPR, TREE_TYPE (niters),
			   niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_tmp_var (var);

      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into new_bb.  */
      last_bsi = bsi_last (new_bb);
      if (stmt)
	bsi_insert_after (&last_bsi, stmt, BSI_NEW_STMT);

      /* Fix phi expressions in duplicated loop.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
		  PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
      SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
    }
}
3011 /* This function is the main driver of transformation
3012 to be done for loop before vectorizing it in case of
3013 unknown loop bound. */
3015 static void
3016 vect_transform_for_unknown_loop_bound (loop_vec_info loop_vinfo, tree * ratio,
3017 struct loops *loops)
3020 tree ni_name, ratio_mult_vf_name;
3021 #ifdef ENABLE_CHECKING
3022 int loop_num;
3023 #endif
3024 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3025 struct loop *new_loop;
3027 if (vect_debug_details (NULL))
3028 fprintf (dump_file, "\n<<vect_transtorm_for_unknown_loop_bound>>\n");
3030 /* Generate the following variables on the preheader of original loop:
3032 ni_name = number of iteration the original loop executes
3033 ratio = ni_name / vf
3034 ratio_mult_vf_name = ratio * vf */
3035 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3036 &ratio_mult_vf_name, ratio);
3038 /* Update loop info. */
3039 loop->pre_header = loop_preheader_edge (loop)->src;
3040 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3042 #ifdef ENABLE_CHECKING
3043 loop_num = loop->num;
3044 #endif
3045 new_loop = tree_duplicate_loop_to_edge (loop, loops, loop->exit_edges[0],
3046 ratio_mult_vf_name, ni_name, true);
3047 #ifdef ENABLE_CHECKING
3048 gcc_assert (new_loop);
3049 gcc_assert (loop_num == loop->num);
3050 #endif
3052 /* Update IVs of original loop as if they were advanced
3053 by ratio_mult_vf_name steps. */
3055 #ifdef ENABLE_CHECKING
3056 /* Check existence of intermediate bb. */
3057 gcc_assert (loop->exit_edges[0]->dest == new_loop->pre_header);
3058 #endif
3059 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name);
3061 return;
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between NITERS (the original iteration count of the loop)
   and the misalignment of DR - the first data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.  */

static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var, stmt;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree start_addr, byte_miss_align, elem_miss_align;
  /* Alignment required by the vector type, in bytes.  */
  int vec_type_align =
    GET_MODE_ALIGNMENT (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
						       / BITS_PER_UNIT;
  tree tmp1, tmp2;
  tree new_stmt_list = NULL_TREE;

  start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
						     &new_stmt_list, NULL_TREE);

  /* Emit the address computation on the preheader edge.  */
  pe = loop_preheader_edge (loop);
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  /* byte_miss_align = start_addr & (vec_type_align - 1).  */
  byte_miss_align =
	build (BIT_AND_EXPR, integer_type_node, start_addr,
	       build (MINUS_EXPR, integer_type_node,
		      build_int_cst (unsigned_type_node,
				     vec_type_align), integer_one_node));
  /* elem_miss_align = byte_miss_align / element_size
     (element size = vec_type_align / vf).  */
  tmp1 = build_int_cst (unsigned_type_node, vec_type_align/vf);
  elem_miss_align = build (FLOOR_DIV_EXPR, integer_type_node,
			   byte_miss_align, tmp1);

  /* tmp2 = (vf - elem_miss_align) & (vf - 1): number of iterations to
     peel so the access becomes aligned (0 if already aligned).  */
  tmp2 =
	build (BIT_AND_EXPR, integer_type_node,
	       build (MINUS_EXPR, integer_type_node,
		      build_int_cst (unsigned_type_node, vf), elem_miss_align),
	       build (MINUS_EXPR, integer_type_node,
		      build_int_cst (unsigned_type_node, vf), integer_one_node));

  /* Never peel more iterations than the loop actually executes.  */
  iters = build2 (MIN_EXPR, TREE_TYPE (tmp2), tmp2, niters);
  var = create_tmp_var (TREE_TYPE (iters), "iters");
  add_referenced_tmp_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
  pe = loop_preheader_edge (loop);
  if (stmt)
    new_bb = bsi_insert_on_edge_immediate (pe, stmt);
  if (new_bb)
    add_bb_to_loop (new_bb, EDGE_PRED (new_bb, 0)->src->loop_father);

  return iters_name;
}
3133 /* Function vect_update_niters_after_peeling
3135 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3136 The new number of iterations is therefore original_niters - NITERS.
3137 Record the new number of iterations in LOOP_VINFO. */
3139 static void
3140 vect_update_niters_after_peeling (loop_vec_info loop_vinfo, tree niters)
3142 tree n_iters = LOOP_VINFO_NITERS (loop_vinfo);
/* NOTE(review): the MINUS_EXPR is built with integer_type_node regardless
   of the type of N_ITERS/NITERS -- verify no conversion is needed.  */
3143 LOOP_VINFO_NITERS (loop_vinfo) =
3144 build (MINUS_EXPR, integer_type_node, n_iters, niters);
3148 /* Function vect_update_inits_of_dr
3150 NITERS iterations were peeled from LOOP. DR represents a data reference
3151 in LOOP. This function updates the information recorded in DR to
3152 account for the fact that the first NITERS iterations had already been
3153 executed. Specifically, it updates the initial_condition of the
3154 access_function of DR. */
3156 static void
3157 vect_update_inits_of_dr (struct data_reference *dr, struct loop *loop,
3158 tree niters)
/* Only the first access function (index 0) of DR is adjusted here.  */
3160 tree access_fn = DR_ACCESS_FN (dr, 0);
3161 tree init, init_new, step;
3163 step = evolution_part_in_loop_num (access_fn, loop->num);
3164 init = initial_condition (access_fn);
/* new initial condition = init + niters * step.  */
3166 init_new = build (PLUS_EXPR, TREE_TYPE (init),
3167 build (MULT_EXPR, TREE_TYPE (niters),
3168 niters, step), init);
3169 DR_ACCESS_FN (dr, 0) = chrec_replace_initial_condition (access_fn, init_new);
3171 return;
3175 /* Function vect_update_inits_of_drs
3177 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3178 This function updates the information recorded for the data references in
3179 the loop to account for the fact that the first NITERS iterations had
3180 already been executed. Specifically, it updates the initial_condition of the
3181 access_function of all the data_references in the loop. */
3183 static void
3184 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3186 unsigned int i;
3187 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3188 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3189 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3191 if (dump_file && (dump_flags & TDF_DETAILS))
3192 fprintf (dump_file, "\n<<vect_update_inits_of_dr>>\n");
/* Adjust every write data-ref, then every read data-ref.  */
3194 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3196 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3197 vect_update_inits_of_dr (dr, loop, niters);
3200 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3202 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3203 vect_update_inits_of_dr (dr, loop, niters);
3208 /* Function vect_do_peeling_for_alignment
3210 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3211 'niters' is set to the misalignment of one of the data references in the
3212 loop, thereby forcing it to refer to an aligned location at the beginning
3213 of the execution of this loop. The data reference for which we are
3214 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3216 static void
3217 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3219 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3220 tree niters_of_prolog_loop, ni_name;
3222 if (vect_debug_details (NULL))
3223 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
/* Compute the runtime peel count (min of misalignment and niters).  */
3225 ni_name = vect_build_loop_niters (loop_vinfo);
3226 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3229 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
3230 tree_duplicate_loop_to_edge (loop, loops, loop_preheader_edge(loop),
3231 niters_of_prolog_loop, ni_name, false);
3233 /* Update number of times loop executes. */
3234 vect_update_niters_after_peeling (loop_vinfo, niters_of_prolog_loop);
3236 /* Update all inits of access functions of all data refs. */
3237 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3239 /* After peeling we have to reset scalar evolution analyzer. */
3240 scev_reset ();
3242 return;
3246 /* Function vect_transform_loop.
3248 The analysis phase has determined that the loop is vectorizable.
3249 Vectorize the loop - created vectorized stmts to replace the scalar
3250 stmts in the loop, and update the loop exit condition. */
3252 static void
3253 vect_transform_loop (loop_vec_info loop_vinfo,
3254 struct loops *loops ATTRIBUTE_UNUSED)
3256 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3257 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3258 int nbbs = loop->num_nodes;
3259 block_stmt_iterator si;
3260 int i;
3261 tree ratio = NULL;
3262 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3264 if (vect_debug_details (NULL))
3265 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
3268 /* Peel the loop if there are data refs with unknown alignment.
3269 Only one data ref with unknown store is allowed. */
3272 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3273 vect_do_peeling_for_alignment (loop_vinfo, loops);
3275 /* If the loop has a symbolic number of iterations 'n'
3276 (i.e. it's not a compile time constant),
3277 then an epilog loop needs to be created. We therefore duplicate
3278 the initial loop. The original loop will be vectorized, and will compute
3279 the first (n/VF) iterations. The second copy of the loop will remain
3280 serial and will compute the remaining (n%VF) iterations.
3281 (VF is the vectorization factor). */
3283 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3284 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3286 /* FORNOW: we'll treat the case where niters is constant and
3288 niters % vf != 0
3290 in the way similar to one with symbolic niters.
3291 For this we'll generate variable which value is equal to niters. */
3293 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3294 && (LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3295 vect_transform_for_unknown_loop_bound (loop_vinfo, &ratio, loops);
3298 /* 1) Make sure the loop header has exactly two entries
3299 2) Make sure we have a preheader basic block. */
3301 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3303 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3306 /* FORNOW: the vectorizer supports only loops which body consist
3307 of one basic block (header + empty latch). When the vectorizer will
3308 support more involved loop forms, the order by which the BBs are
3309 traversed need to be reconsidered. */
3311 for (i = 0; i < nbbs; i++)
3313 basic_block bb = bbs[i];
/* Iterate manually: SI is advanced inside the body because stores are
   removed in place (bsi_remove already moves the iterator forward).  */
3315 for (si = bsi_start (bb); !bsi_end_p (si);)
3317 tree stmt = bsi_stmt (si);
3318 stmt_vec_info stmt_info;
3319 bool is_store;
3321 if (vect_debug_details (NULL))
3323 fprintf (dump_file, "------>vectorizing statement: ");
3324 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Statements not marked relevant by the analysis phase are left as-is.  */
3326 stmt_info = vinfo_for_stmt (stmt);
3327 gcc_assert (stmt_info);
3328 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3330 bsi_next (&si);
3331 continue;
3333 #ifdef ENABLE_CHECKING
3334 /* FORNOW: Verify that all stmts operate on the same number of
3335 units and no inner unrolling is necessary. */
3336 gcc_assert
3337 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3338 == vectorization_factor);
3339 #endif
3340 /* -------- vectorize statement ------------ */
3341 if (vect_debug_details (NULL))
3342 fprintf (dump_file, "transform statement.");
3344 is_store = vect_transform_stmt (stmt, &si);
3345 if (is_store)
3347 /* free the attached stmt_vec_info and remove the stmt. */
3348 stmt_ann_t ann = stmt_ann (stmt);
3349 free (stmt_info);
3350 set_stmt_info (ann, NULL);
3351 bsi_remove (&si);
3352 continue;
3355 bsi_next (&si);
3356 } /* stmts in BB */
3357 } /* BBs in loop */
/* Finally, rewrite the loop exit test for the vectorized trip count.  */
3359 vect_transform_loop_bound (loop_vinfo, ratio);
3361 if (vect_debug_details (loop))
3362 fprintf (dump_file,"Success! loop vectorized.");
3363 if (vect_debug_stats (loop))
3364 fprintf (dump_file, "LOOP VECTORIZED.");
3368 /* Function vect_is_simple_use.
3370 Input:
3371 LOOP - the loop that is being vectorized.
3372 OPERAND - operand of a stmt in LOOP.
3373 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3375 Returns whether a stmt with OPERAND can be vectorized.
3376 Supportable operands are constants, loop invariants, and operands that are
3377 defined by the current iteration of the loop. Unsupportable operands are
3378 those that are defined by a previous iteration of the loop (as is the case
3379 in reduction/induction computations). */
3381 static bool
3382 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3384 tree def_stmt;
3385 basic_block bb;
3387 if (def)
3388 *def = NULL_TREE;
/* Integer and real constants are always usable.  */
3390 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3391 return true;
3393 if (TREE_CODE (operand) != SSA_NAME)
3394 return false;
3396 def_stmt = SSA_NAME_DEF_STMT (operand);
3397 if (def_stmt == NULL_TREE )
3399 if (vect_debug_details (NULL))
3400 fprintf (dump_file, "no def_stmt.");
3401 return false;
3404 /* empty stmt is expected only in case of a function argument.
3405 (Otherwise - we expect a phi_node or a modify_expr). */
3406 if (IS_EMPTY_STMT (def_stmt))
3408 tree arg = TREE_OPERAND (def_stmt, 0);
3409 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3410 return true;
3411 if (vect_debug_details (NULL))
3413 fprintf (dump_file, "Unexpected empty stmt: ");
3414 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3416 return false;
3419 /* phi_node inside the loop indicates an induction/reduction pattern.
3420 This is not supported yet. */
3421 bb = bb_for_stmt (def_stmt);
3422 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3424 if (vect_debug_details (NULL))
3425 fprintf (dump_file, "reduction/induction - unsupported.");
3426 return false; /* FORNOW: not supported yet. */
3429 /* Expecting a modify_expr or a phi_node. */
/* A PHI reaching here is outside LOOP (checked above), i.e. the operand
   is loop-invariant from LOOP's point of view.  */
3430 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3431 || TREE_CODE (def_stmt) == PHI_NODE)
3433 if (def)
3434 *def = def_stmt;
3435 return true;
3438 return false;
3442 /* Function vect_analyze_operations.
3444 Scan the loop stmts and make sure they are all vectorizable. */
3446 static bool
3447 vect_analyze_operations (loop_vec_info loop_vinfo)
3449 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3450 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3451 int nbbs = loop->num_nodes;
3452 block_stmt_iterator si;
/* 0 means "not determined yet"; set from the first relevant stmt.  */
3453 int vectorization_factor = 0;
3454 int i;
3455 bool ok;
3456 tree scalar_type;
3458 if (vect_debug_details (NULL))
3459 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3461 for (i = 0; i < nbbs; i++)
3463 basic_block bb = bbs[i];
3465 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3467 tree stmt = bsi_stmt (si);
3468 int nunits;
3469 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3470 tree vectype;
3472 if (vect_debug_details (NULL))
3474 fprintf (dump_file, "==> examining statement: ");
3475 print_generic_expr (dump_file, stmt, TDF_SLIM);
3478 gcc_assert (stmt_info);
3480 /* skip stmts which do not need to be vectorized.
3481 this is expected to include:
3482 - the COND_EXPR which is the loop exit condition
3483 - any LABEL_EXPRs in the loop
3484 - computations that are used only for array indexing or loop
3485 control */
3487 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3489 if (vect_debug_details (NULL))
3490 fprintf (dump_file, "irrelevant.");
3491 continue;
/* Pre-existing vector code in the loop is rejected outright.  */
3494 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3496 if (vect_debug_stats (loop) || vect_debug_details (loop))
3498 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3499 print_generic_expr (dump_file, stmt, TDF_SLIM);
3501 return false;
/* Pick the scalar type: from the data-ref if there is one, else from
   the LHS of a MODIFY_EXPR, else from the stmt itself.  */
3504 if (STMT_VINFO_DATA_REF (stmt_info))
3505 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3506 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3507 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3508 else
3509 scalar_type = TREE_TYPE (stmt);
3511 if (vect_debug_details (NULL))
3513 fprintf (dump_file, "get vectype for scalar type: ");
3514 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3517 vectype = get_vectype_for_scalar_type (scalar_type);
3518 if (!vectype)
3520 if (vect_debug_stats (loop) || vect_debug_details (loop))
3522 fprintf (dump_file, "not vectorized: unsupported data-type ");
3523 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3525 return false;
3528 if (vect_debug_details (NULL))
3530 fprintf (dump_file, "vectype: ");
3531 print_generic_expr (dump_file, vectype, TDF_SLIM);
3533 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* A stmt is supportable if any one of the vectorizable_* checks
   accepts it (operation, copy, load, or store).  */
3535 ok = (vectorizable_operation (stmt, NULL, NULL)
3536 || vectorizable_assignment (stmt, NULL, NULL)
3537 || vectorizable_load (stmt, NULL, NULL)
3538 || vectorizable_store (stmt, NULL, NULL));
3540 if (!ok)
3542 if (vect_debug_stats (loop) || vect_debug_details (loop))
3544 fprintf (dump_file, "not vectorized: stmt not supported: ");
3545 print_generic_expr (dump_file, stmt, TDF_SLIM);
3547 return false;
3550 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3551 if (vect_debug_details (NULL))
3552 fprintf (dump_file, "nunits = %d", nunits);
3554 if (vectorization_factor)
3556 /* FORNOW: don't allow mixed units.
3557 This restriction will be relaxed in the future. */
3558 if (nunits != vectorization_factor)
3560 if (vect_debug_stats (loop) || vect_debug_details (loop))
3561 fprintf (dump_file, "not vectorized: mixed data-types");
3562 return false;
3565 else
3566 vectorization_factor = nunits;
3568 #ifdef ENABLE_CHECKING
3569 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3570 * vectorization_factor == UNITS_PER_SIMD_WORD);
3571 #endif
3575 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3577 if (vectorization_factor <= 1)
3579 if (vect_debug_stats (loop) || vect_debug_details (loop))
3580 fprintf (dump_file, "not vectorized: unsupported data-type")
;
3581 return false;
3583 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3586 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3587 && vect_debug_details (NULL))
3588 fprintf (dump_file,
3589 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3590 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3592 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3593 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3595 /* In this case we have to generate epilog loop, that
3596 can be done only for loops with one entry edge. */
3597 if (LOOP_VINFO_LOOP (loop_vinfo)->num_entries != 1
3598 || !(LOOP_VINFO_LOOP (loop_vinfo)->pre_header))
3600 if (vect_debug_stats (loop) || vect_debug_details (loop))
3601 fprintf (dump_file, "not vectorized: more than one entry.");
3602 return false;
3606 return true;
3610 /* Function exist_non_indexing_operands_for_use_p
3612 USE is one of the uses attached to STMT. Check if USE is
3613 used in STMT for anything other than indexing an array. */
3615 static bool
3616 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3618 tree operand;
3619 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3621 /* USE corresponds to some operand in STMT. If there is no data
3622 reference in STMT, then any operand that corresponds to USE
3623 is not indexing an array. */
3624 if (!STMT_VINFO_DATA_REF (stmt_info))
3625 return true;
3627 /* STMT has a data_ref. FORNOW this means that its of one of
3628 the following forms:
3629 -1- ARRAY_REF = var
3630 -2- var = ARRAY_REF
3631 (This should have been verified in analyze_data_refs).
3633 'var' in the second case corresponds to a def, not a use,
3634 so USE cannot correspond to any operands that are not used
3635 for array indexing.
3637 Therefore, all we need to check is if STMT falls into the
3638 first case, and whether var corresponds to USE. */
/* LHS is an SSA_NAME => form -2- (a load): no non-indexing use.  */
3640 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3641 return false;
3643 operand = TREE_OPERAND (stmt, 1);
3645 if (TREE_CODE (operand) != SSA_NAME)
3646 return false;
/* Form -1- (a store) and the stored value is exactly USE.  */
3648 if (operand == use)
3649 return true;
3651 return false;
3655 /* Function vect_is_simple_iv_evolution.
3657 FORNOW: A simple evolution of an induction variables in the loop is
3658 considered a polynomial evolution with constant step. */
3660 static bool
3661 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3662 tree * step, bool strict)
3664 tree init_expr;
3665 tree step_expr;
3667 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3669 /* When there is no evolution in this loop, the evolution function
3670 is not "simple". */
3671 if (evolution_part == NULL_TREE)
3672 return false;
3674 /* When the evolution is a polynomial of degree >= 2
3675 the evolution function is not "simple". */
3676 if (tree_is_chrec (evolution_part))
3677 return false;
3679 step_expr = evolution_part;
3680 init_expr = unshare_expr (initial_condition (access_fn));
3682 if (vect_debug_details (NULL))
3684 fprintf (dump_file, "step: ");
3685 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3686 fprintf (dump_file, ", init: ");
3687 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* The outputs are set even if we end up returning false below; callers
   must rely only on the return value for validity.  */
3690 *init = init_expr;
3691 *step = step_expr;
/* A non-constant step is rejected as "unknown".  */
3693 if (TREE_CODE (step_expr) != INTEGER_CST)
3695 if (vect_debug_details (NULL))
3696 fprintf (dump_file, "step unknown.");
3697 return false;
/* STRICT further requires a unit step (step == 1).  */
3700 if (strict)
3701 if (!integer_onep (step_expr))
3703 if (vect_debug_details (NULL))
3704 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3705 return false;
3708 return true;
3712 /* Function vect_analyze_scalar_cycles.
3714 Examine the cross iteration def-use cycles of scalar variables, by
3715 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3716 cycles that they represent do not impede vectorization.
3718 FORNOW: Reduction as in the following loop, is not supported yet:
3719 loop1:
3720 for (i=0; i<N; i++)
3721 sum += a[i];
3722 The cross-iteration cycle corresponding to variable 'sum' will be
3723 considered too complicated and will impede vectorization.
3725 FORNOW: Induction as in the following loop, is not supported yet:
3726 loop2:
3727 for (i=0; i<N; i++)
3728 a[i] = i;
3730 However, the following loop *is* vectorizable:
3731 loop3:
3732 for (i=0; i<N; i++)
3733 a[i] = b[i];
3735 In both loops there exists a def-use cycle for the variable i:
3736 loop: i_2 = PHI (i_0, i_1)
3737 a[i_2] = ...;
3738 i_1 = i_2 + 1;
3739 GOTO loop;
3741 The evolution of the above cycle is considered simple enough,
3742 however, we also check that the cycle does not need to be
3743 vectorized, i.e - we check that the variable that this cycle
3744 defines is only used for array indexing or in stmts that do not
3745 need to be vectorized. This is not the case in loop2, but it
3746 *is* the case in loop3. */
3748 static bool
3749 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3751 tree phi;
3752 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3753 basic_block bb = loop->header;
3754 tree dummy;
3756 if (vect_debug_details (NULL))
3757 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
/* Walk every PHI in the loop header; each represents one cycle.  */
3759 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3761 tree access_fn = NULL;
3763 if (vect_debug_details (NULL))
3765 fprintf (dump_file, "Analyze phi: ");
3766 print_generic_expr (dump_file, phi, TDF_SLIM);
3769 /* Skip virtual phi's. The data dependences that are associated with
3770 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3772 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3774 if (vect_debug_details (NULL))
3775 fprintf (dump_file, "virtual phi. skip.");
3776 continue;
3779 /* Analyze the evolution function. */
3781 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3782 those of loop induction variables; This property is verified here.
3784 Furthermore, if that induction variable is used in an operation
3785 that needs to be vectorized (i.e, is not solely used to index
3786 arrays and check the exit condition) - we do not support its
3787 vectorization yet. This property is verified in vect_is_simple_use,
3788 during vect_analyze_operations. */
3790 access_fn = /* instantiate_parameters
3791 (loop,*/
3792 analyze_scalar_evolution (loop, PHI_RESULT (phi));
3794 if (!access_fn)
3796 if (vect_debug_stats (loop) || vect_debug_details (loop))
3797 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3798 return false;
3801 if (vect_debug_details (NULL))
3803 fprintf (dump_file, "Access function of PHI: ");
3804 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* Non-strict check: any constant step is acceptable here.  */
3807 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
3808 &dummy, false))
3810 if (vect_debug_stats (loop) || vect_debug_details (loop))
3811 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
3812 return false;
3816 return true;
3820 /* Function vect_analyze_data_ref_dependence.
3822 Return TRUE if there (might) exist a dependence between a memory-reference
3823 DRA and a memory-reference DRB. */
3825 static bool
3826 vect_analyze_data_ref_dependence (struct data_reference *dra,
3827 struct data_reference *drb,
3828 struct loop *loop)
3830 bool differ_p;
3831 struct data_dependence_relation *ddr;
/* If we cannot even tell whether the bases differ, be conservative
   and report a (possible) dependence.  */
3833 if (!array_base_name_differ_p (dra, drb, &differ_p))
3835 if (vect_debug_stats (loop) || vect_debug_details (loop))
3837 fprintf (dump_file,
3838 "not vectorized: can't determine dependence between: ");
3839 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3840 fprintf (dump_file, " and ");
3841 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3843 return true;
/* Distinct bases => no dependence possible.  */
3846 if (differ_p)
3847 return false;
3849 ddr = initialize_data_dependence_relation (dra, drb);
3850 compute_affine_dependence (ddr);
3852 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
3853 return false;
3855 if (vect_debug_stats (loop) || vect_debug_details (loop))
3857 fprintf (dump_file,
3858 "not vectorized: possible dependence between data-refs ");
3859 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
3860 fprintf (dump_file, " and ");
3861 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
3864 return true;
3868 /* Function vect_analyze_data_ref_dependences.
3870 Examine all the data references in the loop, and make sure there do not
3871 exist any data dependences between them.
3873 TODO: dependences which distance is greater than the vectorization factor
3874 can be ignored. */
3876 static bool
3877 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
3879 unsigned int i, j;
3880 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3881 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3882 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3884 /* Examine store-store (output) dependences. */
3886 if (vect_debug_details (NULL))
3887 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
3889 if (vect_debug_details (NULL))
3890 fprintf (dump_file, "compare all store-store pairs.");
/* Pairwise O(n^2) comparison; fails fast on the first possible
   dependence.  */
3892 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
3894 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3896 struct data_reference *dra =
3897 VARRAY_GENERIC_PTR (loop_write_refs, i);
3898 struct data_reference *drb =
3899 VARRAY_GENERIC_PTR (loop_write_refs, j);
3900 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3901 return false;
3905 /* Examine load-store (true/anti) dependences. */
3907 if (vect_debug_details (NULL))
3908 fprintf (dump_file, "compare all load-store pairs.");
3910 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
3912 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
3914 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
3915 struct data_reference *drb =
3916 VARRAY_GENERIC_PTR (loop_write_refs, j);
3917 if (vect_analyze_data_ref_dependence (dra, drb, loop))
3918 return false;
3922 return true;
3926 /* Function vect_get_first_index.
3928 REF is a data reference.
3929 If it is an ARRAY_REF: if its lower bound is simple enough,
3930 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
3931 If it is not an ARRAY_REF: REF has no "first index";
3932 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
3934 static bool
3935 vect_get_first_index (tree ref, tree *array_first_index)
3937 tree array_start;
3939 if (TREE_CODE (ref) != ARRAY_REF)
3940 *array_first_index = size_zero_node;
3941 else
3943 array_start = array_ref_low_bound (ref);
/* Only a host-representable integer constant lower bound is "simple".  */
3944 if (!host_integerp (array_start,0))
3946 if (vect_debug_details (NULL))
3948 fprintf (dump_file, "array min val not simple integer cst.");
3949 print_generic_expr (dump_file, array_start, TDF_DETAILS);
3951 return false;
3953 *array_first_index = array_start;
3956 return true;
3960 /* Function vect_compute_array_base_alignment.
3961 A utility function of vect_compute_array_ref_alignment.
3963 Compute the misalignment of ARRAY in bits.
3965 Input:
3966 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
3967 VECTYPE - we are interested in the misalignment modulo the size of vectype.
3968 if NULL: don't compute misalignment, just return the base of ARRAY.
3969 PREV_DIMENSIONS - initialized to one.
3970 MISALIGNMENT - the computed misalignment in bits.
3972 Output:
3973 If VECTYPE is not NULL:
3974 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
3975 the base of the array, and put the computed misalignment in MISALIGNMENT.
3976 If VECTYPE is NULL:
3977 Return the base of the array.
3979 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
3980 a[idx_N]...[idx_2][idx_1] is
3981 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
3982 ... + idx_N * dim_0 * ... * dim_N-1}.
3983 (The misalignment of &a is not checked here).
3984 Note, that every term contains dim_0, therefore, if dim_0 is a
3985 multiple of NUNITS, the whole sum is a multiple of NUNITS.
3986 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
3987 NUINTS, we can say that the misalignment of the sum is equal to
3988 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
3989 we can't determine this array misalignment, and we return
3990 false.
3991 We proceed recursively in this manner, accumulating total misalignment
3992 and the multiplication of previous dimensions for correct misalignment
3993 calculation. */
3995 static tree
3996 vect_compute_array_base_alignment (tree array,
3997 tree vectype,
3998 tree *prev_dimensions,
3999 tree *misalignment)
4001 tree index;
4002 tree domain;
4003 tree dimension_size;
4004 tree mis;
4005 tree bits_per_vectype;
4006 tree bits_per_vectype_unit;
4008 /* The 'stop condition' of the recursion. */
4009 if (TREE_CODE (array) != ARRAY_REF)
4010 return array;
4012 if (!vectype)
4013 /* Just get the base decl. */
4014 return vect_compute_array_base_alignment
4015 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
/* Give up if the accumulated values have overflowed or are not
   host-representable.  */
4017 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
4018 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
4019 return NULL_TREE;
/* dimension_size = (max - min) + 1 elements in this dimension.  */
4021 domain = TYPE_DOMAIN (TREE_TYPE (array));
4022 dimension_size =
4023 int_const_binop (PLUS_EXPR,
4024 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
4025 TYPE_MIN_VALUE (domain), 1),
4026 size_one_node, 1);
4028 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
4029 is a multiple of NUNITS:
4031 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
4033 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
4034 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
4035 if (integer_zerop (mis))
4036 /* This array is aligned. Continue just in order to get the base decl. */
4037 return vect_compute_array_base_alignment
4038 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
4040 index = TREE_OPERAND (array, 1);
4041 if (!host_integerp (index, 1))
4042 /* The current index is not constant. */
4043 return NULL_TREE;
/* Normalize the index to be zero-based in this dimension.  */
4045 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
4047 bits_per_vectype = fold_convert (unsigned_type_node,
4048 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4049 GET_MODE_SIZE (TYPE_MODE (vectype))));
4050 bits_per_vectype_unit = fold_convert (unsigned_type_node,
4051 build_int_cst (NULL_TREE, BITS_PER_UNIT *
4052 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
4054 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
4055 earlier:
4057 *misalignment =
4058 (*misalignment + index_val * dimension_size * *prev_dimensions)
4059 % vectype_nunits;
4062 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
4063 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
4064 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
4065 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
4066 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
/* Carry the product of the dimension sizes into the next (outer)
   recursion level.  */
4069 *prev_dimensions = int_const_binop (MULT_EXPR,
4070 *prev_dimensions, dimension_size, 1);
4072 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
4073 prev_dimensions,
4074 misalignment);
4078 /* Function vect_compute_data_ref_alignment
4080 Compute the misalignment of the data reference DR.
4082 Output:
4083 1. If during the misalignment computation it is found that the data reference
4084 cannot be vectorized then false is returned.
4085 2. DR_MISALIGNMENT (DR) is defined.
4087 FOR NOW: No analysis is actually performed. Misalignment is calculated
4088 only for trivial cases. TODO. */
4090 static bool
4091 vect_compute_data_ref_alignment (struct data_reference *dr,
4092 loop_vec_info loop_vinfo)
4094 tree stmt = DR_STMT (dr);
4095 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4096 tree ref = DR_REF (dr);
4097 tree vectype;
4098 tree scalar_type;
4099 tree offset = size_zero_node;
4100 tree base, bit_offset, alignment;
4101 tree unit_bits = fold_convert (unsigned_type_node,
4102 build_int_cst (NULL_TREE, BITS_PER_UNIT));
4103 tree dr_base;
4104 bool base_aligned_p;
4106 if (vect_debug_details (NULL))
4107 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4109 /* Initialize misalignment to unknown. */
4110 DR_MISALIGNMENT (dr) = -1;
4112 scalar_type = TREE_TYPE (ref);
4113 vectype = get_vectype_for_scalar_type (scalar_type);
4114 if (!vectype)
4116 if (vect_debug_details (NULL))
4118 fprintf (dump_file, "no vectype for stmt: ");
4119 print_generic_expr (dump_file, stmt, TDF_SLIM);
4120 fprintf (dump_file, " scalar_type: ");
4121 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4123 /* It is not possible to vectorize this data reference. */
4124 return false;
4126 STMT_VINFO_VECTYPE (stmt_info) = vectype;
4127 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
4129 if (TREE_CODE (ref) == ARRAY_REF)
4130 dr_base = ref;
4131 else
4132 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4134 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
4135 loop_vinfo, &bit_offset, &base_aligned_p);
/* Unknown base: leave DR_MISALIGNMENT at -1 ("unknown") but return
   true -- an unknown alignment is not a vectorization failure here.  */
4136 if (!base)
4138 if (vect_debug_details (NULL))
4140 fprintf (dump_file, "Unknown alignment for access: ");
4141 print_generic_expr (dump_file,
4142 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
4144 return true;
4147 if (!base_aligned_p)
4149 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4151 if (vect_debug_details (NULL))
4153 fprintf (dump_file, "can't force alignment of ref: ");
4154 print_generic_expr (dump_file, ref, TDF_SLIM);
4156 return true;
4159 /* Force the alignment of the decl.
4160 NOTE: This is the only change to the code we make during
4161 the analysis phase, before deciding to vectorize the loop. */
4162 if (vect_debug_details (NULL))
4163 fprintf (dump_file, "force alignment");
4164 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4165 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
4168 /* At this point we assume that the base is aligned, and the offset from it
4169 (including index, if relevant) has been computed and is in BIT_OFFSET. */
4170 gcc_assert (base_aligned_p
4171 || (TREE_CODE (base) == VAR_DECL
4172 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4174 /* Convert into bytes. */
4175 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
4176 /* Check that there is no remainder in bits. */
4177 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
4178 if (!integer_zerop (bit_offset))
4180 if (vect_debug_details (NULL))
4182 fprintf (dump_file, "bit offset alignment: ");
4183 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
4185 return false;
4188 /* Alignment required, in bytes: */
4189 alignment = fold_convert (unsigned_type_node,
4190 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
4192 /* Modulo alignment. */
4193 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
4194 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
4196 if (vect_debug_details (NULL))
4197 fprintf (dump_file, "unexpected misalign value");
4198 return false;
/* Record the byte misalignment (0 <= value < vector alignment).  */
4201 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
4203 if (vect_debug_details (NULL))
4204 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4206 return true;
4210 /* Function vect_compute_array_ref_alignment
4212 Compute the alignment of an array-ref.
4213 The alignment we compute here is relative to
4214 TYPE_ALIGN(VECTYPE) boundary.
4216 Output:
4217 OFFSET - the alignment in bits
4218 Return value - the base of the array-ref. E.g,
4219 if the array-ref is a.b[k].c[i][j] the returned
4220 base is a.b[k].c
4223 static tree
4224 vect_compute_array_ref_alignment (struct data_reference *dr,
4225 loop_vec_info loop_vinfo,
4226 tree vectype,
4227 tree *offset)
4229 tree array_first_index = size_zero_node;
4230 tree init;
4231 tree ref = DR_REF (dr);
4232 tree scalar_type = TREE_TYPE (ref);
4233 tree oprnd0 = TREE_OPERAND (ref, 0);
4234 tree dims = size_one_node;
4235 tree misalign = size_zero_node;
4236 tree next_ref, this_offset = size_zero_node;
4237 tree nunits;
4238 tree nbits;
4240 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
4241 /* The reference is an array without its last index. */
4242 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims,
4243 &misalign);
4244 else
4245 next_ref = vect_compute_array_base_alignment (oprnd0, vectype, &dims,
4246 &misalign);
4247 if (!vectype)
4248 /* Alignment is not requested. Just return the base. */
4249 return next_ref;
4251 /* Compute alignment. */
4252 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
4253 return NULL_TREE;
4254 this_offset = misalign;
4256 /* Check the first index accessed. */
4257 if (!vect_get_first_index (ref, &array_first_index))
4259 if (vect_debug_details (NULL))
4260 fprintf (dump_file, "no first_index for array.");
4261 return NULL_TREE;
4264 /* Check the index of the array_ref. */
4265 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
4266 LOOP_VINFO_LOOP (loop_vinfo)->num);
4268 /* FORNOW: In order to simplify the handling of alignment, we make sure
4269 that the first location at which the array is accessed ('init') is on an
4270 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
4271 This is too conservative, since we require that
4272 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
4273 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
4274 This should be relaxed in the future. */
4276 if (!init || !host_integerp (init, 0))
4278 if (vect_debug_details (NULL))
4279 fprintf (dump_file, "non constant init. ");
4280 return NULL_TREE;
4283 /* bytes per scalar element: */
4284 nunits = fold_convert (unsigned_type_node,
4285 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
4286 nbits = int_const_binop (MULT_EXPR, nunits,
4287 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
4289 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
4290 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
4291 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
4292 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
4294 /* TODO: allow negative misalign values. */
4295 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
4297 if (vect_debug_details (NULL))
4298 fprintf (dump_file, "unexpected misalign value");
4299 return NULL_TREE;
4301 *offset = misalign;
4302 return next_ref;
4306 /* Function vect_compute_data_refs_alignment
4308 Compute the misalignment of data references in the loop.
4309 This pass may take place at function granularity instead of at loop
4310 granularity.
4312 FOR NOW: No analysis is actually performed. Misalignment is calculated
4313 only for trivial cases. TODO. */
4315 static bool
4316 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4318 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4319 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4320 unsigned int i;
4322 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4324 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4325 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4326 return false;
4329 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4331 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4332 if (!vect_compute_data_ref_alignment (dr, loop_vinfo))
4333 return false;
4336 return true;
4340 /* Function vect_enhance_data_refs_alignment
4342 This pass will use loop versioning and loop peeling in order to enhance
4343 the alignment of data references in the loop.
4345 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4346 original loop is to be vectorized; Any other loops that are created by
4347 the transformations performed in this pass - are not supposed to be
4348 vectorized. This restriction will be relaxed.
4350 FOR NOW: No transformation is actually performed. TODO. */
4352 static void
4353 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4355 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4356 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4357 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4358 unsigned int i;
4361 This pass will require a cost model to guide it whether to apply peeling
4362 or versioning or a combination of the two. For example, the scheme that
4363 intel uses when given a loop with several memory accesses, is as follows:
4364 choose one memory access ('p') which alignment you want to force by doing
4365 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4366 other accesses are not necessarily aligned, or (2) use loop versioning to
4367 generate one loop in which all accesses are aligned, and another loop in
4368 which only 'p' is necessarily aligned.
4370 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4371 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
4372 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4374 Devising a cost model is the most critical aspect of this work. It will
4375 guide us on which access to peel for, whether to use loop versioning, how
4376 many versions to create, etc. The cost model will probably consist of
4377 generic considerations as well as target specific considerations (on
4378 powerpc for example, misaligned stores are more painful than misaligned
4379 loads).
4381 Here is the general steps involved in alignment enhancements:
4383 -- original loop, before alignment analysis:
4384 for (i=0; i<N; i++){
4385 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4386 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4389 -- After vect_compute_data_refs_alignment:
4390 for (i=0; i<N; i++){
4391 x = q[i]; # DR_MISALIGNMENT(q) = 3
4392 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4395 -- Possibility 1: we do loop versioning:
4396 if (p is aligned) {
4397 for (i=0; i<N; i++){ # loop 1A
4398 x = q[i]; # DR_MISALIGNMENT(q) = 3
4399 p[i] = y; # DR_MISALIGNMENT(p) = 0
4402 else {
4403 for (i=0; i<N; i++){ # loop 1B
4404 x = q[i]; # DR_MISALIGNMENT(q) = 3
4405 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4409 -- Possibility 2: we do loop peeling:
4410 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4411 x = q[i];
4412 p[i] = y;
4414 for (i = 3; i < N; i++){ # loop 2A
4415 x = q[i]; # DR_MISALIGNMENT(q) = 0
4416 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4419 -- Possibility 3: combination of loop peeling and versioning:
4420 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4421 x = q[i];
4422 p[i] = y;
4424 if (p is aligned) {
4425 for (i = 3; i<N; i++){ # loop 3A
4426 x = q[i]; # DR_MISALIGNMENT(q) = 0
4427 p[i] = y; # DR_MISALIGNMENT(p) = 0
4430 else {
4431 for (i = 3; i<N; i++){ # loop 3B
4432 x = q[i]; # DR_MISALIGNMENT(q) = 0
4433 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4437 These loops are later passed to loop_transform to be vectorized. The
4438 vectorizer will use the alignment information to guide the transformation
4439 (whether to generate regular loads/stores, or with special handling for
4440 misalignment).
4443 /* (1) Peeling to force alignment. */
4445 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4446 Considerations:
4447 + How many accesses will become aligned due to the peeling
4448 - How many accesses will become unaligned due to the peeling,
4449 and the cost of misaligned accesses.
4450 - The cost of peeling (the extra runtime checks, the increase
4451 in code size).
4453 The scheme we use FORNOW: peel to force the alignment of the first
4454 misaligned store in the loop.
4455 Rationale: misaligned stores are not yet supported.
4457 TODO: Use a better cost model. */
4459 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4461 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4462 if (!aligned_access_p (dr))
4464 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4465 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4466 break;
4470 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4472 if (vect_debug_details (loop))
4473 fprintf (dump_file, "Peeling for alignment will not be applied.");
4474 return;
4476 else
4477 if (vect_debug_details (loop))
4478 fprintf (dump_file, "Peeling for alignment will be applied.");
4481 /* (1.2) Update the alignment info according to the peeling factor.
4482 If the misalignment of the DR we peel for is M, then the
4483 peeling factor is VF - M, and the misalignment of each access DR_i
4484 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4485 If the misalignment of the DR we peel for is unknown, then the
4486 misalignment of each access DR_i in the loop is also unknown.
4488 FORNOW: set the misalignment of the accesses to unknown even
4489 if the peeling factor is known at compile time.
4491 TODO: - if the peeling factor is known at compile time, use that
4492 when updating the misalignment info of the loop DRs.
4493 - consider accesses that are known to have the same
4494 alignment, even if that alignment is unknown. */
4496 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4498 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4499 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4500 DR_MISALIGNMENT (dr) = 0;
4501 else
4502 DR_MISALIGNMENT (dr) = -1;
4504 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4506 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4507 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4508 DR_MISALIGNMENT (dr) = 0;
4509 else
4510 DR_MISALIGNMENT (dr) = -1;
4515 /* Function vect_analyze_data_refs_alignment
4517 Analyze the alignment of the data-references in the loop.
4518 FOR NOW: Until support for misliagned accesses is in place, only if all
4519 accesses are aligned can the loop be vectorized. This restriction will be
4520 relaxed. */
4522 static bool
4523 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4525 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4526 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4527 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4528 enum dr_alignment_support supportable_dr_alignment;
4529 unsigned int i;
4531 if (vect_debug_details (NULL))
4532 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4535 /* This pass may take place at function granularity instead of at loop
4536 granularity. */
4538 if (!vect_compute_data_refs_alignment (loop_vinfo))
4540 if (vect_debug_details (loop) || vect_debug_stats (loop))
4541 fprintf (dump_file,
4542 "not vectorized: can't calculate alignment for data ref.");
4543 return false;
4547 /* This pass will decide on using loop versioning and/or loop peeling in
4548 order to enhance the alignment of data references in the loop. */
4550 vect_enhance_data_refs_alignment (loop_vinfo);
4553 /* Finally, check that all the data references in the loop can be
4554 handled with respect to their alignment. */
4556 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4558 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4559 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4560 if (!supportable_dr_alignment)
4562 if (vect_debug_details (loop) || vect_debug_stats (loop))
4563 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4564 return false;
4567 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4569 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4570 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4571 if (!supportable_dr_alignment)
4573 if (vect_debug_details (loop) || vect_debug_stats (loop))
4574 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4575 return false;
4579 return true;
4583 /* Function vect_analyze_data_ref_access.
4585 Analyze the access pattern of the data-reference DR. For now, a data access
4586 has to consecutive and aligned to be considered vectorizable. */
4588 static bool
4589 vect_analyze_data_ref_access (struct data_reference *dr)
4591 varray_type access_fns = DR_ACCESS_FNS (dr);
4592 tree access_fn;
4593 tree init, step;
4594 unsigned int dimensions, i;
4596 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
4597 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
4598 access is contiguous). */
4599 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
4601 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
4603 access_fn = DR_ACCESS_FN (dr, i);
4605 if (evolution_part_in_loop_num (access_fn,
4606 loop_containing_stmt (DR_STMT (dr))->num))
4608 /* Evolution part is not NULL in this loop (it is neither constant
4609 nor invariant). */
4610 if (vect_debug_details (NULL))
4612 fprintf (dump_file,
4613 "not vectorized: complicated multidim. array access.");
4614 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4616 return false;
4620 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
4621 if (!evolution_function_is_constant_p (access_fn)
4622 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
4623 access_fn, &init, &step, true))
4625 if (vect_debug_details (NULL))
4627 fprintf (dump_file, "not vectorized: complicated access function.");
4628 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4630 return false;
4633 return true;
4637 /* Function vect_analyze_data_ref_accesses.
4639 Analyze the access pattern of all the data references in the loop.
4641 FORNOW: the only access pattern that is considered vectorizable is a
4642 simple step 1 (consecutive) access.
4644 FORNOW: handle only arrays and pointer accesses. */
4646 static bool
4647 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4649 unsigned int i;
4650 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4651 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4653 if (vect_debug_details (NULL))
4654 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4656 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4658 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4659 bool ok = vect_analyze_data_ref_access (dr);
4660 if (!ok)
4662 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4663 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4664 fprintf (dump_file, "not vectorized: complicated access pattern.");
4665 return false;
4669 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4671 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4672 bool ok = vect_analyze_data_ref_access (dr);
4673 if (!ok)
4675 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4676 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4677 fprintf (dump_file, "not vectorized: complicated access pattern.");
4678 return false;
4682 return true;
4686 /* Function vect_analyze_pointer_ref_access.
4688 Input:
4689 STMT - a stmt that contains a data-ref
4690 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4692 If the data-ref access is vectorizable, return a data_reference structure
4693 that represents it (DR). Otherwise - return NULL. */
4695 static struct data_reference *
4696 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4698 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4699 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4700 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4701 tree init, step;
4702 int step_val;
4703 tree reftype, innertype;
4704 enum machine_mode innermode;
4705 tree indx_access_fn;
4706 int loopnum = loop->num;
4707 struct data_reference *dr;
4709 if (!access_fn)
4711 if (vect_debug_stats (loop) || vect_debug_details (loop))
4712 fprintf (dump_file, "not vectorized: complicated pointer access.");
4713 return NULL;
4716 if (vect_debug_details (NULL))
4718 fprintf (dump_file, "Access function of ptr: ");
4719 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4722 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4724 if (vect_debug_stats (loop) || vect_debug_details (loop))
4725 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4726 return NULL;
4729 STRIP_NOPS (init);
4731 if (!host_integerp (step,0))
4733 if (vect_debug_stats (loop) || vect_debug_details (loop))
4734 fprintf (dump_file,
4735 "not vectorized: non constant step for pointer access.");
4736 return NULL;
4739 step_val = TREE_INT_CST_LOW (step);
4741 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4742 if (TREE_CODE (reftype) != POINTER_TYPE)
4744 if (vect_debug_stats (loop) || vect_debug_details (loop))
4745 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4746 return NULL;
4749 reftype = TREE_TYPE (init);
4750 if (TREE_CODE (reftype) != POINTER_TYPE)
4752 if (vect_debug_stats (loop) || vect_debug_details (loop))
4753 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4754 return NULL;
4757 innertype = TREE_TYPE (reftype);
4758 innermode = TYPE_MODE (innertype);
4759 if (GET_MODE_SIZE (innermode) != step_val)
4761 /* FORNOW: support only consecutive access */
4762 if (vect_debug_stats (loop) || vect_debug_details (loop))
4763 fprintf (dump_file, "not vectorized: non consecutive access.");
4764 return NULL;
4767 indx_access_fn =
4768 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4769 if (vect_debug_details (NULL))
4771 fprintf (dump_file, "Access function of ptr indx: ");
4772 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4774 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4775 return dr;
4779 /* Function vect_get_symbl_and_dr.
4781 The function returns SYMBL - the relevant variable for
4782 memory tag (for aliasing purposes).
4783 Also data reference structure DR is created.
4785 Input:
4786 MEMREF - data reference in STMT
4787 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4789 Output:
4790 DR - data_reference struct for MEMREF
4791 return value - the relevant variable for memory tag (for aliasing purposes).
4795 static tree
4796 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
4797 loop_vec_info loop_vinfo, struct data_reference **dr)
4799 tree symbl, oprnd0, oprnd1;
4800 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4801 tree offset;
4802 tree array_base, base;
4803 struct data_reference *new_dr;
4804 bool base_aligned_p;
4806 *dr = NULL;
4807 switch (TREE_CODE (memref))
4809 case INDIRECT_REF:
4810 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4811 if (! new_dr)
4812 return NULL_TREE;
4813 *dr = new_dr;
4814 symbl = DR_BASE_NAME (new_dr);
4815 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
4817 switch (TREE_CODE (symbl))
4819 case PLUS_EXPR:
4820 case MINUS_EXPR:
4821 oprnd0 = TREE_OPERAND (symbl, 0);
4822 oprnd1 = TREE_OPERAND (symbl, 1);
4824 STRIP_NOPS(oprnd1);
4825 /* Only {address_base + offset} expressions are supported,
4826 where address_base can be POINTER_TYPE or ARRAY_TYPE and
4827 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
4828 TODO: swap operands if {offset + address_base}. */
4829 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4830 && TREE_CODE (oprnd1) != INTEGER_CST)
4831 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4832 return NULL_TREE;
4834 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
4835 symbl = oprnd0;
4836 else
4837 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
4838 loop_vinfo, &new_dr);
4840 case SSA_NAME:
4841 case ADDR_EXPR:
4842 /* symbl remains unchanged. */
4843 break;
4845 default:
4846 if (vect_debug_details (NULL))
4848 fprintf (dump_file, "unhandled data ref: ");
4849 print_generic_expr (dump_file, memref, TDF_SLIM);
4850 fprintf (dump_file, " (symbl ");
4851 print_generic_expr (dump_file, symbl, TDF_SLIM);
4852 fprintf (dump_file, ") in stmt ");
4853 print_generic_expr (dump_file, stmt, TDF_SLIM);
4855 return NULL_TREE;
4857 break;
4859 case ARRAY_REF:
4860 offset = size_zero_node;
4862 /* Store the array base in the stmt info.
4863 For one dimensional array ref a[i], the base is a,
4864 for multidimensional a[i1][i2]..[iN], the base is
4865 a[i1][i2]..[iN-1]. */
4866 array_base = TREE_OPERAND (memref, 0);
4867 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
4869 new_dr = analyze_array (stmt, memref, is_read);
4870 *dr = new_dr;
4872 /* Find the relevant symbol for aliasing purposes. */
4873 base = DR_BASE_NAME (new_dr);
4874 switch (TREE_CODE (base))
4876 case VAR_DECL:
4877 symbl = base;
4878 break;
4880 case INDIRECT_REF:
4881 symbl = TREE_OPERAND (base, 0);
4882 break;
4884 case COMPONENT_REF:
4885 /* Could have recorded more accurate information -
4886 i.e, the actual FIELD_DECL that is being referenced -
4887 but later passes expect VAR_DECL as the nmt. */
4888 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
4889 loop_vinfo, &offset, &base_aligned_p);
4890 if (symbl)
4891 break;
4892 /* fall through */
4893 default:
4894 if (vect_debug_details (NULL))
4896 fprintf (dump_file, "unhandled struct/class field access ");
4897 print_generic_expr (dump_file, stmt, TDF_SLIM);
4899 return NULL_TREE;
4901 break;
4903 default:
4904 if (vect_debug_details (NULL))
4906 fprintf (dump_file, "unhandled data ref: ");
4907 print_generic_expr (dump_file, memref, TDF_SLIM);
4908 fprintf (dump_file, " in stmt ");
4909 print_generic_expr (dump_file, stmt, TDF_SLIM);
4911 return NULL_TREE;
4913 return symbl;
4917 /* Function vect_analyze_data_refs.
4919 Find all the data references in the loop.
4921 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4922 which base is really an array (not a pointer) and which alignment
4923 can be forced. This restriction will be relaxed. */
4925 static bool
4926 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4928 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4929 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4930 int nbbs = loop->num_nodes;
4931 block_stmt_iterator si;
4932 int j;
4933 struct data_reference *dr;
4934 tree tag;
4935 tree address_base;
4936 bool base_aligned_p;
4937 tree offset;
4939 if (vect_debug_details (NULL))
4940 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4942 for (j = 0; j < nbbs; j++)
4944 basic_block bb = bbs[j];
4945 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4947 bool is_read = false;
4948 tree stmt = bsi_stmt (si);
4949 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4950 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4951 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4952 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4953 varray_type *datarefs = NULL;
4954 int nvuses, nv_may_defs, nv_must_defs;
4955 tree memref = NULL;
4956 tree symbl;
4958 /* Assumption: there exists a data-ref in stmt, if and only if
4959 it has vuses/vdefs. */
4961 if (!vuses && !v_may_defs && !v_must_defs)
4962 continue;
4964 nvuses = NUM_VUSES (vuses);
4965 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4966 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4968 if (nvuses && (nv_may_defs || nv_must_defs))
4970 if (vect_debug_details (NULL))
4972 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4973 print_generic_expr (dump_file, stmt, TDF_SLIM);
4975 return false;
4978 if (TREE_CODE (stmt) != MODIFY_EXPR)
4980 if (vect_debug_details (NULL))
4982 fprintf (dump_file, "unexpected vops in stmt: ");
4983 print_generic_expr (dump_file, stmt, TDF_SLIM);
4985 return false;
4988 if (vuses)
4990 memref = TREE_OPERAND (stmt, 1);
4991 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4992 is_read = true;
4994 else /* vdefs */
4996 memref = TREE_OPERAND (stmt, 0);
4997 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4998 is_read = false;
5001 /* Analyze MEMREF. If it is of a supported form, build data_reference
5002 struct for it (DR) and find the relevant symbol for aliasing
5003 purposes. */
5004 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo,
5005 &dr);
5006 if (!symbl)
5008 if (vect_debug_stats (loop) || vect_debug_details (loop))
5010 fprintf (dump_file, "not vectorized: unhandled data ref: ");
5011 print_generic_expr (dump_file, stmt, TDF_SLIM);
5013 return false;
5016 /* Find and record the memtag assigned to this data-ref. */
5017 switch (TREE_CODE (symbl))
5019 case VAR_DECL:
5020 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5021 break;
5023 case SSA_NAME:
5024 symbl = SSA_NAME_VAR (symbl);
5025 tag = get_var_ann (symbl)->type_mem_tag;
5026 if (!tag)
5028 tree ptr = TREE_OPERAND (memref, 0);
5029 if (TREE_CODE (ptr) == SSA_NAME)
5030 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
5032 if (!tag)
5034 if (vect_debug_stats (loop) || vect_debug_details (loop))
5035 fprintf (dump_file, "not vectorized: no memtag for ref.");
5036 return false;
5038 STMT_VINFO_MEMTAG (stmt_info) = tag;
5039 break;
5041 case ADDR_EXPR:
5042 address_base = TREE_OPERAND (symbl, 0);
5044 switch (TREE_CODE (address_base))
5046 case ARRAY_REF:
5047 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0),
5048 DR_IS_READ(dr));
5049 STMT_VINFO_MEMTAG (stmt_info) =
5050 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
5051 loop_vinfo, &offset,
5052 &base_aligned_p);
5053 break;
5055 case VAR_DECL:
5056 STMT_VINFO_MEMTAG (stmt_info) = address_base;
5057 break;
5059 default:
5060 if (vect_debug_stats (loop) || vect_debug_details (loop))
5062 fprintf (dump_file,
5063 "not vectorized: unhandled address expr: ");
5064 print_generic_expr (dump_file, stmt, TDF_SLIM);
5066 return false;
5068 break;
5070 default:
5071 if (vect_debug_stats (loop) || vect_debug_details (loop))
5073 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
5074 print_generic_expr (dump_file, memref, TDF_SLIM);
5076 return false;
5079 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5080 STMT_VINFO_DATA_REF (stmt_info) = dr;
5084 return true;
5088 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5090 /* Function vect_mark_relevant.
5092 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5094 static void
5095 vect_mark_relevant (varray_type worklist, tree stmt)
5097 stmt_vec_info stmt_info;
5099 if (vect_debug_details (NULL))
5100 fprintf (dump_file, "mark relevant.");
5102 if (TREE_CODE (stmt) == PHI_NODE)
5104 VARRAY_PUSH_TREE (worklist, stmt);
5105 return;
5108 stmt_info = vinfo_for_stmt (stmt);
5110 if (!stmt_info)
5112 if (vect_debug_details (NULL))
5114 fprintf (dump_file, "mark relevant: no stmt info!!.");
5115 print_generic_expr (dump_file, stmt, TDF_SLIM);
5117 return;
5120 if (STMT_VINFO_RELEVANT_P (stmt_info))
5122 if (vect_debug_details (NULL))
5123 fprintf (dump_file, "already marked relevant.");
5124 return;
5127 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5128 VARRAY_PUSH_TREE (worklist, stmt);
5132 /* Function vect_stmt_relevant_p.
5134 Return true if STMT in loop that is represented by LOOP_VINFO is
5135 "relevant for vectorization".
5137 A stmt is considered "relevant for vectorization" if:
5138 - it has uses outside the loop.
5139 - it has vdefs (it alters memory).
5140 - control stmts in the loop (except for the exit condition).
5142 CHECKME: what other side effects would the vectorizer allow? */
5144 static bool
5145 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
5147 v_may_def_optype v_may_defs;
5148 v_must_def_optype v_must_defs;
5149 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5150 int i;
5151 dataflow_t df;
5152 int num_uses;
5154 /* cond stmt other than loop exit cond. */
5155 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
5156 return true;
5158 /* changing memory. */
5159 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5160 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
5161 if (v_may_defs || v_must_defs)
5163 if (vect_debug_details (NULL))
5164 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
5165 return true;
5168 /* uses outside the loop. */
5169 df = get_immediate_uses (stmt);
5170 num_uses = num_immediate_uses (df);
5171 for (i = 0; i < num_uses; i++)
5173 tree use = immediate_use (df, i);
5174 basic_block bb = bb_for_stmt (use);
5175 if (!flow_bb_inside_loop_p (loop, bb))
5177 if (vect_debug_details (NULL))
5178 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
5179 return true;
5183 return false;
5187 /* Function vect_mark_stmts_to_be_vectorized.
5189 Not all stmts in the loop need to be vectorized. For example:
5191 for i...
5192 for j...
5193 1. T0 = i + j
5194 2. T1 = a[T0]
5196 3. j = j + 1
5198 Stmt 1 and 3 do not need to be vectorized, because loop control and
5199 addressing of vectorized data-refs are handled differently.
5201 This pass detects such stmts. */
5203 static bool
5204 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
5206 varray_type worklist;
5207 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5208 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5209 unsigned int nbbs = loop->num_nodes;
5210 block_stmt_iterator si;
5211 tree stmt;
5212 stmt_ann_t ann;
5213 unsigned int i;
5214 int j;
5215 use_optype use_ops;
5216 stmt_vec_info stmt_info;
5218 if (vect_debug_details (NULL))
5219 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
5221 VARRAY_TREE_INIT (worklist, 64, "work list");
5223 /* 1. Init worklist. */
5225 for (i = 0; i < nbbs; i++)
5227 basic_block bb = bbs[i];
5228 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5230 stmt = bsi_stmt (si);
5232 if (vect_debug_details (NULL))
5234 fprintf (dump_file, "init: stmt relevant? ");
5235 print_generic_expr (dump_file, stmt, TDF_SLIM);
5238 stmt_info = vinfo_for_stmt (stmt);
5239 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
5241 if (vect_stmt_relevant_p (stmt, loop_vinfo))
5242 vect_mark_relevant (worklist, stmt);
5247 /* 2. Process_worklist */
5249 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
5251 stmt = VARRAY_TOP_TREE (worklist);
5252 VARRAY_POP (worklist);
5254 if (vect_debug_details (NULL))
5256 fprintf (dump_file, "worklist: examine stmt: ");
5257 print_generic_expr (dump_file, stmt, TDF_SLIM);
5260 /* Examine the USES in this statement. Mark all the statements which
5261 feed this statement's uses as "relevant", unless the USE is used as
5262 an array index. */
5264 if (TREE_CODE (stmt) == PHI_NODE)
5266 /* follow the def-use chain inside the loop. */
5267 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
5269 tree arg = PHI_ARG_DEF (stmt, j);
5270 tree def_stmt = NULL_TREE;
5271 basic_block bb;
5272 if (!vect_is_simple_use (arg, loop, &def_stmt))
5274 if (vect_debug_details (NULL))
5275 fprintf (dump_file, "worklist: unsupported use.");
5276 varray_clear (worklist);
5277 return false;
5279 if (!def_stmt)
5280 continue;
5282 if (vect_debug_details (NULL))
5284 fprintf (dump_file, "worklist: def_stmt: ");
5285 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
5288 bb = bb_for_stmt (def_stmt);
5289 if (flow_bb_inside_loop_p (loop, bb))
5290 vect_mark_relevant (worklist, def_stmt);
5294 ann = stmt_ann (stmt);
5295 use_ops = USE_OPS (ann);
5297 for (i = 0; i < NUM_USES (use_ops); i++)
5299 tree use = USE_OP (use_ops, i);
5301 /* We are only interested in uses that need to be vectorized. Uses
5302 that are used for address computation are not considered relevant.
5304 if (exist_non_indexing_operands_for_use_p (use, stmt))
5306 tree def_stmt = NULL_TREE;
5307 basic_block bb;
5308 if (!vect_is_simple_use (use, loop, &def_stmt))
5310 if (vect_debug_details (NULL))
5311 fprintf (dump_file, "worklist: unsupported use.");
5312 varray_clear (worklist);
5313 return false;
5316 if (!def_stmt)
5317 continue;
5319 if (vect_debug_details (NULL))
5321 fprintf (dump_file, "worklist: examine use %d: ", i);
5322 print_generic_expr (dump_file, use, TDF_SLIM);
5325 bb = bb_for_stmt (def_stmt);
5326 if (flow_bb_inside_loop_p (loop, bb))
5327 vect_mark_relevant (worklist, def_stmt);
5330 } /* while worklist */
5332 varray_clear (worklist);
5333 return true;
5337 /* Function vect_analyze_loop_with_symbolic_num_of_iters.
5339 In case the number of iterations that LOOP iterates in unknown at compile
5340 time, an epilog loop will be generated, and the loop induction variables
5341 (IVs) will be "advanced" to the value they are supposed to take just before
5342 the epilog loop. Here we check that the access function of the loop IVs
5343 and the expression that represents the loop bound are simple enough.
5344 These restrictions will be relaxed in the future. */
5346 static bool
5347 vect_analyze_loop_with_symbolic_num_of_iters (tree niters,
5348 struct loop *loop)
5350 basic_block bb = loop->header;
5351 tree phi;
5353 if (vect_debug_details (NULL))
5354 fprintf (dump_file,
5355 "\n<<vect_analyze_loop_with_symbolic_num_of_iters>>\n");
5357 if (chrec_contains_undetermined (niters))
5359 if (vect_debug_details (NULL))
5360 fprintf (dump_file, "Infinite number of iterations.");
5361 return false;
5364 if (!niters)
5366 if (vect_debug_details (NULL))
5367 fprintf (dump_file, "niters is NULL pointer.");
5368 return false;
5371 if (vect_debug_details (NULL))
5373 fprintf (dump_file, "Symbolic number of iterations is ");
5374 print_generic_expr (dump_file, niters, TDF_DETAILS);
5377 /* Analyze phi functions of the loop header. */
5379 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5381 tree access_fn = NULL;
5382 tree evolution_part;
5384 if (vect_debug_details (NULL))
5386 fprintf (dump_file, "Analyze phi: ");
5387 print_generic_expr (dump_file, phi, TDF_SLIM);
5390 /* Skip virtual phi's. The data dependences that are associated with
5391 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
5393 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5395 if (vect_debug_details (NULL))
5396 fprintf (dump_file, "virtual phi. skip.");
5397 continue;
5400 /* Analyze the evolution function. */
5402 access_fn = instantiate_parameters
5403 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
5405 if (!access_fn)
5407 if (vect_debug_details (NULL))
5408 fprintf (dump_file, "No Access function.");
5409 return false;
5412 if (vect_debug_details (NULL))
5414 fprintf (dump_file, "Access function of PHI: ");
5415 print_generic_expr (dump_file, access_fn, TDF_SLIM);
5418 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
5420 if (evolution_part == NULL_TREE)
5421 return false;
5423 /* FORNOW: We do not transform initial conditions of IVs
5424 which evolution functions are a polynomial of degree >= 2. */
5426 if (tree_is_chrec (evolution_part))
5427 return false;
5430 return true;
5434 /* Function vect_get_loop_niters.
5436 Determine how many iterations the loop is executed. */
5438 static tree
5439 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
5441 tree niters;
5443 if (vect_debug_details (NULL))
5444 fprintf (dump_file, "\n<<get_loop_niters>>\n");
5446 niters = number_of_iterations_in_loop (loop);
5448 if (niters != NULL_TREE
5449 && niters != chrec_dont_know)
5451 *number_of_iterations = niters;
5453 if (vect_debug_details (NULL))
5455 fprintf (dump_file, "==> get_loop_niters:" );
5456 print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
5460 return get_loop_exit_condition (loop);
5464 /* Function vect_analyze_loop_form.
5466 Verify the following restrictions (some may be relaxed in the future):
5467 - it's an inner-most loop
5468 - number of BBs = 2 (which are the loop header and the latch)
5469 - the loop has a pre-header
5470 - the loop has a single entry and exit
5471 - the loop exit condition is simple enough, and the number of iterations
5472 can be analyzed (a countable loop). */
5474 static loop_vec_info
5475 vect_analyze_loop_form (struct loop *loop)
5477 loop_vec_info loop_vinfo;
5478 tree loop_cond;
5479 tree number_of_iterations = NULL;
5481 if (vect_debug_details (loop))
5482 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
5484 if (loop->inner
5485 || !loop->single_exit
5486 || loop->num_nodes != 2)
5488 if (vect_debug_stats (loop) || vect_debug_details (loop))
5490 fprintf (dump_file, "not vectorized: bad loop form. ");
5491 if (loop->inner)
5492 fprintf (dump_file, "nested loop.");
5493 else if (!loop->single_exit)
5494 fprintf (dump_file, "multiple exits.");
5495 else if (loop->num_nodes != 2)
5496 fprintf (dump_file, "too many BBs in loop.");
5499 return NULL;
5502 /* We assume that the loop exit condition is at the end of the loop. i.e,
5503 that the loop is represented as a do-while (with a proper if-guard
5504 before the loop if needed), where the loop header contains all the
5505 executable statements, and the latch is empty. */
5506 if (!empty_block_p (loop->latch))
5508 if (vect_debug_stats (loop) || vect_debug_details (loop))
5509 fprintf (dump_file, "not vectorized: unexpectd loop form.");
5510 return NULL;
5513 if (empty_block_p (loop->header))
5515 if (vect_debug_stats (loop) || vect_debug_details (loop))
5516 fprintf (dump_file, "not vectorized: empty loop.");
5517 return NULL;
5520 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
5521 if (!loop_cond)
5523 if (vect_debug_stats (loop) || vect_debug_details (loop))
5524 fprintf (dump_file, "not vectorized: complicated exit condition.");
5525 return NULL;
5528 if (!number_of_iterations)
5530 if (vect_debug_stats (loop) || vect_debug_details (loop))
5531 fprintf (dump_file,
5532 "not vectorized: number of iterations cannot be computed.");
5533 return NULL;
5536 loop_vinfo = new_loop_vec_info (loop);
5537 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
5538 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5540 if (vect_debug_stats (loop) || vect_debug_details (loop))
5541 fprintf (dump_file, "loop bound unknown.");
5543 /* Unknown loop bound. */
5544 if (!vect_analyze_loop_with_symbolic_num_of_iters
5545 (number_of_iterations, loop))
5547 if (vect_debug_stats (loop) || vect_debug_details (loop))
5548 fprintf (dump_file,
5549 "not vectorized: can't determine loop bound.");
5550 return NULL;
5552 else
5554 /* We need only one loop entry for unknown loop bound support. */
5555 if (loop->num_entries != 1 || !loop->pre_header)
5557 if (vect_debug_stats (loop) || vect_debug_details (loop))
5558 fprintf (dump_file,
5559 "not vectorized: more than one loop entry.");
5560 return NULL;
5564 else
5565 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
5567 if (vect_debug_stats (loop) || vect_debug_details (loop))
5568 fprintf (dump_file, "not vectorized: number of iterations = 0.");
5569 return NULL;
5572 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
5574 return loop_vinfo;
5578 /* Function vect_analyze_loop.
5580 Apply a set of analyses on LOOP, and create a loop_vec_info struct
5581 for it. The different analyses will record information in the
5582 loop_vec_info struct. */
5584 static loop_vec_info
5585 vect_analyze_loop (struct loop *loop)
5587 bool ok;
5588 loop_vec_info loop_vinfo;
5590 if (vect_debug_details (NULL))
5591 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
5593 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
5595 loop_vinfo = vect_analyze_loop_form (loop);
5596 if (!loop_vinfo)
5598 if (vect_debug_details (loop))
5599 fprintf (dump_file, "bad loop form.");
5600 return NULL;
5603 /* Find all data references in the loop (which correspond to vdefs/vuses)
5604 and analyze their evolution in the loop.
5606 FORNOW: Handle only simple, array references, which
5607 alignment can be forced, and aligned pointer-references. */
5609 ok = vect_analyze_data_refs (loop_vinfo);
5610 if (!ok)
5612 if (vect_debug_details (loop))
5613 fprintf (dump_file, "bad data references.");
5614 destroy_loop_vec_info (loop_vinfo);
5615 return NULL;
5618 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
5620 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
5621 if (!ok)
5623 if (vect_debug_details (loop))
5624 fprintf (dump_file, "unexpected pattern.");
5625 if (vect_debug_details (loop))
5626 fprintf (dump_file, "not vectorized: unexpected pattern.");
5627 destroy_loop_vec_info (loop_vinfo);
5628 return NULL;
5631 /* Check that all cross-iteration scalar data-flow cycles are OK.
5632 Cross-iteration cycles caused by virtual phis are analyzed separately. */
5634 ok = vect_analyze_scalar_cycles (loop_vinfo);
5635 if (!ok)
5637 if (vect_debug_details (loop))
5638 fprintf (dump_file, "bad scalar cycle.");
5639 destroy_loop_vec_info (loop_vinfo);
5640 return NULL;
5643 /* Analyze data dependences between the data-refs in the loop.
5644 FORNOW: fail at the first data dependence that we encounter. */
5646 ok = vect_analyze_data_ref_dependences (loop_vinfo);
5647 if (!ok)
5649 if (vect_debug_details (loop))
5650 fprintf (dump_file, "bad data dependence.");
5651 destroy_loop_vec_info (loop_vinfo);
5652 return NULL;
5655 /* Analyze the access patterns of the data-refs in the loop (consecutive,
5656 complex, etc.). FORNOW: Only handle consecutive access pattern. */
5658 ok = vect_analyze_data_ref_accesses (loop_vinfo);
5659 if (!ok)
5661 if (vect_debug_details (loop))
5662 fprintf (dump_file, "bad data access.");
5663 destroy_loop_vec_info (loop_vinfo);
5664 return NULL;
5667 /* Analyze the alignment of the data-refs in the loop.
5668 FORNOW: Only aligned accesses are handled. */
5670 ok = vect_analyze_data_refs_alignment (loop_vinfo);
5671 if (!ok)
5673 if (vect_debug_details (loop))
5674 fprintf (dump_file, "bad data alignment.");
5675 destroy_loop_vec_info (loop_vinfo);
5676 return NULL;
5679 /* Scan all the operations in the loop and make sure they are
5680 vectorizable. */
5682 ok = vect_analyze_operations (loop_vinfo);
5683 if (!ok)
5685 if (vect_debug_details (loop))
5686 fprintf (dump_file, "bad operation or unsupported loop bound.");
5687 destroy_loop_vec_info (loop_vinfo);
5688 return NULL;
5691 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
5693 return loop_vinfo;
5697 /* Function need_imm_uses_for.
5699 Return whether we ought to include information for 'var'
5700 when calculating immediate uses. For this pass we only want use
5701 information for non-virtual variables. */
5703 static bool
5704 need_imm_uses_for (tree var)
5706 return is_gimple_reg (var);
5710 /* Function vectorize_loops.
5712 Entry Point to loop vectorization phase. */
5714 void
5715 vectorize_loops (struct loops *loops)
5717 unsigned int i, loops_num;
5718 unsigned int num_vectorized_loops = 0;
5720 /* Does the target support SIMD? */
5721 /* FORNOW: until more sophisticated machine modelling is in place. */
5722 if (!UNITS_PER_SIMD_WORD)
5724 if (vect_debug_details (NULL))
5725 fprintf (dump_file, "vectorizer: target vector size is not defined.");
5726 return;
5729 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
5731 /* ----------- Analyze loops. ----------- */
5733 /* If some loop was duplicated, it gets bigger number
5734 than all previously defined loops. This fact allows us to run
5735 only over initial loops skipping newly generated ones. */
5736 loops_num = loops->num;
5737 for (i = 1; i < loops_num; i++)
5739 loop_vec_info loop_vinfo;
5740 struct loop *loop = loops->parray[i];
5742 if (!loop)
5743 continue;
5745 loop_vinfo = vect_analyze_loop (loop);
5746 loop->aux = loop_vinfo;
5748 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
5749 continue;
5751 vect_transform_loop (loop_vinfo, loops);
5752 num_vectorized_loops++;
5755 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
5756 fprintf (dump_file, "\nvectorized %u loops in function.\n",
5757 num_vectorized_loops);
5759 /* ----------- Finalize. ----------- */
5761 free_df ();
5762 for (i = 1; i < loops_num; i++)
5764 struct loop *loop = loops->parray[i];
5765 loop_vec_info loop_vinfo;
5767 if (!loop)
5768 continue;
5769 loop_vinfo = loop->aux;
5770 destroy_loop_vec_info (loop_vinfo);
5771 loop->aux = NULL;
5774 rewrite_into_ssa (false);
5775 if (!bitmap_empty_p (vars_to_rename))
5777 /* The rewrite of ssa names may cause violation of loop closed ssa
5778 form invariants. TODO -- avoid these rewrites completely.
5779 Information in virtual phi nodes is sufficient for it. */
5780 rewrite_into_loop_closed_ssa ();
5782 rewrite_into_loop_closed_ssa ();
5783 bitmap_clear (vars_to_rename);