PR tree-optimization/47427
[official-gcc.git] / gcc / tree-vect-data-refs.c
/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "tm_p.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"

/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
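
/* Illustrative example (hypothetical, not from the original source): for a
   loop containing the promotion

     short *a;  int *b;
     for (i = 0; i < N; i++)
       b[i] = (int) a[i];

   the result type of the cast is 'int' (4 bytes) while the smallest scalar
   type feeding it is 'short' (2 bytes), so the function above returns the
   'short' type and sets *LHS_SIZE_UNIT to 4 and *RHS_SIZE_UNIT to 2.  */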

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
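
/* Illustrative example (hypothetical): if the interleaving chain starting at
   FIRST_STMT links the stores to p[0], p[1], p[2], p[3] of a group of size 4
   (ordered by increasing init), calling this function on the stmt that writes
   p[2] returns 2, while a stmt whose data-ref is not linked into that chain
   yields -1.  */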

/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}

/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                            old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
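
/* Illustrative example (hypothetical) of case 2 above: if DRB's chain already
   contains accesses with inits 0 and 8, and DRA has init 4, DRA is spliced in
   between them, giving a chain ordered by init: 0, 4, 8.  */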

/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                            TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                            TREE_OPERAND (offset2, 1));

  return res;
}

/* Check dependence between DRA and DRB for basic block vectorization.
   If the accesses share same bases and offsets, we can compare their initial
   constant offsets to decide whether they differ or not.  In case of a read-
   write dependence we check that the load is before the store to ensure that
   vectorization will not change the order of the accesses.  */

static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we verify
     it here.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}

/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static bool
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return false;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is greater than the difference between
        data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return false;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if (step && (init_a - init_b) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if (step && (init_b - init_a) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }

  return false;
}
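
/* Illustrative example (hypothetical): for two int accesses a[4*i] and
   a[4*i + 2] we have type_size = 4, step = 16, init_a = 0, init_b = 8.
   Since init_b - init_a = 8 is a multiple of the type size and does not
   exceed the step, the two data-refs are recorded as one interleaving
   chain, with DRA accessed before DRB.  */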

/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf,
                                  bool *data_dependence_in_bb)
{
  unsigned int i;
  struct loop *loop = NULL;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Don't bother to analyze statements marked as unvectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                                  "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }

      /* When vectorizing a basic block unknown dependence can still mean
         strided access.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* We deal with read-write dependencies in basic blocks later (by
         verifying that all the loads in the basic block are before all the
         stores).  */
      *data_dependence_in_bb = true;
      return false;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is inapplicable, hence, in case of known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Do not vectorize basic blocks with write-write dependences.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check if this dependence is allowed in basic block vectorization.  */
      return vect_drs_dependent_in_basic_block (dra, drb);
    }

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is a store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
                     *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized, possible dependence "
                              "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
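
/* Illustrative example (hypothetical): for the loop

     for (i = 0; i < N; i++)
       a[i + 3] = a[i] + 1;

   the dependence distance is 3, so *MAX_VF (if it was larger) is reduced
   to 3 and the loop can still be vectorized with a vectorization factor of
   at most 3.  A distance of 1 instead would make the function return true,
   i.e. report an unvectorizable dependence.  */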

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
                                   bb_vec_info bb_vinfo, int *max_vf,
                                   bool *data_dependence_in_bb)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = NULL;
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (loop_vinfo)
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  else
    ddrs = BB_VINFO_DDRS (bb_vinfo);

  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
                                          data_dependence_in_bb))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "force alignment of ");
          print_generic_expr (vect_dump, ref, TDF_SLIM);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
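
/* Illustrative example (hypothetical): for a V4SI vectype (16-byte vector
   alignment) and an access into a 16-byte-aligned int array starting at
   element 1, DR_INIT is 4, so the computed misalignment is 4 % 16 = 4 bytes.
   An access whose base alignment can neither be determined nor forced keeps
   DR_MISALIGNMENT == -1 (unknown).  */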

/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
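
/* Illustrative example (hypothetical): with 4-byte elements, a 16-byte vector
   and NPEEL == 3, a data-ref DR with DR_MISALIGNMENT (dr) == 8 and a positive
   step gets misal = 8 + 3 * 4 = 20, masked by 15 to give a new misalignment
   of 4 bytes.  When the misalignments are not both known at compile time (and
   DR is not in DR_PEEL's same-alignment list), DR's misalignment is simply
   reset to -1 (unknown).  */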

/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");

              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
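
/* Illustrative example (hypothetical): for an interleaved group of size 2
   with a V4SI vectype (4 elements of 4 bytes) and DR_MISALIGNMENT == 8,
   mis_in_elements is 2 and (4 - 2) % 2 == 0, so peeling can reach alignment;
   with a group of size 3 the same misalignment would make the function
   return false.  */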

/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  if (!supportable_dr_alignment)
    *inside_cost = VECT_MAX_COST;
  else
    {
      if (DR_IS_READ (dr))
        vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
      else
        vect_get_store_cost (dr, ncopies, inside_cost);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
             "outside_cost = %d.", *inside_cost, *outside_cost);
}

static hashval_t
vect_peeling_hash (const void *elem)
{
  const struct _vect_peel_info *peel_info;

  peel_info = (const struct _vect_peel_info *) elem;
  return (hashval_t) peel_info->npeel;
}

static int
vect_peeling_hash_eq (const void *elem1, const void *elem2)
{
  const struct _vect_peel_info *a, *b;

  a = (const struct _vect_peel_info *) elem1;
  b = (const struct _vect_peel_info *) elem2;
  return (a->npeel == b->npeel);
}

/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  void **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                                     &elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
                                 INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && !flag_vect_cost_model)
    slot->count += VECT_MAX_COST;
}

/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

static int
vect_peeling_hash_get_most_frequent (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info max = (vect_peel_extended_info) data;

  if (elem->count > max->peel_info.count)
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}

/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

static int
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info min = (vect_peel_extended_info) data;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
                         vect_get_single_scalar_iteraion_cost (loop_vinfo));

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }

  return 1;
}

/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr = NULL;

  if (flag_vect_cost_model)
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_lowest_cost, &res);
    }
  else
    {
      res.peel_info.count = 0;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_most_frequent, &res);
    }

  *npeel = res.peel_info.npeel;
  return res.peel_info.dr;
}

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
1431 bool
1432 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1434 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1435 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1436 enum dr_alignment_support supportable_dr_alignment;
1437 struct data_reference *dr0 = NULL, *first_store = NULL;
1438 struct data_reference *dr;
1439 unsigned int i, j;
1440 bool do_peeling = false;
1441 bool do_versioning = false;
1442 bool stat;
1443 gimple stmt;
1444 stmt_vec_info stmt_info;
1445 int vect_versioning_for_alias_required;
1446 unsigned int npeel = 0;
1447 bool all_misalignments_unknown = true;
1448 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1449 unsigned possible_npeel_number = 1;
1450 tree vectype;
1451 unsigned int nelements, mis, same_align_drs_max = 0;
1453 if (vect_print_dump_info (REPORT_DETAILS))
1454 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1456 /* While cost model enhancements are expected in the future, the high level
1457 view of the code at this time is as follows:
1459 A) If there is a misaligned access then see if peeling to align
1460 this access can make all data references satisfy
1461 vect_supportable_dr_alignment. If so, update data structures
1462 as needed and return true.
1464 B) If peeling wasn't possible and there is a data reference with an
1465 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1466 then see if loop versioning checks can be used to make all data
1467 references satisfy vect_supportable_dr_alignment. If so, update
1468 data structures as needed and return true.
1470 C) If neither peeling nor versioning were successful then return false if
1471 any data reference does not satisfy vect_supportable_dr_alignment.
1473 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1475 Note, Possibility 3 above (which is peeling and versioning together) is not
1476 being done at this time. */
1478 /* (1) Peeling to force alignment. */
1480 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1481 Considerations:
1482 + How many accesses will become aligned due to the peeling
1483 - How many accesses will become unaligned due to the peeling,
1484 and the cost of misaligned accesses.
1485 - The cost of peeling (the extra runtime checks, the increase
1486 in code size). */
1488 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1490 stmt = DR_STMT (dr);
1491 stmt_info = vinfo_for_stmt (stmt);
1493 /* For interleaving, only the alignment of the first access
1494 matters. */
1495 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1496 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1497 continue;
1499 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1500 do_peeling = vector_alignment_reachable_p (dr);
1501 if (do_peeling)
1503 if (known_alignment_for_access_p (dr))
1505 unsigned int npeel_tmp;
1506 bool negative = tree_int_cst_compare (DR_STEP (dr),
1507 size_zero_node) < 0;
1509 /* Save info about DR in the hash table. */
1510 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1511 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1512 htab_create (1, vect_peeling_hash,
1513 vect_peeling_hash_eq, free);
1515 vectype = STMT_VINFO_VECTYPE (stmt_info);
1516 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1517 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1518 TREE_TYPE (DR_REF (dr))));
1519 npeel_tmp = (negative
1520 ? (mis - nelements) : (nelements - mis))
1521 & (nelements - 1);
1523 /* For multiple types, it is possible that the bigger type access
1524 will have more than one peeling option. E.g., a loop with two
1525 types: one of size (vector size / 4), and the other one of
1526 size (vector size / 8). Vectorization factor will 8. If both
1527 access are misaligned by 3, the first one needs one scalar
1528 iteration to be aligned, and the second one needs 5. But the
1529 the first one will be aligned also by peeling 5 scalar
1530 iterations, and in that case both accesses will be aligned.
1531 Hence, except for the immediate peeling amount, we also want
1532 to try to add full vector size, while we don't exceed
1533 vectorization factor.
1534 We do this automtically for cost model, since we calculate cost
1535 for every peeling option. */
1536 if (!flag_vect_cost_model)
1537 possible_npeel_number = vf /nelements;
1539 /* Handle the aligned case. We may decide to align some other
1540 access, making DR unaligned. */
1541 if (DR_MISALIGNMENT (dr) == 0)
1543 npeel_tmp = 0;
1544 if (!flag_vect_cost_model)
1545 possible_npeel_number++;
1548 for (j = 0; j < possible_npeel_number; j++)
1550 gcc_assert (npeel_tmp <= vf);
1551 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1552 npeel_tmp += nelements;
1555 all_misalignments_unknown = false;
1556 /* Data-ref that was chosen for the case that all the
1557 misalignments are unknown is not relevant anymore, since we
1558 have a data-ref with known alignment. */
1559 dr0 = NULL;
1561 else
1563 /* If we don't know all the misalignment values, we prefer
1564 peeling for data-ref that has maximum number of data-refs
1565 with the same alignment, unless the target prefers to align
1566 stores over load. */
1567 if (all_misalignments_unknown)
1569 if (same_align_drs_max < VEC_length (dr_p,
1570 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1571 || !dr0)
1573 same_align_drs_max = VEC_length (dr_p,
1574 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1575 dr0 = dr;
1578 if (!first_store && DR_IS_WRITE (dr))
1579 first_store = dr;
1582 /* If there are both known and unknown misaligned accesses in the
1583 loop, we choose peeling amount according to the known
1584 accesses. */
1587 if (!supportable_dr_alignment)
1589 dr0 = dr;
1590 if (!first_store && DR_IS_WRITE (dr))
1591 first_store = dr;
1595 else
1597 if (!aligned_access_p (dr))
1599 if (vect_print_dump_info (REPORT_DETAILS))
1600 fprintf (vect_dump, "vector alignment may not be reachable");
1602 break;
1607 vect_versioning_for_alias_required
1608 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1610 /* Temporarily, if versioning for alias is required, we disable peeling
1611 until we support peeling and versioning. Often peeling for alignment
1612 will require peeling for loop-bound, which in turn requires that we
1613 know how to adjust the loop ivs after the loop. */
1614 if (vect_versioning_for_alias_required
1615 || !vect_can_advance_ivs_p (loop_vinfo)
1616 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1617 do_peeling = false;
1619 if (do_peeling && all_misalignments_unknown
1620 && vect_supportable_dr_alignment (dr0, false))
1623 /* Check if the target requires to prefer stores over loads, i.e., if
1624 misaligned stores are more expensive than misaligned loads (taking
1625 drs with same alignment into account). */
1626 if (first_store && DR_IS_READ (dr0))
1628 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1629 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1630 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1631 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1633 vect_get_data_access_cost (dr0, &load_inside_cost,
1634 &load_outside_cost);
1635 vect_get_data_access_cost (first_store, &store_inside_cost,
1636 &store_outside_cost);
1638 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1639 aligning the load DR0). */
1640 load_inside_penalty = store_inside_cost;
1641 load_outside_penalty = store_outside_cost;
1642 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1643 (vinfo_for_stmt (DR_STMT (first_store))),
1644 i, dr);
1645 i++)
1646 if (DR_IS_READ (dr))
1648 load_inside_penalty += load_inside_cost;
1649 load_outside_penalty += load_outside_cost;
1651 else
1653 load_inside_penalty += store_inside_cost;
1654 load_outside_penalty += store_outside_cost;
1657 /* Calculate the penalty for leaving DR0 unaligned (by
1658 aligning the FIRST_STORE). */
1659 store_inside_penalty = load_inside_cost;
1660 store_outside_penalty = load_outside_cost;
1661 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1662 (vinfo_for_stmt (DR_STMT (dr0))),
1663 i, dr);
1664 i++)
1665 if (DR_IS_READ (dr))
1667 store_inside_penalty += load_inside_cost;
1668 store_outside_penalty += load_outside_cost;
1670 else
1672 store_inside_penalty += store_inside_cost;
1673 store_outside_penalty += store_outside_cost;
1676 if (load_inside_penalty > store_inside_penalty
1677 || (load_inside_penalty == store_inside_penalty
1678 && load_outside_penalty > store_outside_penalty))
1679 dr0 = first_store;
1682 /* In case there are only loads with different unknown misalignments, use
1683 peeling only if it may help to align other accesses in the loop. */
1684 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1685 (vinfo_for_stmt (DR_STMT (dr0))))
1686 && vect_supportable_dr_alignment (dr0, false)
1687 != dr_unaligned_supported)
1688 do_peeling = false;
1691 if (do_peeling && !dr0)
1693 /* Peeling is possible, but there is no data access that is not supported
1694 unless aligned. So we try to choose the best possible peeling. */
1696 /* We should get here only if there are drs with known misalignment. */
1697 gcc_assert (!all_misalignments_unknown);
1699 /* Choose the best peeling from the hash table. */
1700 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1701 if (!dr0 || !npeel)
1702 do_peeling = false;
1705 if (do_peeling)
1707 stmt = DR_STMT (dr0);
1708 stmt_info = vinfo_for_stmt (stmt);
1709 vectype = STMT_VINFO_VECTYPE (stmt_info);
1710 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1712 if (known_alignment_for_access_p (dr0))
1714 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1715 size_zero_node) < 0;
1716 if (!npeel)
1718 /* Since it's known at compile time, compute the number of
1719 iterations in the peeled loop (the peeling factor) for use in
1720 updating DR_MISALIGNMENT values. The peeling factor is the
1721 vectorization factor minus the misalignment as an element
1722 count. */
1723 mis = DR_MISALIGNMENT (dr0);
1724 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1725 npeel = ((negative ? mis - nelements : nelements - mis)
1726 & (nelements - 1));
1729 /* For interleaved data access every iteration accesses all the
1730 members of the group, therefore we divide the number of iterations
1731 by the group size. */
1732 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1733 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1734 npeel /= DR_GROUP_SIZE (stmt_info);
1736 if (vect_print_dump_info (REPORT_DETAILS))
1737 fprintf (vect_dump, "Try peeling by %d", npeel);
1740 /* Ensure that all data refs can be vectorized after the peel. */
1741 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1743 int save_misalignment;
1745 if (dr == dr0)
1746 continue;
1748 stmt = DR_STMT (dr);
1749 stmt_info = vinfo_for_stmt (stmt);
1750 /* For interleaving, only the alignment of the first access
1751 matters. */
1752 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1753 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1754 continue;
1756 save_misalignment = DR_MISALIGNMENT (dr);
1757 vect_update_misalignment_for_peel (dr, dr0, npeel);
1758 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1759 SET_DR_MISALIGNMENT (dr, save_misalignment);
1761 if (!supportable_dr_alignment)
1763 do_peeling = false;
1764 break;
1768 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1770 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1771 if (!stat)
1772 do_peeling = false;
1773 else
1774 return stat;
1777 if (do_peeling)
1779 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1780 If the misalignment of DR_i is identical to that of dr0 then set
1781 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1782 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1783 by the peeling factor times the element size of DR_i (MOD the
1784 vectorization factor times the size). Otherwise, the
1785 misalignment of DR_i must be set to unknown. */
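/* Illustrative example of the rule above (assuming a 16-byte vector): if dr0
   is peeled by npeel = 2 and DR_i is a 4-byte access with known misalignment
   4, its new misalignment is (4 + 2 * 4) mod 16 = 12; an unknown
   misalignment stays unknown.  */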
1786 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1787 if (dr != dr0)
1788 vect_update_misalignment_for_peel (dr, dr0, npeel);
1790 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1791 if (npeel)
1792 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1793 else
1794 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1795 SET_DR_MISALIGNMENT (dr0, 0);
1796 if (vect_print_dump_info (REPORT_ALIGNMENT))
1797 fprintf (vect_dump, "Alignment of access forced using peeling.");
1799 if (vect_print_dump_info (REPORT_DETAILS))
1800 fprintf (vect_dump, "Peeling for alignment will be applied.");
1802 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1803 gcc_assert (stat);
1804 return stat;
1809 /* (2) Versioning to force alignment. */
1811 /* Try versioning if:
1812 1) flag_tree_vect_loop_version is TRUE
1813 2) optimize loop for speed
1814 3) there is at least one unsupported misaligned data ref with an unknown
1815 misalignment, and
1816 4) all misaligned data refs with a known misalignment are supported, and
1817 5) the number of runtime alignment checks is within reason. */
1819 do_versioning =
1820 flag_tree_vect_loop_version
1821 && optimize_loop_nest_for_speed_p (loop)
1822 && (!loop->inner); /* FORNOW */
1824 if (do_versioning)
1826 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1828 stmt = DR_STMT (dr);
1829 stmt_info = vinfo_for_stmt (stmt);
1831 /* For interleaving, only the alignment of the first access
1832 matters. */
1833 if (aligned_access_p (dr)
1834 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1835 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1836 continue;
1838 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1840 if (!supportable_dr_alignment)
1842 gimple stmt;
1843 int mask;
1844 tree vectype;
1846 if (known_alignment_for_access_p (dr)
1847 || VEC_length (gimple,
1848 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1849 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1851 do_versioning = false;
1852 break;
1855 stmt = DR_STMT (dr);
1856 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1857 gcc_assert (vectype);
1859 /* The rightmost bits of an aligned address must be zeros.
1860 Construct the mask needed for this test. For example,
1861 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1862 mask must be 15 = 0xf. */
1863 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
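/* The mask feeds the runtime test emitted for the versioned loop; roughly,
   the addresses of all may-misalign references are OR-ed together and the
   vectorized version is taken only if (addr_1 | ... | addr_n) & mask == 0
   (see vect_create_cond_for_align_checks).  */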
1865 /* FORNOW: use the same mask to test all potentially unaligned
1866 references in the loop. The vectorizer currently supports
1867 a single vector size, see the reference to
1868 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1869 vectorization factor is computed. */
1870 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1871 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1872 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1873 VEC_safe_push (gimple, heap,
1874 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1875 DR_STMT (dr));
1879 /* Versioning requires at least one misaligned data reference. */
1880 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1881 do_versioning = false;
1882 else if (!do_versioning)
1883 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1886 if (do_versioning)
1888 VEC(gimple,heap) *may_misalign_stmts
1889 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1890 gimple stmt;
1892 /* It can now be assumed that the data references in the statements
1893 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1894 of the loop being vectorized. */
1895 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1897 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1898 dr = STMT_VINFO_DATA_REF (stmt_info);
1899 SET_DR_MISALIGNMENT (dr, 0);
1900 if (vect_print_dump_info (REPORT_ALIGNMENT))
1901 fprintf (vect_dump, "Alignment of access forced using versioning.");
1904 if (vect_print_dump_info (REPORT_DETAILS))
1905 fprintf (vect_dump, "Versioning for alignment will be applied.");
1907 /* Peeling and versioning can't be done together at this time. */
1908 gcc_assert (! (do_peeling && do_versioning));
1910 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1911 gcc_assert (stat);
1912 return stat;
1915 /* This point is reached if neither peeling nor versioning is being done. */
1916 gcc_assert (! (do_peeling || do_versioning));
1918 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1919 return stat;
1923 /* Function vect_find_same_alignment_drs.
1925 Update group and alignment relations according to the chosen
1926 vectorization factor. */
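/* For example, with a vectorization factor of 4 and two 4-byte accesses
   a[i] and a[i+4], the dependence distance is 4 and 4 % 4 == 0, so the two
   references are recorded as having the same alignment.  */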
1928 static void
1929 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1930 loop_vec_info loop_vinfo)
1932 unsigned int i;
1933 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1934 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1935 struct data_reference *dra = DDR_A (ddr);
1936 struct data_reference *drb = DDR_B (ddr);
1937 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1938 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1939 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1940 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1941 lambda_vector dist_v;
1942 unsigned int loop_depth;
1944 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1945 return;
1947 if (dra == drb)
1948 return;
1950 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1951 return;
1953 /* Loop-based vectorization and known data dependence. */
1954 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1955 return;
1957 /* Data-dependence analysis reports a distance vector of zero
1958 for data-references that overlap only in the first iteration
1959 but have steps of different sign (see PR45764).
1960 So as a sanity check require equal DR_STEP. */
1961 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1962 return;
1964 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1965 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1967 int dist = dist_v[loop_depth];
1969 if (vect_print_dump_info (REPORT_DR_DETAILS))
1970 fprintf (vect_dump, "dependence distance = %d.", dist);
1972 /* Same loop iteration. */
1973 if (dist == 0
1974 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1976 /* Two references with distance zero (mod the vectorization factor) have the same alignment. */
1977 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1978 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1979 if (vect_print_dump_info (REPORT_ALIGNMENT))
1980 fprintf (vect_dump, "accesses have the same alignment.");
1981 if (vect_print_dump_info (REPORT_DR_DETAILS))
1983 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1984 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1985 fprintf (vect_dump, " and ");
1986 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1993 /* Function vect_analyze_data_refs_alignment
1995 Analyze the alignment of the data-references in the loop.
1996 Return FALSE if a data reference is found that cannot be vectorized. */
1998 bool
1999 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2000 bb_vec_info bb_vinfo)
2002 if (vect_print_dump_info (REPORT_DETAILS))
2003 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2005 /* Mark groups of data references with the same alignment using
2006 data dependence information. */
2007 if (loop_vinfo)
2009 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2010 struct data_dependence_relation *ddr;
2011 unsigned int i;
2013 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2014 vect_find_same_alignment_drs (ddr, loop_vinfo);
2017 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2019 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2020 fprintf (vect_dump,
2021 "not vectorized: can't calculate alignment for data ref.");
2022 return false;
2025 return true;
2029 /* Analyze groups of strided accesses: check that DR belongs to a group of
2030 strided accesses of legal size, step, etc. Detect gaps, single element
2031 interleaving, and other special cases. Set strided access info.
2032 Collect groups of strided stores for further use in SLP analysis. */
2034 static bool
2035 vect_analyze_group_access (struct data_reference *dr)
2037 tree step = DR_STEP (dr);
2038 tree scalar_type = TREE_TYPE (DR_REF (dr));
2039 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2040 gimple stmt = DR_STMT (dr);
2041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2042 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2043 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2044 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2045 HOST_WIDE_INT stride;
2046 bool slp_impossible = false;
2048 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2049 interleaving group (including gaps). */
2050 stride = dr_step / type_size;
2052 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2053 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2055 /* Check whether this DR is part of an interleaving group, and is a single
2056 element of the group that is accessed in the loop. */
2058 /* Gaps are supported only for loads. STEP must be a multiple of the type
2059 size. The size of the group must be a power of 2. */
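/* For example (4-byte ints): a load of a[4*i] has step 16, so
   stride = 16 / 4 = 4 and the access is recorded as a single-element
   interleaving group of size 4.  */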
2060 if (DR_IS_READ (dr)
2061 && (dr_step % type_size) == 0
2062 && stride > 0
2063 && exact_log2 (stride) != -1)
2065 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2066 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2067 if (vect_print_dump_info (REPORT_DR_DETAILS))
2069 fprintf (vect_dump, "Detected single element interleaving ");
2070 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2071 fprintf (vect_dump, " step ");
2072 print_generic_expr (vect_dump, step, TDF_SLIM);
2074 return true;
2077 if (vect_print_dump_info (REPORT_DETAILS))
2079 fprintf (vect_dump, "not consecutive access ");
2080 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2083 if (bb_vinfo)
2085 /* Mark the statement as unvectorizable. */
2086 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2087 return true;
2090 return false;
2093 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2095 /* First stmt in the interleaving chain. Check the chain. */
2096 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2097 struct data_reference *data_ref = dr;
2098 unsigned int count = 1;
2099 tree next_step;
2100 tree prev_init = DR_INIT (data_ref);
2101 gimple prev = stmt;
2102 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2104 while (next)
2106 /* Skip identical data-refs. If two or more stmts share a
2107 data-ref (supported only for loads), we vectorize only the first
2108 stmt, and the rest get their vectorized loads from the first
2109 one. */
2110 if (!tree_int_cst_compare (DR_INIT (data_ref),
2111 DR_INIT (STMT_VINFO_DATA_REF (
2112 vinfo_for_stmt (next)))))
2114 if (DR_IS_WRITE (data_ref))
2116 if (vect_print_dump_info (REPORT_DETAILS))
2117 fprintf (vect_dump, "Two store stmts share the same dr.");
2118 return false;
2121 /* Check that there are no load-store dependences for these loads,
2122 to prevent a load-store-load sequence to the same location. */
2123 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2124 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2126 if (vect_print_dump_info (REPORT_DETAILS))
2127 fprintf (vect_dump,
2128 "READ_WRITE dependence in interleaving.");
2129 return false;
2132 /* For loads, reuse the data-ref load of the first stmt. */
2133 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2135 prev = next;
2136 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2137 continue;
2139 prev = next;
2141 /* Check that all the accesses have the same STEP. */
2142 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2143 if (tree_int_cst_compare (step, next_step))
2145 if (vect_print_dump_info (REPORT_DETAILS))
2146 fprintf (vect_dump, "not consecutive access in interleaving");
2147 return false;
2150 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2151 /* Check that the distance between two accesses is equal to the type
2152 size. Otherwise, we have gaps. */
2153 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2154 - TREE_INT_CST_LOW (prev_init)) / type_size;
2155 if (diff != 1)
2157 /* FORNOW: SLP of accesses with gaps is not supported. */
2158 slp_impossible = true;
2159 if (DR_IS_WRITE (data_ref))
2161 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "interleaved store with gaps");
2163 return false;
2166 gaps += diff - 1;
2169 /* Store the gap from the previous member of the group. If there is no
2170 gap in the access, DR_GROUP_GAP is always 1. */
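/* E.g., in a group of loads accessing a[3*i] and a[3*i+2], the second
   member gets DR_GROUP_GAP = 2 because one element (a[3*i+1]) is skipped.  */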
2171 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2173 prev_init = DR_INIT (data_ref);
2174 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2175 /* Count the number of data-refs in the chain. */
2176 count++;
2179 /* COUNT is the number of accesses found; we multiply it by the size of
2180 the type to get COUNT_IN_BYTES. */
2181 count_in_bytes = type_size * count;
2183 /* Check that the size of the interleaving (including gaps) is not
2184 greater than STEP. */
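/* E.g., four 4-byte stores per iteration with a step of only 12 bytes give
   count_in_bytes = 16 > 12, i.e. consecutive iterations would overlap, so
   the group is rejected.  */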
2185 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2187 if (vect_print_dump_info (REPORT_DETAILS))
2189 fprintf (vect_dump, "interleaving size is greater than step for ");
2190 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2192 return false;
2195 /* Check that the size of the interleaving is equal to STEP for stores,
2196 i.e., that there are no gaps. */
2197 if (dr_step && dr_step != count_in_bytes)
2199 if (DR_IS_READ (dr))
2201 slp_impossible = true;
2202 /* There is a gap after the last load in the group. This gap is the
2203 difference between the stride and the number of elements. When
2204 there is no gap, this difference should be 0. */
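/* E.g., loads of a[4*i], a[4*i+1] and a[4*i+2] have stride 4 and
   count = 3, so the first stmt gets DR_GROUP_GAP = 4 - 3 = 1.  */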
2205 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2207 else
2209 if (vect_print_dump_info (REPORT_DETAILS))
2210 fprintf (vect_dump, "interleaved store with gaps");
2211 return false;
2215 /* Check that STEP is a multiple of type size. */
2216 if (dr_step && (dr_step % type_size) != 0)
2218 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "step is not a multiple of type size: step ");
2221 print_generic_expr (vect_dump, step, TDF_SLIM);
2222 fprintf (vect_dump, " size ");
2223 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2224 TDF_SLIM);
2226 return false;
2229 /* FORNOW: we handle only interleaving that is a power of 2.
2230 We don't fail here if it may still be possible to vectorize the
2231 group using SLP. If not, the size of the group will be checked in
2232 vect_analyze_operations, and the vectorization will fail. */
2233 if (exact_log2 (stride) == -1)
2235 if (vect_print_dump_info (REPORT_DETAILS))
2236 fprintf (vect_dump, "interleaving is not a power of 2");
2238 if (slp_impossible)
2239 return false;
2242 if (stride == 0)
2243 stride = count;
2245 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2246 if (vect_print_dump_info (REPORT_DETAILS))
2247 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2249 /* SLP: create an SLP data structure for every interleaving group of
2250 stores for further analysis in vect_analyze_slp. */
2251 if (DR_IS_WRITE (dr) && !slp_impossible)
2253 if (loop_vinfo)
2254 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2255 stmt);
2256 if (bb_vinfo)
2257 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2258 stmt);
2262 return true;
2266 /* Analyze the access pattern of the data-reference DR.
2267 In case of non-consecutive accesses call vect_analyze_group_access() to
2268 analyze groups of strided accesses. */
2270 static bool
2271 vect_analyze_data_ref_access (struct data_reference *dr)
2273 tree step = DR_STEP (dr);
2274 tree scalar_type = TREE_TYPE (DR_REF (dr));
2275 gimple stmt = DR_STMT (dr);
2276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2277 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2278 struct loop *loop = NULL;
2279 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2281 if (loop_vinfo)
2282 loop = LOOP_VINFO_LOOP (loop_vinfo);
2284 if (loop_vinfo && !step)
2286 if (vect_print_dump_info (REPORT_DETAILS))
2287 fprintf (vect_dump, "bad data-ref access in loop");
2288 return false;
2291 /* Don't allow invariant accesses in loops. */
2292 if (loop_vinfo && dr_step == 0)
2293 return false;
2295 if (loop && nested_in_vect_loop_p (loop, stmt))
2297 /* Interleaved accesses are not yet supported within outer-loop
2298 vectorization for references in the inner-loop. */
2299 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2301 /* For the rest of the analysis we use the outer-loop step. */
2302 step = STMT_VINFO_DR_STEP (stmt_info);
2303 dr_step = TREE_INT_CST_LOW (step);
2305 if (dr_step == 0)
2307 if (vect_print_dump_info (REPORT_ALIGNMENT))
2308 fprintf (vect_dump, "zero step in outer loop.");
2309 if (DR_IS_READ (dr))
2310 return true;
2311 else
2312 return false;
2316 /* Consecutive? */
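/* A step equal to the element size is consecutive, e.g. 4-byte ints
   accessed as a[i] (step 4) or, with a negative step, as a[n-i]
   (step -4).  */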
2317 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2318 || (dr_step < 0
2319 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2321 /* Mark that it is not interleaving. */
2322 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2323 return true;
2326 if (loop && nested_in_vect_loop_p (loop, stmt))
2328 if (vect_print_dump_info (REPORT_ALIGNMENT))
2329 fprintf (vect_dump, "strided access in outer loop.");
2330 return false;
2333 /* Non-consecutive access - check whether it is part of an interleaving group. */
2334 return vect_analyze_group_access (dr);
2338 /* Function vect_analyze_data_ref_accesses.
2340 Analyze the access pattern of all the data references in the loop.
2342 FORNOW: the only access pattern that is considered vectorizable is a
2343 simple step 1 (consecutive) access.
2345 FORNOW: handle only arrays and pointer accesses. */
2347 bool
2348 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2350 unsigned int i;
2351 VEC (data_reference_p, heap) *datarefs;
2352 struct data_reference *dr;
2354 if (vect_print_dump_info (REPORT_DETAILS))
2355 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2357 if (loop_vinfo)
2358 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2359 else
2360 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2362 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2363 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2364 && !vect_analyze_data_ref_access (dr))
2366 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2367 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2369 if (bb_vinfo)
2371 /* Mark the statement as not vectorizable. */
2372 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2373 continue;
2375 else
2376 return false;
2379 return true;
2382 /* Function vect_prune_runtime_alias_test_list.
2384 Prune a list of ddrs to be tested at run-time by versioning for alias.
2385 Return FALSE if the resulting list of ddrs is longer than allowed by
2386 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
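/* The limit comes from --param vect-max-version-for-alias-checks (typically
   10 by default); every ddr left in the list becomes one pairwise overlap
   test in the runtime alias check.  */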
2388 bool
2389 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2391 VEC (ddr_p, heap) * ddrs =
2392 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2393 unsigned i, j;
2395 if (vect_print_dump_info (REPORT_DETAILS))
2396 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2398 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2400 bool found;
2401 ddr_p ddr_i;
2403 ddr_i = VEC_index (ddr_p, ddrs, i);
2404 found = false;
2406 for (j = 0; j < i; j++)
2408 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2410 if (vect_vfa_range_equal (ddr_i, ddr_j))
2412 if (vect_print_dump_info (REPORT_DR_DETAILS))
2414 fprintf (vect_dump, "found equal ranges ");
2415 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2416 fprintf (vect_dump, ", ");
2417 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2418 fprintf (vect_dump, " and ");
2419 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2420 fprintf (vect_dump, ", ");
2421 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2423 found = true;
2424 break;
2428 if (found)
2430 VEC_ordered_remove (ddr_p, ddrs, i);
2431 continue;
2433 i++;
2436 if (VEC_length (ddr_p, ddrs) >
2437 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2439 if (vect_print_dump_info (REPORT_DR_DETAILS))
2441 fprintf (vect_dump,
2442 "disable versioning for alias - max number of generated "
2443 "checks exceeded.");
2446 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2448 return false;
2451 return true;
2455 /* Function vect_analyze_data_refs.
2457 Find all the data references in the loop or basic block.
2459 The general structure of the analysis of data refs in the vectorizer is as
2460 follows:
2461 1- vect_analyze_data_refs(loop/bb): call
2462 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2463 in the loop/bb and their dependences.
2464 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2465 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2466 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2470 bool
2471 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2472 bb_vec_info bb_vinfo,
2473 int *min_vf)
2475 struct loop *loop = NULL;
2476 basic_block bb = NULL;
2477 unsigned int i;
2478 VEC (data_reference_p, heap) *datarefs;
2479 struct data_reference *dr;
2480 tree scalar_type;
2481 bool res;
2483 if (vect_print_dump_info (REPORT_DETAILS))
2484 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2486 if (loop_vinfo)
2488 loop = LOOP_VINFO_LOOP (loop_vinfo);
2489 res = compute_data_dependences_for_loop
2490 (loop, true,
2491 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2492 &LOOP_VINFO_DATAREFS (loop_vinfo),
2493 &LOOP_VINFO_DDRS (loop_vinfo));
2495 if (!res)
2497 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2498 fprintf (vect_dump, "not vectorized: loop contains function calls"
2499 " or data references that cannot be analyzed");
2500 return false;
2503 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2505 else
2507 bb = BB_VINFO_BB (bb_vinfo);
2508 res = compute_data_dependences_for_bb (bb, true,
2509 &BB_VINFO_DATAREFS (bb_vinfo),
2510 &BB_VINFO_DDRS (bb_vinfo));
2511 if (!res)
2513 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2514 fprintf (vect_dump, "not vectorized: basic block contains function"
2515 " calls or data references that cannot be analyzed");
2516 return false;
2519 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2522 /* Go through the data-refs, check that the analysis succeeded. Update
2523 pointer from stmt_vec_info struct to DR and vectype. */
2525 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2527 gimple stmt;
2528 stmt_vec_info stmt_info;
2529 tree base, offset, init;
2530 int vf;
2532 if (!dr || !DR_REF (dr))
2534 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2535 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2536 return false;
2539 stmt = DR_STMT (dr);
2540 stmt_info = vinfo_for_stmt (stmt);
2542 /* Check that analysis of the data-ref succeeded. */
2543 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2544 || !DR_STEP (dr))
2546 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2548 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2549 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2552 if (bb_vinfo)
2554 /* Mark the statement as not vectorizable. */
2555 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2556 continue;
2558 else
2559 return false;
2562 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2564 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2565 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2566 "constant");
2567 if (bb_vinfo)
2569 /* Mark the statement as not vectorizable. */
2570 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2571 continue;
2573 else
2574 return false;
2577 base = unshare_expr (DR_BASE_ADDRESS (dr));
2578 offset = unshare_expr (DR_OFFSET (dr));
2579 init = unshare_expr (DR_INIT (dr));
2581 if (stmt_could_throw_p (stmt))
2583 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2585 fprintf (vect_dump, "not vectorized: statement can throw an "
2586 "exception ");
2587 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2589 return false;
2592 /* Update DR field in stmt_vec_info struct. */
2594 /* If the dataref is in an inner-loop of the loop that is considered
2595 for vectorization, we also want to analyze the access relative to
2596 the outer-loop (DR contains information only relative to the
2597 inner-most enclosing loop). We do that by building a reference to the
2598 first location accessed by the inner-loop, and analyze it relative to
2599 the outer-loop. */
2600 if (loop && nested_in_vect_loop_p (loop, stmt))
2602 tree outer_step, outer_base, outer_init;
2603 HOST_WIDE_INT pbitsize, pbitpos;
2604 tree poffset;
2605 enum machine_mode pmode;
2606 int punsignedp, pvolatilep;
2607 affine_iv base_iv, offset_iv;
2608 tree dinit;
2610 /* Build a reference to the first location accessed by the
2611 inner-loop: *(BASE+INIT). (The first location is actually
2612 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2613 tree inner_base = build_fold_indirect_ref
2614 (fold_build2 (POINTER_PLUS_EXPR,
2615 TREE_TYPE (base), base,
2616 fold_convert (sizetype, init)));
2618 if (vect_print_dump_info (REPORT_DETAILS))
2620 fprintf (vect_dump, "analyze in outer-loop: ");
2621 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2624 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2625 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2626 gcc_assert (outer_base != NULL_TREE);
2628 if (pbitpos % BITS_PER_UNIT != 0)
2630 if (vect_print_dump_info (REPORT_DETAILS))
2631 fprintf (vect_dump, "failed: bit offset alignment.\n");
2632 return false;
2635 outer_base = build_fold_addr_expr (outer_base);
2636 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2637 &base_iv, false))
2639 if (vect_print_dump_info (REPORT_DETAILS))
2640 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2641 return false;
2644 if (offset)
2646 if (poffset)
2647 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2648 poffset);
2649 else
2650 poffset = offset;
2653 if (!poffset)
2655 offset_iv.base = ssize_int (0);
2656 offset_iv.step = ssize_int (0);
2658 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2659 &offset_iv, false))
2661 if (vect_print_dump_info (REPORT_DETAILS))
2662 fprintf (vect_dump, "evolution of offset is not affine.\n");
2663 return false;
2666 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2667 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2668 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2669 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2670 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2672 outer_step = size_binop (PLUS_EXPR,
2673 fold_convert (ssizetype, base_iv.step),
2674 fold_convert (ssizetype, offset_iv.step));
2676 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2677 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2678 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2679 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2680 STMT_VINFO_DR_OFFSET (stmt_info) =
2681 fold_convert (ssizetype, offset_iv.base);
2682 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2683 size_int (highest_pow2_factor (offset_iv.base));
2685 if (vect_print_dump_info (REPORT_DETAILS))
2687 fprintf (vect_dump, "\touter base_address: ");
2688 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2689 fprintf (vect_dump, "\n\touter offset from base address: ");
2690 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2691 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2692 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2693 fprintf (vect_dump, "\n\touter step: ");
2694 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2695 fprintf (vect_dump, "\n\touter aligned to: ");
2696 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2700 if (STMT_VINFO_DATA_REF (stmt_info))
2702 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2704 fprintf (vect_dump,
2705 "not vectorized: more than one data ref in stmt: ");
2706 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2708 return false;
2711 STMT_VINFO_DATA_REF (stmt_info) = dr;
2713 /* Set vectype for STMT. */
2714 scalar_type = TREE_TYPE (DR_REF (dr));
2715 STMT_VINFO_VECTYPE (stmt_info) =
2716 get_vectype_for_scalar_type (scalar_type);
2717 if (!STMT_VINFO_VECTYPE (stmt_info))
2719 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2721 fprintf (vect_dump,
2722 "not vectorized: no vectype for stmt: ");
2723 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2724 fprintf (vect_dump, " scalar_type: ");
2725 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2728 if (bb_vinfo)
2730 /* Mark the statement as not vectorizable. */
2731 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2732 continue;
2734 else
2735 return false;
2738 /* Adjust the minimal vectorization factor according to the
2739 vector type. */
2740 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2741 if (vf > *min_vf)
2742 *min_vf = vf;
2745 return true;
2749 /* Function vect_get_new_vect_var.
2751 Create a new variable and return it. The current naming scheme prepends
2752 the prefix "vect_", "vect_p" or "stmp_" (depending on the value of
2753 VAR_KIND) to NAME if provided, and uses the prefix alone when NAME is
2754 not provided. */
2756 tree
2757 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2759 const char *prefix;
2760 tree new_vect_var;
2762 switch (var_kind)
2764 case vect_simple_var:
2765 prefix = "vect_";
2766 break;
2767 case vect_scalar_var:
2768 prefix = "stmp_";
2769 break;
2770 case vect_pointer_var:
2771 prefix = "vect_p";
2772 break;
2773 default:
2774 gcc_unreachable ();
2777 if (name)
2779 char* tmp = concat (prefix, name, NULL);
2780 new_vect_var = create_tmp_var (type, tmp);
2781 free (tmp);
2783 else
2784 new_vect_var = create_tmp_var (type, prefix);
2786 /* Mark vector typed variable as a gimple register variable. */
2787 if (TREE_CODE (type) == VECTOR_TYPE)
2788 DECL_GIMPLE_REG_P (new_vect_var) = true;
2790 return new_vect_var;
2794 /* Function vect_create_addr_base_for_vector_ref.
2796 Create an expression that computes the address of the first memory location
2797 that will be accessed for a data reference.
2799 Input:
2800 STMT: The statement containing the data reference.
2801 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2802 OFFSET: Optional. If supplied, it is added to the initial address.
2803 LOOP: Specify relative to which loop-nest the address should be computed.
2804 For example, when the dataref is in an inner-loop nested in an
2805 outer-loop that is now being vectorized, LOOP can be either the
2806 outer-loop, or the inner-loop. The first memory location accessed
2807 by the following dataref ('in' points to short):
2809 for (i=0; i<N; i++)
2810 for (j=0; j<M; j++)
2811 s += in[i+j]
2813 is as follows:
2814 if LOOP=i_loop: &in (relative to i_loop)
2815 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2817 Output:
2818 1. Return an SSA_NAME whose value is the address of the memory location of
2819 the first vector of the data reference.
2820 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2821 these statement(s) which define the returned SSA_NAME.
2823 FORNOW: We are only handling array accesses with step 1. */
2825 tree
2826 vect_create_addr_base_for_vector_ref (gimple stmt,
2827 gimple_seq *new_stmt_list,
2828 tree offset,
2829 struct loop *loop)
2831 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2832 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2833 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2834 tree base_name;
2835 tree data_ref_base_var;
2836 tree vec_stmt;
2837 tree addr_base, addr_expr;
2838 tree dest;
2839 gimple_seq seq = NULL;
2840 tree base_offset = unshare_expr (DR_OFFSET (dr));
2841 tree init = unshare_expr (DR_INIT (dr));
2842 tree vect_ptr_type;
2843 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2844 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2845 tree base;
2847 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2849 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2851 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2853 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2854 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2855 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2858 if (loop_vinfo)
2859 base_name = build_fold_indirect_ref (data_ref_base);
2860 else
2862 base_offset = ssize_int (0);
2863 init = ssize_int (0);
2864 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2867 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2868 add_referenced_var (data_ref_base_var);
2869 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2870 data_ref_base_var);
2871 gimple_seq_add_seq (new_stmt_list, seq);
2873 /* Create base_offset */
2874 base_offset = size_binop (PLUS_EXPR,
2875 fold_convert (sizetype, base_offset),
2876 fold_convert (sizetype, init));
2877 dest = create_tmp_var (sizetype, "base_off");
2878 add_referenced_var (dest);
2879 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2880 gimple_seq_add_seq (new_stmt_list, seq);
2882 if (offset)
2884 tree tmp = create_tmp_var (sizetype, "offset");
2886 add_referenced_var (tmp);
2887 offset = fold_build2 (MULT_EXPR, sizetype,
2888 fold_convert (sizetype, offset), step);
2889 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2890 base_offset, offset);
2891 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2892 gimple_seq_add_seq (new_stmt_list, seq);
2895 /* base + base_offset */
2896 if (loop_vinfo)
2897 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2898 data_ref_base, base_offset);
2899 else
2901 addr_base = build1 (ADDR_EXPR,
2902 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2903 unshare_expr (DR_REF (dr)));
2906 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2907 base = get_base_address (DR_REF (dr));
2908 if (base
2909 && TREE_CODE (base) == MEM_REF)
2910 vect_ptr_type
2911 = build_qualified_type (vect_ptr_type,
2912 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2914 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2915 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2916 get_name (base_name));
2917 add_referenced_var (addr_expr);
2918 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2919 gimple_seq_add_seq (new_stmt_list, seq);
2921 if (DR_PTR_INFO (dr)
2922 && TREE_CODE (vec_stmt) == SSA_NAME)
2924 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2925 if (offset)
2927 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2928 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2932 if (vect_print_dump_info (REPORT_DETAILS))
2934 fprintf (vect_dump, "created ");
2935 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2938 return vec_stmt;
2942 /* Function vect_create_data_ref_ptr.
2944 Create a new pointer to vector type (vp), that points to the first location
2945 accessed in the loop by STMT, along with the def-use update chain to
2946 appropriately advance the pointer through the loop iterations. Also set
2947 aliasing information for the pointer. This vector pointer is used by the
2948 callers to this function to create a memory reference expression for vector
2949 load/store access.
2951 Input:
2952 1. STMT: a stmt that references memory. Expected to be of the form
2953 GIMPLE_ASSIGN <name, data-ref> or
2954 GIMPLE_ASSIGN <data-ref, name>.
2955 2. AT_LOOP: the loop where the vector memref is to be created.
2956 3. OFFSET (optional): an offset to be added to the initial address accessed
2957 by the data-ref in STMT.
2958 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2959 pointing to the initial address.
2960 5. TYPE: if not NULL indicates the required type of the data-ref.
2962 Output:
2963 1. Declare a new ptr to vector_type, and have it point to the base of the
2964 data reference (initial address accessed by the data reference).
2965 For example, for vector of type V8HI, the following code is generated:
2967 v8hi *vp;
2968 vp = (v8hi *)initial_address;
2970 if OFFSET is not supplied:
2971 initial_address = &a[init];
2972 if OFFSET is supplied:
2973 initial_address = &a[init + OFFSET];
2975 Return the initial_address in INITIAL_ADDRESS.
2977 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2978 update the pointer in each iteration of the loop.
2980 Return the increment stmt that updates the pointer in PTR_INCR.
2982 3. Set INV_P to true if the access pattern of the data reference in the
2983 vectorized loop is invariant. Set it to false otherwise.
2985 4. Return the pointer. */
2987 tree
2988 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2989 tree offset, tree *initial_address, gimple *ptr_incr,
2990 bool only_init, bool *inv_p)
2992 tree base_name;
2993 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2994 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2995 struct loop *loop = NULL;
2996 bool nested_in_vect_loop = false;
2997 struct loop *containing_loop = NULL;
2998 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2999 tree vect_ptr_type;
3000 tree vect_ptr;
3001 tree new_temp;
3002 gimple vec_stmt;
3003 gimple_seq new_stmt_list = NULL;
3004 edge pe = NULL;
3005 basic_block new_bb;
3006 tree vect_ptr_init;
3007 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3008 tree vptr;
3009 gimple_stmt_iterator incr_gsi;
3010 bool insert_after;
3011 bool negative;
3012 tree indx_before_incr, indx_after_incr;
3013 gimple incr;
3014 tree step;
3015 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3016 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3017 tree base;
3019 if (loop_vinfo)
3021 loop = LOOP_VINFO_LOOP (loop_vinfo);
3022 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3023 containing_loop = (gimple_bb (stmt))->loop_father;
3024 pe = loop_preheader_edge (loop);
3026 else
3028 gcc_assert (bb_vinfo);
3029 only_init = true;
3030 *ptr_incr = NULL;
3033 /* Check the step (evolution) of the load in LOOP, and record
3034 whether it's invariant. */
3035 if (nested_in_vect_loop)
3036 step = STMT_VINFO_DR_STEP (stmt_info);
3037 else
3038 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3040 if (tree_int_cst_compare (step, size_zero_node) == 0)
3041 *inv_p = true;
3042 else
3043 *inv_p = false;
3044 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3046 /* Create an expression for the first address accessed by this load
3047 in LOOP. */
3048 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3050 if (vect_print_dump_info (REPORT_DETAILS))
3052 tree data_ref_base = base_name;
3053 fprintf (vect_dump, "create vector-pointer variable to type: ");
3054 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3055 if (TREE_CODE (data_ref_base) == VAR_DECL
3056 || TREE_CODE (data_ref_base) == ARRAY_REF)
3057 fprintf (vect_dump, " vectorizing an array ref: ");
3058 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3059 fprintf (vect_dump, " vectorizing a record based array ref: ");
3060 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3061 fprintf (vect_dump, " vectorizing a pointer ref: ");
3062 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3065 /* (1) Create the new vector-pointer variable. */
3066 vect_ptr_type = build_pointer_type (vectype);
3067 base = get_base_address (DR_REF (dr));
3068 if (base
3069 && TREE_CODE (base) == MEM_REF)
3070 vect_ptr_type
3071 = build_qualified_type (vect_ptr_type,
3072 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3073 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3074 get_name (base_name));
3076 /* Vector types inherit the alias set of their component type by default so
3077 we need to use a ref-all pointer if the data reference does not conflict
3078 with the created vector data reference because it is not addressable. */
3079 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3080 get_alias_set (DR_REF (dr))))
3082 vect_ptr_type
3083 = build_pointer_type_for_mode (vectype,
3084 TYPE_MODE (vect_ptr_type), true);
3085 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3086 get_name (base_name));
3089 /* Likewise for any of the data references in the stmt group. */
3090 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3092 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3095 tree lhs = gimple_assign_lhs (orig_stmt);
3096 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3097 get_alias_set (lhs)))
3099 vect_ptr_type
3100 = build_pointer_type_for_mode (vectype,
3101 TYPE_MODE (vect_ptr_type), true);
3102 vect_ptr
3103 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3104 get_name (base_name));
3105 break;
3108 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3110 while (orig_stmt);
3113 add_referenced_var (vect_ptr);
3115 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3116 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3117 def-use update cycles for the pointer: one relative to the outer-loop
3118 (LOOP), which is what steps (3) and (4) below do. The other is relative
3119 to the inner-loop (which is the inner-most loop containing the dataref),
3120 and this is done by step (5) below.
3122 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3123 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3124 redundant. Steps (3),(4) create the following:
3126 vp0 = &base_addr;
3127 LOOP: vp1 = phi(vp0,vp2)
3130 vp2 = vp1 + step
3131 goto LOOP
3133 If there is an inner-loop nested in loop, then step (5) will also be
3134 applied, and an additional update in the inner-loop will be created:
3136 vp0 = &base_addr;
3137 LOOP: vp1 = phi(vp0,vp2)
3139 inner: vp3 = phi(vp1,vp4)
3140 vp4 = vp3 + inner_step
3141 if () goto inner
3143 vp2 = vp1 + step
3144 if () goto LOOP */
3146 /* (2) Calculate the initial address of the vector-pointer, and set
3147 the vector-pointer to point to it before the loop. */
3149 /* Create: &(base[init_val+offset]) in the loop preheader. */
3151 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3152 offset, loop);
3153 if (new_stmt_list)
3155 if (pe)
3157 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3158 gcc_assert (!new_bb);
3160 else
3161 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3164 *initial_address = new_temp;
3166 /* Create: p = (vectype *) initial_base */
3167 if (TREE_CODE (new_temp) != SSA_NAME
3168 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3170 vec_stmt = gimple_build_assign (vect_ptr,
3171 fold_convert (vect_ptr_type, new_temp));
3172 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3173 /* Copy the points-to information if it exists. */
3174 if (DR_PTR_INFO (dr))
3175 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3176 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3177 if (pe)
3179 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3180 gcc_assert (!new_bb);
3182 else
3183 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3185 else
3186 vect_ptr_init = new_temp;
3188 /* (3) Handle the updating of the vector-pointer inside the loop.
3189 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3190 inner-loop nested in LOOP (during outer-loop vectorization). */
3192 /* No update in loop is required. */
3193 if (only_init && (!loop_vinfo || at_loop == loop))
3194 vptr = vect_ptr_init;
3195 else
3197 /* The step of the vector pointer is the Vector Size. */
3198 tree step = TYPE_SIZE_UNIT (vectype);
3199 /* One exception to the above is when the scalar step of the load in
3200 LOOP is zero. In this case the step here is also zero. */
3201 if (*inv_p)
3202 step = size_zero_node;
3203 else if (negative)
3204 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3206 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3208 create_iv (vect_ptr_init,
3209 fold_convert (vect_ptr_type, step),
3210 vect_ptr, loop, &incr_gsi, insert_after,
3211 &indx_before_incr, &indx_after_incr);
3212 incr = gsi_stmt (incr_gsi);
3213 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3215 /* Copy the points-to information if it exists. */
3216 if (DR_PTR_INFO (dr))
3218 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3219 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3221 if (ptr_incr)
3222 *ptr_incr = incr;
3224 vptr = indx_before_incr;
3227 if (!nested_in_vect_loop || only_init)
3228 return vptr;
3231 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3232 nested in LOOP, if it exists. */
3234 gcc_assert (nested_in_vect_loop);
3235 if (!only_init)
3237 standard_iv_increment_position (containing_loop, &incr_gsi,
3238 &insert_after);
3239 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3240 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3241 &indx_after_incr);
3242 incr = gsi_stmt (incr_gsi);
3243 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3245 /* Copy the points-to information if it exists. */
3246 if (DR_PTR_INFO (dr))
3248 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3249 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3251 if (ptr_incr)
3252 *ptr_incr = incr;
3254 return indx_before_incr;
3256 else
3257 gcc_unreachable ();
3261 /* Function bump_vector_ptr
3263 Increment a pointer (to a vector type) by vector-size. If requested,
3264 i.e. if PTR_INCR is given, then also connect the new increment stmt
3265 to the existing def-use update-chain of the pointer, by modifying
3266 the PTR_INCR as illustrated below:
3268 The pointer def-use update-chain before this function:
3269 DATAREF_PTR = phi (p_0, p_2)
3270 ....
3271 PTR_INCR: p_2 = DATAREF_PTR + step
3273 The pointer def-use update-chain after this function:
3274 DATAREF_PTR = phi (p_0, p_2)
3275 ....
3276 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3277 ....
3278 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3280 Input:
3281 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3282 in the loop.
3283 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3284 the loop. The increment amount across iterations is expected
3285 to be vector_size.
3286 BSI - location where the new update stmt is to be placed.
3287 STMT - the original scalar memory-access stmt that is being vectorized.
3288 BUMP - optional. The offset by which to bump the pointer. If not given,
3289 the offset is assumed to be vector_size.
3291 Output: Return NEW_DATAREF_PTR as illustrated above.
3295 tree
3296 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3297 gimple stmt, tree bump)
3299 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3300 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3301 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3302 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3303 tree update = TYPE_SIZE_UNIT (vectype);
3304 gimple incr_stmt;
3305 ssa_op_iter iter;
3306 use_operand_p use_p;
3307 tree new_dataref_ptr;
3309 if (bump)
3310 update = bump;
3312 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3313 dataref_ptr, update);
3314 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3315 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3316 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3318 /* Copy the points-to information if it exists. */
3319 if (DR_PTR_INFO (dr))
3321 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3322 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3323 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3326 if (!ptr_incr)
3327 return new_dataref_ptr;
3329 /* Update the vector-pointer's cross-iteration increment. */
3330 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3332 tree use = USE_FROM_PTR (use_p);
3334 if (use == dataref_ptr)
3335 SET_USE (use_p, new_dataref_ptr);
3336 else
3337 gcc_assert (tree_int_cst_compare (use, update) == 0);
3340 return new_dataref_ptr;
3344 /* Function vect_create_destination_var.
3346 Create a new temporary of type VECTYPE. */
3348 tree
3349 vect_create_destination_var (tree scalar_dest, tree vectype)
3351 tree vec_dest;
3352 const char *new_name;
3353 tree type;
3354 enum vect_var_kind kind;
3356 kind = vectype ? vect_simple_var : vect_scalar_var;
3357 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3359 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3361 new_name = get_name (scalar_dest);
3362 if (!new_name)
3363 new_name = "var_";
3364 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3365 add_referenced_var (vec_dest);
3367 return vec_dest;
3370 /* Function vect_strided_store_supported.
3372 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3373 and FALSE otherwise. */
3375 bool
3376 vect_strided_store_supported (tree vectype)
3378 optab interleave_high_optab, interleave_low_optab;
3379 enum machine_mode mode;
3381 mode = TYPE_MODE (vectype);
3383 /* Check that the operation is supported. */
3384 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3385 vectype, optab_default);
3386 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3387 vectype, optab_default);
3388 if (!interleave_high_optab || !interleave_low_optab)
3390 if (vect_print_dump_info (REPORT_DETAILS))
3391 fprintf (vect_dump, "no optab for interleave.");
3392 return false;
3395 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3396 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3398 if (vect_print_dump_info (REPORT_DETAILS))
3399 fprintf (vect_dump, "interleave op not supported by target.");
3400 return false;
3403 return true;
3407 /* Function vect_permute_store_chain.
3409 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3410 a power of 2, generate interleave_high/low stmts to reorder the data
3411 correctly for the stores. Return the final references for stores in
3412 RESULT_CHAIN.
3414 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3415 The input is 4 vectors each containing 8 elements. We assign a number to
3416 each element, the input sequence is:
3418 1st vec: 0 1 2 3 4 5 6 7
3419 2nd vec: 8 9 10 11 12 13 14 15
3420 3rd vec: 16 17 18 19 20 21 22 23
3421 4th vec: 24 25 26 27 28 29 30 31
3423 The output sequence should be:
3425 1st vec: 0 8 16 24 1 9 17 25
3426 2nd vec: 2 10 18 26 3 11 19 27
3427 3rd vec: 4 12 20 28 5 13 21 29
3428 4th vec: 6 14 22 30 7 15 23 31
3430 i.e., we interleave the contents of the four vectors in their order.
3432 We use interleave_high/low instructions to create such output. The input of
3433 each interleave_high/low operation is two vectors:
3434 1st vec 2nd vec
3435 0 1 2 3 4 5 6 7
3436 the even elements of the result vector are obtained left-to-right from the
3437 high/low elements of the first vector. The odd elements of the result are
3438 obtained left-to-right from the high/low elements of the second vector.
3439 The output of interleave_high will be: 0 4 1 5
3440 and of interleave_low: 2 6 3 7
3443 The permutation is done in log LENGTH stages. In each stage interleave_high
3444 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3445 where the first argument is taken from the first half of DR_CHAIN and the
3446 second argument from its second half.
3447 In our example,
3449 I1: interleave_high (1st vec, 3rd vec)
3450 I2: interleave_low (1st vec, 3rd vec)
3451 I3: interleave_high (2nd vec, 4th vec)
3452 I4: interleave_low (2nd vec, 4th vec)
3454 The output for the first stage is:
3456 I1: 0 16 1 17 2 18 3 19
3457 I2: 4 20 5 21 6 22 7 23
3458 I3: 8 24 9 25 10 26 11 27
3459 I4: 12 28 13 29 14 30 15 31
3461 The output of the second stage, i.e. the final result is:
3463 I1: 0 8 16 24 1 9 17 25
3464 I2: 2 10 18 26 3 11 19 27
3465 I3: 4 12 20 28 5 13 21 29
3466 I4: 6 14 22 30 7 15 23 31. */
3468 bool
3469 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3470 unsigned int length,
3471 gimple stmt,
3472 gimple_stmt_iterator *gsi,
3473 VEC(tree,heap) **result_chain)
3475 tree perm_dest, vect1, vect2, high, low;
3476 gimple perm_stmt;
3477 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3478 int i;
3479 unsigned int j;
3480 enum tree_code high_code, low_code;
3482 /* Check that the operation is supported. */
3483 if (!vect_strided_store_supported (vectype))
3484 return false;
3486 *result_chain = VEC_copy (tree, heap, dr_chain);
3488 for (i = 0; i < exact_log2 (length); i++)
3490 for (j = 0; j < length/2; j++)
3492 vect1 = VEC_index (tree, dr_chain, j);
3493 vect2 = VEC_index (tree, dr_chain, j+length/2);
3495 /* Create interleaving stmt:
3496 in the case of big endian:
3497 high = interleave_high (vect1, vect2)
3498 and in the case of little endian:
3499 high = interleave_low (vect1, vect2). */
3500 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3501 DECL_GIMPLE_REG_P (perm_dest) = 1;
3502 add_referenced_var (perm_dest);
3503 if (BYTES_BIG_ENDIAN)
3505 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3506 low_code = VEC_INTERLEAVE_LOW_EXPR;
3508 else
3510 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3511 high_code = VEC_INTERLEAVE_LOW_EXPR;
3513 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3514 vect1, vect2);
3515 high = make_ssa_name (perm_dest, perm_stmt);
3516 gimple_assign_set_lhs (perm_stmt, high);
3517 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3518 VEC_replace (tree, *result_chain, 2*j, high);
3520 /* Create interleaving stmt:
3521 in the case of big endian:
3522 low = interleave_low (vect1, vect2)
3523 and in the case of little endian:
3524 low = interleave_high (vect1, vect2). */
3525 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3526 DECL_GIMPLE_REG_P (perm_dest) = 1;
3527 add_referenced_var (perm_dest);
3528 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3529 vect1, vect2);
3530 low = make_ssa_name (perm_dest, perm_stmt);
3531 gimple_assign_set_lhs (perm_stmt, low);
3532 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3533 VEC_replace (tree, *result_chain, 2*j+1, low);
3535 dr_chain = VEC_copy (tree, heap, *result_chain);
3537 return true;
3540 /* Function vect_setup_realignment
3542 This function is called when vectorizing an unaligned load using
3543 the dr_explicit_realign[_optimized] scheme.
3544 This function generates the following code at the loop prolog:
3546 p = initial_addr;
3547 x msq_init = *(floor(p)); # prolog load
3548 realignment_token = call target_builtin;
3549 loop:
3550 x msq = phi (msq_init, ---)
3552 The stmts marked with x are generated only for the case of
3553 dr_explicit_realign_optimized.
3555 The code above sets up a new (vector) pointer, pointing to the first
3556 location accessed by STMT, and a "floor-aligned" load using that pointer.
3557 It also generates code to compute the "realignment-token" (if the relevant
3558 target hook was defined), and creates a phi-node at the loop-header bb
3559 whose arguments are the result of the prolog-load (created by this
3560 function) and the result of a load that takes place in the loop (to be
3561 created by the caller to this function).
3563 For the case of dr_explicit_realign_optimized:
3564 The caller to this function uses the phi-result (msq) to create the
3565 realignment code inside the loop, and sets up the missing phi argument,
3566 as follows:
3567 loop:
3568 msq = phi (msq_init, lsq)
3569 lsq = *(floor(p')); # load in loop
3570 result = realign_load (msq, lsq, realignment_token);
3572 For the case of dr_explicit_realign:
3573 loop:
3574 msq = *(floor(p)); # load in loop
3575 p' = p + (VS-1);
3576 lsq = *(floor(p')); # load in loop
3577 result = realign_load (msq, lsq, realignment_token);
3579 Input:
3580 STMT - (scalar) load stmt to be vectorized. This load accesses
3581 a memory location that may be unaligned.
3582 GSI - place where new code is to be inserted.
3583 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3584 is used.
3586 Output:
3587 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3588 target hook, if defined.
3589 Return value - the result of the loop-header phi node. */
3591 tree
3592 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3593 tree *realignment_token,
3594 enum dr_alignment_support alignment_support_scheme,
3595 tree init_addr,
3596 struct loop **at_loop)
3598 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3599 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3600 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3601 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3602 struct loop *loop = NULL;
3603 edge pe = NULL;
3604 tree scalar_dest = gimple_assign_lhs (stmt);
3605 tree vec_dest;
3606 gimple inc;
3607 tree ptr;
3608 tree data_ref;
3609 gimple new_stmt;
3610 basic_block new_bb;
3611 tree msq_init = NULL_TREE;
3612 tree new_temp;
3613 gimple phi_stmt;
3614 tree msq = NULL_TREE;
3615 gimple_seq stmts = NULL;
3616 bool inv_p;
3617 bool compute_in_loop = false;
3618 bool nested_in_vect_loop = false;
3619 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3620 struct loop *loop_for_initial_load = NULL;
3622 if (loop_vinfo)
3624 loop = LOOP_VINFO_LOOP (loop_vinfo);
3625 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3628 gcc_assert (alignment_support_scheme == dr_explicit_realign
3629 || alignment_support_scheme == dr_explicit_realign_optimized);
3631 /* We need to generate three things:
3632 1. the misalignment computation
3633 2. the extra vector load (for the optimized realignment scheme).
3634 3. the phi node for the two vectors from which the realignment is
3635 done (for the optimized realignment scheme). */
3637 /* 1. Determine where to generate the misalignment computation.
3639 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3640 calculation will be generated by this function, outside the loop (in the
3641 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3642 caller, inside the loop.
3644 Background: If the misalignment remains fixed throughout the iterations of
3645 the loop, then both realignment schemes are applicable, and also the
3646 misalignment computation can be done outside LOOP. This is because we are
3647 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3648 are a multiple of VS (the Vector Size), and therefore the misalignment in
3649 different vectorized LOOP iterations is always the same.
3650 The problem arises only if the memory access is in an inner-loop nested
3651 inside LOOP, which is now being vectorized using outer-loop vectorization.
3652 This is the only case when the misalignment of the memory access may not
3653 remain fixed throughout the iterations of the inner-loop (as explained in
3654 detail in vect_supportable_dr_alignment). In this case, not only is the
3655 optimized realignment scheme not applicable, but also the misalignment
3656 computation (and generation of the realignment token that is passed to
3657 REALIGN_LOAD) has to be done inside the loop.
3659 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3660 or not, which in turn determines if the misalignment is computed inside
3661 the inner-loop, or outside LOOP. */
3663 if (init_addr != NULL_TREE || !loop_vinfo)
3665 compute_in_loop = true;
3666 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3670 /* 2. Determine where to generate the extra vector load.
3672 For the optimized realignment scheme, instead of generating two vector
3673 loads in each iteration, we generate a single extra vector load in the
3674 preheader of the loop, and in each iteration reuse the result of the
3675 vector load from the previous iteration. In case the memory access is in
3676 an inner-loop nested inside LOOP, which is now being vectorized using
3677 outer-loop vectorization, we need to determine whether this initial vector
3678 load should be generated at the preheader of the inner-loop, or can be
3679 generated at the preheader of LOOP. If the memory access has no evolution
3680 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3681 to be generated inside LOOP (in the preheader of the inner-loop). */
3683 if (nested_in_vect_loop)
3685 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3686 bool invariant_in_outerloop =
3687 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3688 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3690 else
3691 loop_for_initial_load = loop;
3692 if (at_loop)
3693 *at_loop = loop_for_initial_load;
3695 if (loop_for_initial_load)
3696 pe = loop_preheader_edge (loop_for_initial_load);
3698 /* 3. For the case of the optimized realignment, create the first vector
3699 load at the loop preheader. */
3701 if (alignment_support_scheme == dr_explicit_realign_optimized)
3703 /* Create msq_init = *(floor(p1)) in the loop preheader */
3705 gcc_assert (!compute_in_loop);
3706 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3707 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3708 &init_addr, &inc, true, &inv_p);
3709 new_stmt = gimple_build_assign_with_ops
3710 (BIT_AND_EXPR, NULL_TREE, ptr,
3711 build_int_cst (TREE_TYPE (ptr),
3712 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3713 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3714 gimple_assign_set_lhs (new_stmt, new_temp);
3715 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3716 gcc_assert (!new_bb);
3717 data_ref
3718 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3719 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3720 new_stmt = gimple_build_assign (vec_dest, data_ref);
3721 new_temp = make_ssa_name (vec_dest, new_stmt);
3722 gimple_assign_set_lhs (new_stmt, new_temp);
3723 mark_symbols_for_renaming (new_stmt);
3724 if (pe)
3726 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3727 gcc_assert (!new_bb);
3729 else
3730 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3732 msq_init = gimple_assign_lhs (new_stmt);
3735 /* 4. Create realignment token using a target builtin, if available.
3736 It is done either inside the containing loop, or before LOOP (as
3737 determined above). */
3739 if (targetm.vectorize.builtin_mask_for_load)
3741 tree builtin_decl;
3743 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3744 if (!init_addr)
3746 /* Generate the INIT_ADDR computation outside LOOP. */
3747 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3748 NULL_TREE, loop);
3749 if (loop)
3751 pe = loop_preheader_edge (loop);
3752 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3753 gcc_assert (!new_bb);
3755 else
3756 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3759 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3760 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3761 vec_dest =
3762 vect_create_destination_var (scalar_dest,
3763 gimple_call_return_type (new_stmt));
3764 new_temp = make_ssa_name (vec_dest, new_stmt);
3765 gimple_call_set_lhs (new_stmt, new_temp);
3767 if (compute_in_loop)
3768 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3769 else
3771 /* Generate the misalignment computation outside LOOP. */
3772 pe = loop_preheader_edge (loop);
3773 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3774 gcc_assert (!new_bb);
3777 *realignment_token = gimple_call_lhs (new_stmt);
3779 /* The result of the CALL_EXPR to this builtin is determined from
3780 the value of the parameter and no global variables are touched,
3781 which makes the builtin a "const" function. Requiring the
3782 builtin to have the "const" attribute makes it unnecessary
3783 to call mark_call_clobbered. */
3784 gcc_assert (TREE_READONLY (builtin_decl));
3787 if (alignment_support_scheme == dr_explicit_realign)
3788 return msq;
3790 gcc_assert (!compute_in_loop);
3791 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3794 /* 5. Create msq = phi <msq_init, lsq> in loop */
3796 pe = loop_preheader_edge (containing_loop);
3797 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3798 msq = make_ssa_name (vec_dest, NULL);
3799 phi_stmt = create_phi_node (msq, containing_loop->header);
3800 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3801 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3803 return msq;
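/* Illustrative sketch, guarded out of the build: what the msq/lsq/
   REALIGN_LOAD sequence set up above computes at run time.  VS, MEM and OFF
   are made up for the example; the real realignment token is a
   target-specific permutation mask rather than the modulo arithmetic used
   here.  */
#if 0
#include <stdio.h>

#define VS 4  /* vector size in elements, for the sketch only */

int
main (void)
{
  int mem[16], result[VS], i;
  int off = 3;                                  /* misalignment in elements;
                                                   the access is &mem[off]  */
  int *msq = &mem[(off / VS) * VS];             /* *(floor (p))             */
  int *lsq = &mem[((off + VS - 1) / VS) * VS];  /* *(floor (p + VS - 1))    */

  for (i = 0; i < 16; i++)
    mem[i] = i;

  /* result = realign_load <msq, lsq, token>: take the VS consecutive
     elements starting at the misalignment offset within the two aligned
     loads.  */
  for (i = 0; i < VS; i++)
    result[i] = off % VS + i < VS ? msq[off % VS + i] : lsq[off % VS + i - VS];

  for (i = 0; i < VS; i++)
    printf ("%d ", result[i]);  /* prints 3 4 5 6, i.e. mem[off .. off+VS-1] */
  printf ("\n");
  return 0;
}
#endif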
3807 /* Function vect_strided_load_supported.
3809 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3810 and FALSE otherwise. */
3812 bool
3813 vect_strided_load_supported (tree vectype)
3815 optab perm_even_optab, perm_odd_optab;
3816 enum machine_mode mode;
3818 mode = TYPE_MODE (vectype);
3820 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3821 optab_default);
3822 if (!perm_even_optab)
3824 if (vect_print_dump_info (REPORT_DETAILS))
3825 fprintf (vect_dump, "no optab for perm_even.");
3826 return false;
3829 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3831 if (vect_print_dump_info (REPORT_DETAILS))
3832 fprintf (vect_dump, "perm_even op not supported by target.");
3833 return false;
3836 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3837 optab_default);
3838 if (!perm_odd_optab)
3840 if (vect_print_dump_info (REPORT_DETAILS))
3841 fprintf (vect_dump, "no optab for perm_odd.");
3842 return false;
3845 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3847 if (vect_print_dump_info (REPORT_DETAILS))
3848 fprintf (vect_dump, "perm_odd op not supported by target.");
3849 return false;
3851 return true;
3855 /* Function vect_permute_load_chain.
3857 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3858 a power of 2, generate extract_even/odd stmts to reorder the input data
3859 correctly. Return the final references for loads in RESULT_CHAIN.
3861 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3862 The input is 4 vectors each containing 8 elements. We assign a number to each
3863 element; the input sequence is:
3865 1st vec: 0 1 2 3 4 5 6 7
3866 2nd vec: 8 9 10 11 12 13 14 15
3867 3rd vec: 16 17 18 19 20 21 22 23
3868 4th vec: 24 25 26 27 28 29 30 31
3870 The output sequence should be:
3872 1st vec: 0 4 8 12 16 20 24 28
3873 2nd vec: 1 5 9 13 17 21 25 29
3874 3rd vec: 2 6 10 14 18 22 26 30
3875 4th vec: 3 7 11 15 19 23 27 31
3877 i.e., the first output vector should contain the first elements of each
3878 interleaving group, etc.
3880 We use extract_even/odd instructions to create such output. The input of
3881 each extract_even/odd operation is two vectors
3882 1st vec 2nd vec
3883 0 1 2 3 4 5 6 7
3885 and the output is the vector of extracted even/odd elements. The output of
3886 extract_even will be: 0 2 4 6
3887 and of extract_odd: 1 3 5 7
3890 The permutation is done in log2 (LENGTH) stages. In each stage extract_even
3891 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3892 their order. In our example,
3894 E1: extract_even (1st vec, 2nd vec)
3895 E2: extract_odd (1st vec, 2nd vec)
3896 E3: extract_even (3rd vec, 4th vec)
3897 E4: extract_odd (3rd vec, 4th vec)
3899 The output for the first stage will be:
3901 E1: 0 2 4 6 8 10 12 14
3902 E2: 1 3 5 7 9 11 13 15
3903 E3: 16 18 20 22 24 26 28 30
3904 E4: 17 19 21 23 25 27 29 31
3906 In order to proceed and create the correct sequence for the next stage (or
3907 for the correct output, if the second stage is the last one, as in our
3908 example), we first put the output of the extract_even operation and then the
3909 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3910 The input for the second stage is:
3912 1st vec (E1): 0 2 4 6 8 10 12 14
3913 2nd vec (E3): 16 18 20 22 24 26 28 30
3914 3rd vec (E2): 1 3 5 7 9 11 13 15
3915 4th vec (E4): 17 19 21 23 25 27 29 31
3917 The output of the second stage:
3919 E1: 0 4 8 12 16 20 24 28
3920 E2: 2 6 10 14 18 22 26 30
3921 E3: 1 5 9 13 17 21 25 29
3922 E4: 3 7 11 15 19 23 27 31
3924 And RESULT_CHAIN after reordering:
3926 1st vec (E1): 0 4 8 12 16 20 24 28
3927 2nd vec (E3): 1 5 9 13 17 21 25 29
3928 3rd vec (E2): 2 6 10 14 18 22 26 30
3929 4th vec (E4): 3 7 11 15 19 23 27 31. */
3931 bool
3932 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3933 unsigned int length,
3934 gimple stmt,
3935 gimple_stmt_iterator *gsi,
3936 VEC(tree,heap) **result_chain)
3938 tree perm_dest, data_ref, first_vect, second_vect;
3939 gimple perm_stmt;
3940 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3941 int i;
3942 unsigned int j;
3944 /* Check that the operation is supported. */
3945 if (!vect_strided_load_supported (vectype))
3946 return false;
3948 *result_chain = VEC_copy (tree, heap, dr_chain);
3949 for (i = 0; i < exact_log2 (length); i++)
3951 for (j = 0; j < length; j +=2)
3953 first_vect = VEC_index (tree, dr_chain, j);
3954 second_vect = VEC_index (tree, dr_chain, j+1);
3956 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3957 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3958 DECL_GIMPLE_REG_P (perm_dest) = 1;
3959 add_referenced_var (perm_dest);
3961 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3962 perm_dest, first_vect,
3963 second_vect);
3965 data_ref = make_ssa_name (perm_dest, perm_stmt);
3966 gimple_assign_set_lhs (perm_stmt, data_ref);
3967 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3968 mark_symbols_for_renaming (perm_stmt);
3970 VEC_replace (tree, *result_chain, j/2, data_ref);
3972 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3973 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3974 DECL_GIMPLE_REG_P (perm_dest) = 1;
3975 add_referenced_var (perm_dest);
3977 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3978 perm_dest, first_vect,
3979 second_vect);
3980 data_ref = make_ssa_name (perm_dest, perm_stmt);
3981 gimple_assign_set_lhs (perm_stmt, data_ref);
3982 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3983 mark_symbols_for_renaming (perm_stmt);
3985 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3987 dr_chain = VEC_copy (tree, heap, *result_chain);
3989 return true;
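/* Illustrative sketch, guarded out of the build: the extract_even/odd
   permutation above, applied to the worked example from the comment
   (LENGTH == 4 vectors of 8 elements).  NVEC, NELT and the arrays are made
   up for the example.  */
#if 0
#include <stdio.h>
#include <string.h>

#define NVEC 4  /* LENGTH in the comment above */
#define NELT 8  /* elements per vector         */

int
main (void)
{
  int chain[NVEC][NELT], result[NVEC][NELT];
  int stage, j, k;

  /* Input as in the comment: vector J holds elements J*NELT .. J*NELT+7.  */
  for (j = 0; j < NVEC; j++)
    for (k = 0; k < NELT; k++)
      chain[j][k] = j * NELT + k;

  /* log2 (NVEC) == 2 stages; each stage pairs up the vectors in CHAIN and
     puts the extract_even results in the first half of RESULT and the
     extract_odd results in the second half, as the loop above does.  */
  for (stage = 0; stage < 2; stage++)
    {
      for (j = 0; j < NVEC; j += 2)
        for (k = 0; k < NELT / 2; k++)
          {
            result[j / 2][k] = chain[j][2 * k];                      /* even */
            result[j / 2][k + NELT / 2] = chain[j + 1][2 * k];
            result[j / 2 + NVEC / 2][k] = chain[j][2 * k + 1];       /* odd  */
            result[j / 2 + NVEC / 2][k + NELT / 2] = chain[j + 1][2 * k + 1];
          }
      memcpy (chain, result, sizeof (chain));
    }

  /* Prints 0 4 8 ... 28, then 1 5 ... 29, 2 6 ... 30 and 3 7 ... 31 -
     exactly the RESULT_CHAIN shown in the comment.  */
  for (j = 0; j < NVEC; j++)
    {
      for (k = 0; k < NELT; k++)
        printf ("%d ", chain[j][k]);
      printf ("\n");
    }
  return 0;
}
#endif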
3993 /* Function vect_transform_strided_load.
3995 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3996 to perform their permutation and ascribe the resulting vectorized statements to
3997 the scalar statements.
4000 bool
4001 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4002 gimple_stmt_iterator *gsi)
4004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4005 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4006 gimple next_stmt, new_stmt;
4007 VEC(tree,heap) *result_chain = NULL;
4008 unsigned int i, gap_count;
4009 tree tmp_data_ref;
4011 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4012 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
4013 vectors that are ready for vector computation. */
4014 result_chain = VEC_alloc (tree, heap, size);
4015 /* Permute. */
4016 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
4017 return false;
4019 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4020 Since we scan the chain starting from its first node, their order
4021 corresponds to the order of data-refs in RESULT_CHAIN. */
4022 next_stmt = first_stmt;
4023 gap_count = 1;
4024 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4026 if (!next_stmt)
4027 break;
4029 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4030 code elimination pass later. No need to check for the first stmt in
4031 the group, since it always exists.
4032 DR_GROUP_GAP is the number of steps in elements from the previous
4033 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4034 correspond to the gaps. */
4035 if (next_stmt != first_stmt
4036 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4038 gap_count++;
4039 continue;
4042 while (next_stmt)
4044 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4045 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4046 copies, and we put the new vector statement in the first available
4047 RELATED_STMT. */
4048 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4049 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4050 else
4052 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4054 gimple prev_stmt =
4055 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4056 gimple rel_stmt =
4057 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4058 while (rel_stmt)
4060 prev_stmt = rel_stmt;
4061 rel_stmt =
4062 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4065 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4066 new_stmt;
4070 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4071 gap_count = 1;
4072 /* If NEXT_STMT accesses the same DR as the previous statement,
4073 put the same TMP_DATA_REF as its vectorized statement; otherwise
4074 get the next data-ref from RESULT_CHAIN. */
4075 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4076 break;
4080 VEC_free (tree, heap, result_chain);
4081 return true;
4084 /* Function vect_can_force_dr_alignment_p.
4086 Returns whether the alignment of DECL can be forced to an ALIGNMENT-bit
4087 boundary. */
4089 bool
4090 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4092 if (TREE_CODE (decl) != VAR_DECL)
4093 return false;
4095 if (DECL_EXTERNAL (decl))
4096 return false;
4098 if (TREE_ASM_WRITTEN (decl))
4099 return false;
4101 if (TREE_STATIC (decl))
4102 return (alignment <= MAX_OFILE_ALIGNMENT);
4103 else
4104 return (alignment <= MAX_STACK_ALIGNMENT);
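/* Illustrative sketch, guarded out of the build: at the source level,
   forcing the alignment of a vectorizable static array corresponds to
   something like the following (the array, its size and the 16-byte
   alignment are made up for the example).  With the base known to be
   sufficiently aligned, the accesses in the loop can become aligned vector
   accesses instead of needing one of the realignment schemes above.  */
#if 0
static float a[256] __attribute__ ((aligned (16)));

float
sum (void)
{
  float s = 0.0f;
  int i;

  for (i = 0; i < 256; i++)
    s += a[i];
  return s;
}
#endif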
4108 /* Return whether the data reference DR is supported with respect to its
4109 alignment.
4110 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4111 if it is aligned, i.e., check if it is possible to vectorize it with different
4112 alignment. */
4114 enum dr_alignment_support
4115 vect_supportable_dr_alignment (struct data_reference *dr,
4116 bool check_aligned_accesses)
4118 gimple stmt = DR_STMT (dr);
4119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4120 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4121 enum machine_mode mode = TYPE_MODE (vectype);
4122 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4123 struct loop *vect_loop = NULL;
4124 bool nested_in_vect_loop = false;
4126 if (aligned_access_p (dr) && !check_aligned_accesses)
4127 return dr_aligned;
4129 if (loop_vinfo)
4131 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4132 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4135 /* Possibly unaligned access. */
4137 /* We can choose between using the implicit realignment scheme (generating
4138 a misaligned_move stmt) and the explicit realignment scheme (generating
4139 aligned loads with a REALIGN_LOAD). There are two variants to the
4140 explicit realignment scheme: optimized, and unoptimized.
4141 We can optimize the realignment only if the step between consecutive
4142 vector loads is equal to the vector size. Since the vector memory
4143 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4144 is guaranteed that the misalignment amount remains the same throughout the
4145 execution of the vectorized loop. Therefore, we can create the
4146 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4147 at the loop preheader.
4149 However, in the case of outer-loop vectorization, when vectorizing a
4150 memory access in the inner-loop nested within the LOOP that is now being
4151 vectorized, while it is guaranteed that the misalignment of the
4152 vectorized memory access will remain the same in different outer-loop
4153 iterations, it is *not* guaranteed that it will remain the same throughout
4154 the execution of the inner-loop. This is because the inner-loop advances
4155 with the original scalar step (and not in steps of VS). If the inner-loop
4156 step happens to be a multiple of VS, then the misalignment remains fixed
4157 and we can use the optimized realignment scheme. For example:
4159 for (i=0; i<N; i++)
4160 for (j=0; j<M; j++)
4161 s += a[i+j];
4163 When vectorizing the i-loop in the above example, the step between
4164 consecutive vector loads is 1, and so the misalignment does not remain
4165 fixed across the execution of the inner-loop, and the realignment cannot
4166 be optimized (as illustrated in the following pseudo vectorized loop):
4168 for (i=0; i<N; i+=4)
4169 for (j=0; j<M; j++){
4170 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4171 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4172 // (assuming that we start from an aligned address).
4175 We therefore have to use the unoptimized realignment scheme:
4177 for (i=0; i<N; i+=4)
4178 for (j=k; j<M; j+=4)
4179 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4180 // that the misalignment of the initial address is
4181 // 0).
4183 The loop can then be vectorized as follows:
4185 for (k=0; k<4; k++){
4186 rt = get_realignment_token (&vp[k]);
4187 for (i=0; i<N; i+=4){
4188 v1 = vp[i+k];
4189 for (j=k; j<M; j+=4){
4190 v2 = vp[i+j+VS-1];
4191 va = REALIGN_LOAD <v1,v2,rt>;
4192 vs += va;
4193 v1 = v2;
4196 } */
4198 if (DR_IS_READ (dr))
4200 bool is_packed = false;
4201 tree type = (TREE_TYPE (DR_REF (dr)));
4203 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4204 && (!targetm.vectorize.builtin_mask_for_load
4205 || targetm.vectorize.builtin_mask_for_load ()))
4207 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4208 if ((nested_in_vect_loop
4209 && (TREE_INT_CST_LOW (DR_STEP (dr))
4210 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4211 || !loop_vinfo)
4212 return dr_explicit_realign;
4213 else
4214 return dr_explicit_realign_optimized;
4216 if (!known_alignment_for_access_p (dr))
4218 tree ba = DR_BASE_OBJECT (dr);
4220 if (ba)
4221 is_packed = contains_packed_reference (ba);
4224 if (targetm.vectorize.
4225 support_vector_misalignment (mode, type,
4226 DR_MISALIGNMENT (dr), is_packed))
4227 /* Can't software pipeline the loads, but can at least do them. */
4228 return dr_unaligned_supported;
4230 else
4232 bool is_packed = false;
4233 tree type = (TREE_TYPE (DR_REF (dr)));
4235 if (!known_alignment_for_access_p (dr))
4237 tree ba = DR_BASE_OBJECT (dr);
4239 if (ba)
4240 is_packed = contains_packed_reference (ba);
4243 if (targetm.vectorize.
4244 support_vector_misalignment (mode, type,
4245 DR_MISALIGNMENT (dr), is_packed))
4246 return dr_unaligned_supported;
4249 /* Unsupported. */
4250 return dr_unaligned_unsupported;
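/* Illustrative sketch, guarded out of the build: the misalignment pattern
   that rules out the optimized realignment scheme in the nested-loop
   example of the comment above.  VS and the loop bounds are made up for
   the example.  */
#if 0
#include <stdio.h>

#define VS 4  /* vector size in elements, for the sketch only */

int
main (void)
{
  int i = 0, j;

  /* Misalignment (in elements) of &vp[i+j] across the inner loop: it
     cycles 0 1 2 3 0 1 ... because the inner loop advances by the scalar
     step rather than by VS, so the realignment token cannot be computed
     once in the preheader.  */
  for (j = 0; j < 8; j++)
    printf ("j=%d misalignment=%d\n", j, (i + j) % VS);
  return 0;
}
#endif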