gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return the smallest scalar part of STMT.
47 This is used to determine the vectype of the stmt. We generally set the
48 vectype according to the type of the result (lhs). For stmts whose
49 result-type is different than the type of the arguments (e.g., demotion,
50 promotion), vectype will be reset appropriately (later). Note that we have
51 to visit the smallest datatype in this function, because that determines the
52 VF. If the smallest datatype in the loop is present only as the rhs of a
53 promotion operation - we'd miss it.
54 Such a case, where a variable of this datatype does not appear in the lhs
55 anywhere in the loop, can only occur if it's an invariant: e.g.:
56 'int_x = (int) short_inv', which we'd expect to have been optimized away by
57 invariant motion. However, we cannot rely on invariant motion to always
58 take invariants out of the loop, and so in the case of promotion we also
59 have to check the rhs.
60 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
61 types. */
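/* For example (illustrative only): for a promotion statement such as
   'int_x = (int) short_y', the result type is a 4-byte int but the smallest
   scalar type involved is the 2-byte short, so the short type is returned
   with *LHS_SIZE_UNIT == 4 and *RHS_SIZE_UNIT == 2, and the 2-byte elements
   are what determine the VF.  */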
63 tree
64 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
65 HOST_WIDE_INT *rhs_size_unit)
67 tree scalar_type = gimple_expr_type (stmt);
68 HOST_WIDE_INT lhs, rhs;
70 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
72 if (is_gimple_assign (stmt)
73 && (gimple_assign_cast_p (stmt)
74 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
75 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
77 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
79 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
80 if (rhs < lhs)
81 scalar_type = rhs_type;
84 *lhs_size_unit = lhs;
85 *rhs_size_unit = rhs;
86 return scalar_type;
90 /* Find the place of the data-ref in STMT in the interleaving chain that starts
91 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
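/* For illustration: if the chain starting at FIRST_STMT is S1 -> S2 -> S3
   and STMT is S3, the returned place is 2; if STMT is not reached while
   walking the chain, -1 is returned.  */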
93 int
94 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
96 gimple next_stmt = first_stmt;
97 int result = 0;
99 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
100 return -1;
102 while (next_stmt && next_stmt != stmt)
104 result++;
105 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
108 if (next_stmt)
109 return result;
110 else
111 return -1;
115 /* Function vect_insert_into_interleaving_chain.
117 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
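/* Illustrative example (hypothetical DR_INITs): if DRB's chain currently
   holds refs with inits 0 -> 8 -> 16 and DRA has init 4, DRA is linked in
   between the first two elements, giving 0 -> 4 -> 8 -> 16; the walk keeps
   the chain sorted by increasing init.  */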
119 static void
120 vect_insert_into_interleaving_chain (struct data_reference *dra,
121 struct data_reference *drb)
123 gimple prev, next;
124 tree next_init;
125 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
126 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
128 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
129 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
130 while (next)
132 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
133 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
135 /* Insert here. */
136 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
137 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
138 return;
140 prev = next;
141 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
144 /* We got to the end of the list. Insert here. */
145 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
146 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
150 /* Function vect_update_interleaving_chain.
152 For two data-refs DRA and DRB that are part of a chain of interleaved data
153 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
155 There are four possible cases:
156 1. New stmts - both DRA and DRB are not a part of any chain:
157 FIRST_DR = DRB
158 NEXT_DR (DRB) = DRA
159 2. DRB is a part of a chain and DRA is not:
160 no need to update FIRST_DR
161 no need to insert DRB
162 insert DRA according to init
163 3. DRA is a part of a chain and DRB is not:
164 if (init of FIRST_DR > init of DRB)
165 FIRST_DR = DRB
166 NEXT(FIRST_DR) = previous FIRST_DR
167 else
168 insert DRB according to its init
169 4. both DRA and DRB are in some interleaving chains:
170 choose the chain with the smallest init of FIRST_DR
171 insert the nodes of the second chain into the first one. */
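/* For illustration, case 3 with hypothetical DR_INITs: if DRA's chain is
   4 -> 8 (DRA being the ref with init 8) and DRB has init 0, DRB becomes the
   new FIRST_DR and the old head is linked after it (0 -> 4 -> 8); if DRB's
   init were 6 instead, DRB would simply be inserted into the existing chain
   according to its init (4 -> 6 -> 8).  */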
173 static void
174 vect_update_interleaving_chain (struct data_reference *drb,
175 struct data_reference *dra)
177 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
178 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
179 tree next_init, init_dra_chain, init_drb_chain;
180 gimple first_a, first_b;
181 tree node_init;
182 gimple node, prev, next, first_stmt;
184 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
185 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
187 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
188 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
189 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
190 return;
193 /* 2. DRB is a part of a chain and DRA is not. */
194 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
196 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
197 /* Insert DRA into the chain of DRB. */
198 vect_insert_into_interleaving_chain (dra, drb);
199 return;
202 /* 3. DRA is a part of a chain and DRB is not. */
203 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
205 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
206 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
207 old_first_stmt)));
208 gimple tmp;
210 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
212 /* DRB's init is smaller than the init of the stmt previously marked
213 as the first stmt of the interleaving chain of DRA. Therefore, we
214 update FIRST_STMT and put DRB in the head of the list. */
215 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
216 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
218 /* Update all the stmts in the list to point to the new FIRST_STMT. */
219 tmp = old_first_stmt;
220 while (tmp)
222 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
223 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
226 else
228 /* Insert DRB in the list of DRA. */
229 vect_insert_into_interleaving_chain (drb, dra);
230 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
232 return;
235 /* 4. both DRA and DRB are in some interleaving chains. */
236 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
237 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
238 if (first_a == first_b)
239 return;
240 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
241 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
243 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
245 /* Insert the nodes of DRA chain into the DRB chain.
246 After inserting a node, continue from this node of the DRB chain (don't
247 start from the beginning). */
248 node = DR_GROUP_FIRST_DR (stmtinfo_a);
249 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
250 first_stmt = first_b;
252 else
254 /* Insert the nodes of DRB chain into the DRA chain.
255 After inserting a node, continue from this node of the DRA chain (don't
256 start from the beginning). */
257 node = DR_GROUP_FIRST_DR (stmtinfo_b);
258 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
259 first_stmt = first_a;
262 while (node)
264 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
265 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
266 while (next)
268 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
269 if (tree_int_cst_compare (next_init, node_init) > 0)
271 /* Insert here. */
272 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
273 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
274 prev = node;
275 break;
277 prev = next;
278 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
280 if (!next)
282 /* We got to the end of the list. Insert here. */
283 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
284 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
285 prev = node;
287 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
288 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
293 /* Function vect_equal_offsets.
295 Check if OFFSET1 and OFFSET2 are identical expressions. */
297 static bool
298 vect_equal_offsets (tree offset1, tree offset2)
300 bool res;
302 STRIP_NOPS (offset1);
303 STRIP_NOPS (offset2);
305 if (offset1 == offset2)
306 return true;
308 if (TREE_CODE (offset1) != TREE_CODE (offset2)
309 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
310 return false;
312 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
313 TREE_OPERAND (offset2, 0));
315 if (!res || !BINARY_CLASS_P (offset1))
316 return res;
318 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
319 TREE_OPERAND (offset2, 1));
321 return res;
325 /* Check dependence between DRA and DRB for basic block vectorization.
326 If the accesses share the same bases and offsets, we can compare their initial
327 constant offsets to decide whether they differ or not. In case of a read-
328 write dependence we check that the load is before the store to ensure that
329 vectorization will not change the order of the accesses. */
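/* For illustration (hypothetical refs): a load and a store of compatible
   4-byte types with the same base and offset but DR_INITs 0 and 4 touch
   different locations, so they are reported as independent; with equal
   inits the pair is reported as dependent unless the load statement comes
   before the store.  */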
331 static bool
332 vect_drs_dependent_in_basic_block (struct data_reference *dra,
333 struct data_reference *drb)
335 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
336 gimple earlier_stmt;
338 /* We only call this function for pairs of loads and stores, but we verify
339 it here. */
340 if (DR_IS_READ (dra) == DR_IS_READ (drb))
342 if (DR_IS_READ (dra))
343 return false;
344 else
345 return true;
348 /* Check that the data-refs have the same bases and offsets. If not, we can't
349 determine if they are dependent. */
350 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
351 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
352 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
353 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
354 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
355 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
356 return true;
358 /* Check the types. */
359 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
360 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
362 if (type_size_a != type_size_b
363 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
364 TREE_TYPE (DR_REF (drb))))
365 return true;
367 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
368 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
370 /* Two different locations - no dependence. */
371 if (init_a != init_b)
372 return false;
374 /* We have a read-write dependence. Check that the load is before the store.
375 When we vectorize basic blocks, a vector load can only appear before the
376 corresponding scalar load, and a vector store can only appear after its
377 corresponding scalar store. So the order of the accesses is preserved in
378 case the load is before the store. */
379 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
380 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
381 return false;
383 return true;
387 /* Function vect_check_interleaving.
389 Check if DRA and DRB are a part of interleaving. In case they are, insert
390 DRA and DRB in an interleaving chain. */
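/* Illustrative example: for two stores a[2*i] and a[2*i+1] with 4-byte
   elements, both refs have step 8, the same base and offset, and DR_INITs 0
   and 4. The init difference (4) is a multiple of the element size and does
   not exceed the step, so the two refs are recorded as one interleaving
   chain.  */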
392 static bool
393 vect_check_interleaving (struct data_reference *dra,
394 struct data_reference *drb)
396 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
398 /* Check that the data-refs have the same first location (except init) and
399 that they are both either stores or loads (not one load and one store). */
400 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
401 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
402 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
403 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
404 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
405 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
406 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
407 || DR_IS_READ (dra) != DR_IS_READ (drb))
408 return false;
410 /* Check:
411 1. data-refs are of the same type
412 2. their steps are equal
413 3. the step (if greater than zero) is greater than the difference between
414 data-refs' inits. */
415 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
416 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
418 if (type_size_a != type_size_b
419 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
420 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
421 TREE_TYPE (DR_REF (drb))))
422 return false;
424 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
425 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
426 step = TREE_INT_CST_LOW (DR_STEP (dra));
428 if (init_a > init_b)
430 /* If init_a == init_b + the size of the type * k, we have an interleaving,
431 and DRB is accessed before DRA. */
432 diff_mod_size = (init_a - init_b) % type_size_a;
434 if (step && (init_a - init_b) > step)
435 return false;
437 if (diff_mod_size == 0)
439 vect_update_interleaving_chain (drb, dra);
440 if (vect_print_dump_info (REPORT_DR_DETAILS))
442 fprintf (vect_dump, "Detected interleaving ");
443 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
444 fprintf (vect_dump, " and ");
445 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
447 return true;
450 else
452 /* If init_b == init_a + the size of the type * k, we have an
453 interleaving, and DRA is accessed before DRB. */
454 diff_mod_size = (init_b - init_a) % type_size_a;
456 if (step && (init_b - init_a) > step)
457 return false;
459 if (diff_mod_size == 0)
461 vect_update_interleaving_chain (dra, drb);
462 if (vect_print_dump_info (REPORT_DR_DETAILS))
464 fprintf (vect_dump, "Detected interleaving ");
465 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
466 fprintf (vect_dump, " and ");
467 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
469 return true;
473 return false;
476 /* Check if the data references pointed to by DR_I and DR_J are the same or
477 belong to the same interleaving group. Return FALSE if drs are
478 different, otherwise return TRUE. */
480 static bool
481 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
483 gimple stmt_i = DR_STMT (dr_i);
484 gimple stmt_j = DR_STMT (dr_j);
486 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
487 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
488 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
489 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
490 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
491 return true;
492 else
493 return false;
496 /* If address ranges represented by DDR_I and DDR_J are equal,
497 return TRUE, otherwise return FALSE. */
499 static bool
500 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
502 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
504 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
505 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
506 return true;
507 else
508 return false;
511 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
512 tested at run-time. Return TRUE if DDR was successfully inserted.
513 Return false if versioning is not supported. */
515 static bool
516 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
518 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
520 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
521 return false;
523 if (vect_print_dump_info (REPORT_DR_DETAILS))
525 fprintf (vect_dump, "mark for run-time aliasing test between ");
526 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
527 fprintf (vect_dump, " and ");
528 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
531 if (optimize_loop_nest_for_size_p (loop))
533 if (vect_print_dump_info (REPORT_DR_DETAILS))
534 fprintf (vect_dump, "versioning not supported when optimizing for size.");
535 return false;
538 /* FORNOW: We don't support versioning with outer-loop vectorization. */
539 if (loop->inner)
541 if (vect_print_dump_info (REPORT_DR_DETAILS))
542 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
543 return false;
546 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
547 return true;
551 /* Function vect_analyze_data_ref_dependence.
553 Return TRUE if there (might) exist a dependence between a memory-reference
554 DRA and a memory-reference DRB. When versioning for alias can handle the
555 dependence with a run-time check, return FALSE. Adjust *MAX_VF according to
556 the data dependence. */
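/* For illustration: a loop containing 'a[i+3] = a[i] + 1' has a dependence
   distance of 3; with an initial *MAX_VF of 8 the distance is >= 2 and
   smaller than *MAX_VF, so *MAX_VF is reduced to 3 and vectorization can
   still be attempted with a smaller factor.  */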
558 static bool
559 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
560 loop_vec_info loop_vinfo, int *max_vf,
561 bool *data_dependence_in_bb)
563 unsigned int i;
564 struct loop *loop = NULL;
565 struct data_reference *dra = DDR_A (ddr);
566 struct data_reference *drb = DDR_B (ddr);
567 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
568 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
569 lambda_vector dist_v;
570 unsigned int loop_depth;
572 /* Don't bother to analyze statements marked as unvectorizable. */
573 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
574 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
575 return false;
577 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
579 /* Independent data accesses. */
580 vect_check_interleaving (dra, drb);
581 return false;
584 if (loop_vinfo)
585 loop = LOOP_VINFO_LOOP (loop_vinfo);
587 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
588 return false;
590 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
592 if (loop_vinfo)
594 if (vect_print_dump_info (REPORT_DR_DETAILS))
596 fprintf (vect_dump, "versioning for alias required: "
597 "can't determine dependence between ");
598 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
599 fprintf (vect_dump, " and ");
600 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
603 /* Add to list of ddrs that need to be tested at run-time. */
604 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
607 /* When vectorizing a basic block, an unknown dependence can still mean
608 strided access. */
609 if (vect_check_interleaving (dra, drb))
610 return false;
612 if (vect_print_dump_info (REPORT_DR_DETAILS))
614 fprintf (vect_dump, "can't determine dependence between ");
615 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
616 fprintf (vect_dump, " and ");
617 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
620 /* We do not vectorize basic blocks with write-write dependencies. */
621 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
622 return true;
624 /* We deal with read-write dependencies in basic blocks later (by
625 verifying that all the loads in the basic block are before all the
626 stores). */
627 *data_dependence_in_bb = true;
628 return false;
631 /* Versioning for alias is not yet supported for basic block SLP, and
632 dependence distance is not applicable, hence, in case of known data
633 dependence, basic block vectorization is impossible for now. */
634 if (!loop_vinfo)
636 if (dra != drb && vect_check_interleaving (dra, drb))
637 return false;
639 if (vect_print_dump_info (REPORT_DR_DETAILS))
641 fprintf (vect_dump, "determined dependence between ");
642 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
643 fprintf (vect_dump, " and ");
644 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
647 /* Do not vectorize basic blocks with write-write dependences. */
648 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
649 return true;
651 /* Check if this dependence is allowed in basic block vectorization. */
652 return vect_drs_dependent_in_basic_block (dra, drb);
655 /* Loop-based vectorization and known data dependence. */
656 if (DDR_NUM_DIST_VECTS (ddr) == 0)
658 if (vect_print_dump_info (REPORT_DR_DETAILS))
660 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
661 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
662 fprintf (vect_dump, " and ");
663 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
665 /* Add to list of ddrs that need to be tested at run-time. */
666 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
669 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
670 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
672 int dist = dist_v[loop_depth];
674 if (vect_print_dump_info (REPORT_DR_DETAILS))
675 fprintf (vect_dump, "dependence distance = %d.", dist);
677 if (dist == 0)
679 if (vect_print_dump_info (REPORT_DR_DETAILS))
681 fprintf (vect_dump, "dependence distance == 0 between ");
682 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
683 fprintf (vect_dump, " and ");
684 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
687 /* For interleaving, mark that there is a read-write dependency if
688 necessary. We have already checked that one of the data-refs is a store. */
689 if (DR_IS_READ (dra))
690 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
691 else
693 if (DR_IS_READ (drb))
694 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
697 continue;
700 if (dist > 0 && DDR_REVERSED_P (ddr))
702 /* If DDR_REVERSED_P the order of the data-refs in DDR was
703 reversed (to make distance vector positive), and the actual
704 distance is negative. */
705 if (vect_print_dump_info (REPORT_DR_DETAILS))
706 fprintf (vect_dump, "dependence distance negative.");
707 continue;
710 if (abs (dist) >= 2
711 && abs (dist) < *max_vf)
713 /* The dependence distance requires reduction of the maximal
714 vectorization factor. */
715 *max_vf = abs (dist);
716 if (vect_print_dump_info (REPORT_DR_DETAILS))
717 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
718 *max_vf);
721 if (abs (dist) >= *max_vf)
723 /* Dependence distance does not create dependence, as far as
724 vectorization is concerned, in this case. */
725 if (vect_print_dump_info (REPORT_DR_DETAILS))
726 fprintf (vect_dump, "dependence distance >= VF.");
727 continue;
730 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
732 fprintf (vect_dump, "not vectorized, possible dependence "
733 "between data-refs ");
734 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
735 fprintf (vect_dump, " and ");
736 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
739 return true;
742 return false;
745 /* Function vect_analyze_data_ref_dependences.
747 Examine all the data references in the loop, and make sure there do not
748 exist any data dependences between them. Set *MAX_VF according to
749 the maximum vectorization factor the data dependences allow. */
751 bool
752 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
753 bb_vec_info bb_vinfo, int *max_vf,
754 bool *data_dependence_in_bb)
756 unsigned int i;
757 VEC (ddr_p, heap) *ddrs = NULL;
758 struct data_dependence_relation *ddr;
760 if (vect_print_dump_info (REPORT_DETAILS))
761 fprintf (vect_dump, "=== vect_analyze_dependences ===");
763 if (loop_vinfo)
764 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
765 else
766 ddrs = BB_VINFO_DDRS (bb_vinfo);
768 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
769 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
770 data_dependence_in_bb))
771 return false;
773 return true;
777 /* Function vect_compute_data_ref_alignment
779 Compute the misalignment of the data reference DR.
781 Output:
782 1. If during the misalignment computation it is found that the data reference
783 cannot be vectorized then false is returned.
784 2. DR_MISALIGNMENT (DR) is defined.
786 FOR NOW: No analysis is actually performed. Misalignment is calculated
787 only for trivial cases. TODO. */
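/* For illustration (hypothetical numbers): for a V4SI access whose base is
   known to be aligned to 16 bytes and whose DR_INIT is 4, the misalignment
   computed below is 4 mod 16 == 4 bytes; if the base alignment can be
   neither proved nor forced, DR_MISALIGNMENT stays -1 (unknown).  */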
789 static bool
790 vect_compute_data_ref_alignment (struct data_reference *dr)
792 gimple stmt = DR_STMT (dr);
793 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
794 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
795 struct loop *loop = NULL;
796 tree ref = DR_REF (dr);
797 tree vectype;
798 tree base, base_addr;
799 bool base_aligned;
800 tree misalign;
801 tree aligned_to, alignment;
803 if (vect_print_dump_info (REPORT_DETAILS))
804 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
806 if (loop_vinfo)
807 loop = LOOP_VINFO_LOOP (loop_vinfo);
809 /* Initialize misalignment to unknown. */
810 SET_DR_MISALIGNMENT (dr, -1);
812 misalign = DR_INIT (dr);
813 aligned_to = DR_ALIGNED_TO (dr);
814 base_addr = DR_BASE_ADDRESS (dr);
815 vectype = STMT_VINFO_VECTYPE (stmt_info);
817 /* In case the dataref is in an inner-loop of the loop that is being
818 vectorized (LOOP), we use the base and misalignment information
819 relative to the outer-loop (LOOP). This is ok only if the misalignment
820 stays the same throughout the execution of the inner-loop, which is why
821 we have to check that the stride of the dataref in the inner-loop is a
822 multiple of the vector size. */
823 if (loop && nested_in_vect_loop_p (loop, stmt))
825 tree step = DR_STEP (dr);
826 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
828 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
830 if (vect_print_dump_info (REPORT_ALIGNMENT))
831 fprintf (vect_dump, "inner step divides the vector-size.");
832 misalign = STMT_VINFO_DR_INIT (stmt_info);
833 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
834 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
836 else
838 if (vect_print_dump_info (REPORT_ALIGNMENT))
839 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
840 misalign = NULL_TREE;
844 base = build_fold_indirect_ref (base_addr);
845 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
847 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
848 || !misalign)
850 if (vect_print_dump_info (REPORT_ALIGNMENT))
852 fprintf (vect_dump, "Unknown alignment for access: ");
853 print_generic_expr (vect_dump, base, TDF_SLIM);
855 return true;
858 if ((DECL_P (base)
859 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
860 alignment) >= 0)
861 || (TREE_CODE (base_addr) == SSA_NAME
862 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
863 TREE_TYPE (base_addr)))),
864 alignment) >= 0))
865 base_aligned = true;
866 else
867 base_aligned = false;
869 if (!base_aligned)
871 /* Do not change the alignment of global variables if
872 flag_section_anchors is enabled. */
873 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
874 || (TREE_STATIC (base) && flag_section_anchors))
876 if (vect_print_dump_info (REPORT_DETAILS))
878 fprintf (vect_dump, "can't force alignment of ref: ");
879 print_generic_expr (vect_dump, ref, TDF_SLIM);
881 return true;
884 /* Force the alignment of the decl.
885 NOTE: This is the only change to the code we make during
886 the analysis phase, before deciding to vectorize the loop. */
887 if (vect_print_dump_info (REPORT_DETAILS))
889 fprintf (vect_dump, "force alignment of ");
890 print_generic_expr (vect_dump, ref, TDF_SLIM);
893 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
894 DECL_USER_ALIGN (base) = 1;
897 /* At this point we assume that the base is aligned. */
898 gcc_assert (base_aligned
899 || (TREE_CODE (base) == VAR_DECL
900 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
902 /* If this is a backward running DR then first access in the larger
903 vectype actually is N-1 elements before the address in the DR.
904 Adjust misalign accordingly. */
905 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
907 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
908 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
909 otherwise we wouldn't be here. */
910 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
911 /* PLUS because DR_STEP was negative. */
912 misalign = size_binop (PLUS_EXPR, misalign, offset);
915 /* Modulo alignment. */
916 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
918 if (!host_integerp (misalign, 1))
920 /* Negative or overflowed misalignment value. */
921 if (vect_print_dump_info (REPORT_DETAILS))
922 fprintf (vect_dump, "unexpected misalign value");
923 return false;
926 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
928 if (vect_print_dump_info (REPORT_DETAILS))
930 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
931 print_generic_expr (vect_dump, ref, TDF_SLIM);
934 return true;
938 /* Function vect_compute_data_refs_alignment
940 Compute the misalignment of data references in the loop.
941 Return FALSE if a data reference is found that cannot be vectorized. */
943 static bool
944 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
945 bb_vec_info bb_vinfo)
947 VEC (data_reference_p, heap) *datarefs;
948 struct data_reference *dr;
949 unsigned int i;
951 if (loop_vinfo)
952 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
953 else
954 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
956 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
957 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
958 && !vect_compute_data_ref_alignment (dr))
960 if (bb_vinfo)
962 /* Mark unsupported statement as unvectorizable. */
963 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
964 continue;
966 else
967 return false;
970 return true;
974 /* Function vect_update_misalignment_for_peel
976 DR - the data reference whose misalignment is to be adjusted.
977 DR_PEEL - the data reference whose misalignment is being made
978 zero in the vector loop by the peel.
979 NPEEL - the number of iterations in the peel loop if the misalignment
980 of DR_PEEL is known at compile time. */
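/* A hypothetical example: with a 16-byte vector type, a 4-byte element and
   NPEEL == 2, a forward data-ref whose misalignment was 4 becomes
   (4 + 2*4) & 15 == 12 after the peel, while a data-ref recorded as having
   the same alignment as DR_PEEL simply gets misalignment 0.  */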
982 static void
983 vect_update_misalignment_for_peel (struct data_reference *dr,
984 struct data_reference *dr_peel, int npeel)
986 unsigned int i;
987 VEC(dr_p,heap) *same_align_drs;
988 struct data_reference *current_dr;
989 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
990 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
991 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
992 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
994 /* For interleaved data accesses the step in the loop must be multiplied by
995 the size of the interleaving group. */
996 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
997 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
998 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
999 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1001 /* It can be assumed that the data refs with the same alignment as dr_peel
1002 are aligned in the vector loop. */
1003 same_align_drs
1004 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1005 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1007 if (current_dr != dr)
1008 continue;
1009 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1010 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1011 SET_DR_MISALIGNMENT (dr, 0);
1012 return;
1015 if (known_alignment_for_access_p (dr)
1016 && known_alignment_for_access_p (dr_peel))
1018 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1019 int misal = DR_MISALIGNMENT (dr);
1020 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1021 misal += negative ? -npeel * dr_size : npeel * dr_size;
1022 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1023 SET_DR_MISALIGNMENT (dr, misal);
1024 return;
1027 if (vect_print_dump_info (REPORT_DETAILS))
1028 fprintf (vect_dump, "Setting misalignment to -1.");
1029 SET_DR_MISALIGNMENT (dr, -1);
1033 /* Function vect_verify_datarefs_alignment
1035 Return TRUE if all data references in the loop can be
1036 handled with respect to alignment. */
1038 bool
1039 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1041 VEC (data_reference_p, heap) *datarefs;
1042 struct data_reference *dr;
1043 enum dr_alignment_support supportable_dr_alignment;
1044 unsigned int i;
1046 if (loop_vinfo)
1047 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1048 else
1049 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1051 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1053 gimple stmt = DR_STMT (dr);
1054 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1056 /* For interleaving, only the alignment of the first access matters.
1057 Skip statements marked as not vectorizable. */
1058 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1059 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1060 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1061 continue;
1063 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1064 if (!supportable_dr_alignment)
1066 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1068 if (DR_IS_READ (dr))
1069 fprintf (vect_dump,
1070 "not vectorized: unsupported unaligned load.");
1071 else
1072 fprintf (vect_dump,
1073 "not vectorized: unsupported unaligned store.");
1075 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1077 return false;
1079 if (supportable_dr_alignment != dr_aligned
1080 && vect_print_dump_info (REPORT_ALIGNMENT))
1081 fprintf (vect_dump, "Vectorizing an unaligned access.");
1083 return true;
1087 /* Function vector_alignment_reachable_p
1089 Return true if vector alignment for DR is reachable by peeling
1090 a few loop iterations. Return false otherwise. */
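/* For illustration: with a V4SI vector type (four 4-byte elements) and a
   known misalignment of 8 bytes (2 elements), peeling 2 iterations reaches
   alignment; a misalignment of 6 bytes is not a multiple of the element
   size, so alignment is not reachable by peeling.  */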
1092 static bool
1093 vector_alignment_reachable_p (struct data_reference *dr)
1095 gimple stmt = DR_STMT (dr);
1096 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1097 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1099 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1101 /* For interleaved access we peel only if the number of iterations in
1102 the prolog loop ({VF - misalignment}) is a multiple of the
1103 number of interleaved accesses. */
1104 int elem_size, mis_in_elements;
1105 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1107 /* FORNOW: handle only known alignment. */
1108 if (!known_alignment_for_access_p (dr))
1109 return false;
1111 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1112 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1114 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1115 return false;
1118 /* If the misalignment is known at compile time then allow peeling
1119 only if natural alignment is reachable through peeling. */
1120 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1122 HOST_WIDE_INT elmsize =
1123 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1124 if (vect_print_dump_info (REPORT_DETAILS))
1126 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1127 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1129 if (DR_MISALIGNMENT (dr) % elmsize)
1131 if (vect_print_dump_info (REPORT_DETAILS))
1132 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1133 return false;
1137 if (!known_alignment_for_access_p (dr))
1139 tree type = (TREE_TYPE (DR_REF (dr)));
1140 tree ba = DR_BASE_OBJECT (dr);
1141 bool is_packed = false;
1143 if (ba)
1144 is_packed = contains_packed_reference (ba);
1146 if (vect_print_dump_info (REPORT_DETAILS))
1147 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1148 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1149 return true;
1150 else
1151 return false;
1154 return true;
1158 /* Calculate the cost of the memory access represented by DR. */
1160 static void
1161 vect_get_data_access_cost (struct data_reference *dr,
1162 unsigned int *inside_cost,
1163 unsigned int *outside_cost)
1165 gimple stmt = DR_STMT (dr);
1166 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1167 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1168 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1169 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1170 int ncopies = vf / nunits;
1171 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1173 if (!supportable_dr_alignment)
1174 *inside_cost = VECT_MAX_COST;
1175 else
1177 if (DR_IS_READ (dr))
1178 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1179 else
1180 vect_get_store_cost (dr, ncopies, inside_cost);
1183 if (vect_print_dump_info (REPORT_COST))
1184 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1185 "outside_cost = %d.", *inside_cost, *outside_cost);
1189 static hashval_t
1190 vect_peeling_hash (const void *elem)
1192 const struct _vect_peel_info *peel_info;
1194 peel_info = (const struct _vect_peel_info *) elem;
1195 return (hashval_t) peel_info->npeel;
1199 static int
1200 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1202 const struct _vect_peel_info *a, *b;
1204 a = (const struct _vect_peel_info *) elem1;
1205 b = (const struct _vect_peel_info *) elem2;
1206 return (a->npeel == b->npeel);
1210 /* Insert DR into peeling hash table with NPEEL as key. */
1212 static void
1213 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1214 int npeel)
1216 struct _vect_peel_info elem, *slot;
1217 void **new_slot;
1218 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1220 elem.npeel = npeel;
1221 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1222 &elem);
1223 if (slot)
1224 slot->count++;
1225 else
1227 slot = XNEW (struct _vect_peel_info);
1228 slot->npeel = npeel;
1229 slot->dr = dr;
1230 slot->count = 1;
1231 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1232 INSERT);
1233 *new_slot = slot;
1236 if (!supportable_dr_alignment && !flag_vect_cost_model)
1237 slot->count += VECT_MAX_COST;
1241 /* Traverse peeling hash table to find peeling option that aligns maximum
1242 number of data accesses. */
1244 static int
1245 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1247 vect_peel_info elem = (vect_peel_info) *slot;
1248 vect_peel_extended_info max = (vect_peel_extended_info) data;
1250 if (elem->count > max->peel_info.count)
1252 max->peel_info.npeel = elem->npeel;
1253 max->peel_info.count = elem->count;
1254 max->peel_info.dr = elem->dr;
1257 return 1;
1261 /* Traverse peeling hash table and calculate cost for each peeling option.
1262 Find the one with the lowest cost. */
1264 static int
1265 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1267 vect_peel_info elem = (vect_peel_info) *slot;
1268 vect_peel_extended_info min = (vect_peel_extended_info) data;
1269 int save_misalignment, dummy;
1270 unsigned int inside_cost = 0, outside_cost = 0, i;
1271 gimple stmt = DR_STMT (elem->dr);
1272 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1273 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1274 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1275 struct data_reference *dr;
1277 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1279 stmt = DR_STMT (dr);
1280 stmt_info = vinfo_for_stmt (stmt);
1281 /* For interleaving, only the alignment of the first access
1282 matters. */
1283 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1284 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1285 continue;
1287 save_misalignment = DR_MISALIGNMENT (dr);
1288 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1289 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1290 SET_DR_MISALIGNMENT (dr, save_misalignment);
1293 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1294 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1296 if (inside_cost < min->inside_cost
1297 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1299 min->inside_cost = inside_cost;
1300 min->outside_cost = outside_cost;
1301 min->peel_info.dr = elem->dr;
1302 min->peel_info.npeel = elem->npeel;
1305 return 1;
1309 /* Choose best peeling option by traversing peeling hash table and either
1310 choosing an option with the lowest cost (if cost model is enabled) or the
1311 option that aligns as many accesses as possible. */
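/* For illustration: if peeling by 1 iteration would align three data-refs
   and peeling by 5 would align four, the count-based traversal picks
   NPEEL == 5; with the cost model enabled, the option with the lowest
   inside cost (ties broken by outside cost) is chosen instead.  */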
1313 static struct data_reference *
1314 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1315 unsigned int *npeel)
1317 struct _vect_peel_extended_info res;
1319 res.peel_info.dr = NULL;
1321 if (flag_vect_cost_model)
1323 res.inside_cost = INT_MAX;
1324 res.outside_cost = INT_MAX;
1325 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1326 vect_peeling_hash_get_lowest_cost, &res);
1328 else
1330 res.peel_info.count = 0;
1331 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1332 vect_peeling_hash_get_most_frequent, &res);
1335 *npeel = res.peel_info.npeel;
1336 return res.peel_info.dr;
1340 /* Function vect_enhance_data_refs_alignment
1342 This pass will use loop versioning and loop peeling in order to enhance
1343 the alignment of data references in the loop.
1345 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1346 original loop is to be vectorized. Any other loops that are created by
1347 the transformations performed in this pass - are not supposed to be
1348 vectorized. This restriction will be relaxed.
1350 This pass will require a cost model to guide it whether to apply peeling
1351 or versioning or a combination of the two. For example, the scheme that
1352 Intel uses when given a loop with several memory accesses is as follows:
1353 choose one memory access ('p') whose alignment you want to force by doing
1354 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1355 other accesses are not necessarily aligned, or (2) use loop versioning to
1356 generate one loop in which all accesses are aligned, and another loop in
1357 which only 'p' is necessarily aligned.
1359 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1360 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1361 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1363 Devising a cost model is the most critical aspect of this work. It will
1364 guide us on which access to peel for, whether to use loop versioning, how
1365 many versions to create, etc. The cost model will probably consist of
1366 generic considerations as well as target specific considerations (on
1367 powerpc for example, misaligned stores are more painful than misaligned
1368 loads).
1370 Here are the general steps involved in alignment enhancements:
1372 -- original loop, before alignment analysis:
1373 for (i=0; i<N; i++){
1374 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1375 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1378 -- After vect_compute_data_refs_alignment:
1379 for (i=0; i<N; i++){
1380 x = q[i]; # DR_MISALIGNMENT(q) = 3
1381 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1384 -- Possibility 1: we do loop versioning:
1385 if (p is aligned) {
1386 for (i=0; i<N; i++){ # loop 1A
1387 x = q[i]; # DR_MISALIGNMENT(q) = 3
1388 p[i] = y; # DR_MISALIGNMENT(p) = 0
1391 else {
1392 for (i=0; i<N; i++){ # loop 1B
1393 x = q[i]; # DR_MISALIGNMENT(q) = 3
1394 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1398 -- Possibility 2: we do loop peeling:
1399 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1400 x = q[i];
1401 p[i] = y;
1403 for (i = 3; i < N; i++){ # loop 2A
1404 x = q[i]; # DR_MISALIGNMENT(q) = 0
1405 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1408 -- Possibility 3: combination of loop peeling and versioning:
1409 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1410 x = q[i];
1411 p[i] = y;
1413 if (p is aligned) {
1414 for (i = 3; i<N; i++){ # loop 3A
1415 x = q[i]; # DR_MISALIGNMENT(q) = 0
1416 p[i] = y; # DR_MISALIGNMENT(p) = 0
1419 else {
1420 for (i = 3; i<N; i++){ # loop 3B
1421 x = q[i]; # DR_MISALIGNMENT(q) = 0
1422 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1426 These loops are later passed to loop_transform to be vectorized. The
1427 vectorizer will use the alignment information to guide the transformation
1428 (whether to generate regular loads/stores, or with special handling for
1429 misalignment). */
1431 bool
1432 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1434 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1435 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1436 enum dr_alignment_support supportable_dr_alignment;
1437 struct data_reference *dr0 = NULL, *first_store = NULL;
1438 struct data_reference *dr;
1439 unsigned int i, j;
1440 bool do_peeling = false;
1441 bool do_versioning = false;
1442 bool stat;
1443 gimple stmt;
1444 stmt_vec_info stmt_info;
1445 int vect_versioning_for_alias_required;
1446 unsigned int npeel = 0;
1447 bool all_misalignments_unknown = true;
1448 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1449 unsigned possible_npeel_number = 1;
1450 tree vectype;
1451 unsigned int nelements, mis, same_align_drs_max = 0;
1453 if (vect_print_dump_info (REPORT_DETAILS))
1454 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1456 /* While cost model enhancements are expected in the future, the high level
1457 view of the code at this time is as follows:
1459 A) If there is a misaligned access then see if peeling to align
1460 this access can make all data references satisfy
1461 vect_supportable_dr_alignment. If so, update data structures
1462 as needed and return true.
1464 B) If peeling wasn't possible and there is a data reference with an
1465 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1466 then see if loop versioning checks can be used to make all data
1467 references satisfy vect_supportable_dr_alignment. If so, update
1468 data structures as needed and return true.
1470 C) If neither peeling nor versioning were successful then return false if
1471 any data reference does not satisfy vect_supportable_dr_alignment.
1473 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1475 Note, Possibility 3 above (which is peeling and versioning together) is not
1476 being done at this time. */
1478 /* (1) Peeling to force alignment. */
1480 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1481 Considerations:
1482 + How many accesses will become aligned due to the peeling
1483 - How many accesses will become unaligned due to the peeling,
1484 and the cost of misaligned accesses.
1485 - The cost of peeling (the extra runtime checks, the increase
1486 in code size). */
1488 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1490 stmt = DR_STMT (dr);
1491 stmt_info = vinfo_for_stmt (stmt);
1493 /* For interleaving, only the alignment of the first access
1494 matters. */
1495 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1496 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1497 continue;
1499 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1500 do_peeling = vector_alignment_reachable_p (dr);
1501 if (do_peeling)
1503 if (known_alignment_for_access_p (dr))
1505 unsigned int npeel_tmp;
1506 bool negative = tree_int_cst_compare (DR_STEP (dr),
1507 size_zero_node) < 0;
1509 /* Save info about DR in the hash table. */
1510 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1511 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1512 htab_create (1, vect_peeling_hash,
1513 vect_peeling_hash_eq, free);
1515 vectype = STMT_VINFO_VECTYPE (stmt_info);
1516 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1517 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1518 TREE_TYPE (DR_REF (dr))));
1519 npeel_tmp = (negative
1520 ? (mis - nelements) : (nelements - mis))
1521 & (nelements - 1);
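/* Worked example (hypothetical numbers): for a forward access with
   nelements == 4 and a misalignment of 3 elements, npeel_tmp is
   (4 - 3) & 3 == 1; with vf == 8 and the cost model disabled, the loop
   below also records 1 + 4 == 5 as a peeling candidate, matching the
   multi-type case described next.  */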
1523 /* For multiple types, it is possible that the bigger type access
1524 will have more than one peeling option. E.g., a loop with two
1525 types: one of size (vector size / 4), and the other one of
1526 size (vector size / 8). The vectorization factor will be 8. If both
1527 accesses are misaligned by 3, the first one needs one scalar
1528 iteration to be aligned, and the second one needs 5. But the
1529 first one will also be aligned by peeling 5 scalar
1530 iterations, and in that case both accesses will be aligned.
1531 Hence, except for the immediate peeling amount, we also want
1532 to try to add a full vector size, while we don't exceed the
1533 vectorization factor.
1534 We do this automatically for the cost model, since we calculate the cost
1535 for every peeling option. */
1536 if (!flag_vect_cost_model)
1537 possible_npeel_number = vf /nelements;
1539 /* Handle the aligned case. We may decide to align some other
1540 access, making DR unaligned. */
1541 if (DR_MISALIGNMENT (dr) == 0)
1543 npeel_tmp = 0;
1544 if (!flag_vect_cost_model)
1545 possible_npeel_number++;
1548 for (j = 0; j < possible_npeel_number; j++)
1550 gcc_assert (npeel_tmp <= vf);
1551 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1552 npeel_tmp += nelements;
1555 all_misalignments_unknown = false;
1556 /* Data-ref that was chosen for the case that all the
1557 misalignments are unknown is not relevant anymore, since we
1558 have a data-ref with known alignment. */
1559 dr0 = NULL;
1561 else
1563 /* If we don't know all the misalignment values, we prefer
1564 peeling for data-ref that has maximum number of data-refs
1565 with the same alignment, unless the target prefers to align
1566 stores over loads. */
1567 if (all_misalignments_unknown)
1569 if (same_align_drs_max < VEC_length (dr_p,
1570 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1571 || !dr0)
1573 same_align_drs_max = VEC_length (dr_p,
1574 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1575 dr0 = dr;
1578 if (!first_store && DR_IS_WRITE (dr))
1579 first_store = dr;
1582 /* If there are both known and unknown misaligned accesses in the
1583 loop, we choose peeling amount according to the known
1584 accesses. */
1587 if (!supportable_dr_alignment)
1589 dr0 = dr;
1590 if (!first_store && DR_IS_WRITE (dr))
1591 first_store = dr;
1595 else
1597 if (!aligned_access_p (dr))
1599 if (vect_print_dump_info (REPORT_DETAILS))
1600 fprintf (vect_dump, "vector alignment may not be reachable");
1602 break;
1607 vect_versioning_for_alias_required
1608 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1610 /* Temporarily, if versioning for alias is required, we disable peeling
1611 until we support peeling and versioning. Often peeling for alignment
1612 will require peeling for loop-bound, which in turn requires that we
1613 know how to adjust the loop ivs after the loop. */
1614 if (vect_versioning_for_alias_required
1615 || !vect_can_advance_ivs_p (loop_vinfo)
1616 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1617 do_peeling = false;
1619 if (do_peeling && all_misalignments_unknown
1620 && vect_supportable_dr_alignment (dr0, false))
1623 /* Check if the target prefers to align stores over loads, i.e., if
1624 misaligned stores are more expensive than misaligned loads (taking
1625 drs with same alignment into account). */
1626 if (first_store && DR_IS_READ (dr0))
1628 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1629 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1630 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1631 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1633 vect_get_data_access_cost (dr0, &load_inside_cost,
1634 &load_outside_cost);
1635 vect_get_data_access_cost (first_store, &store_inside_cost,
1636 &store_outside_cost);
1638 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1639 aligning the load DR0). */
1640 load_inside_penalty = store_inside_cost;
1641 load_outside_penalty = store_outside_cost;
1642 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1643 (vinfo_for_stmt (DR_STMT (first_store))),
1644 i, dr);
1645 i++)
1646 if (DR_IS_READ (dr))
1648 load_inside_penalty += load_inside_cost;
1649 load_outside_penalty += load_outside_cost;
1651 else
1653 load_inside_penalty += store_inside_cost;
1654 load_outside_penalty += store_outside_cost;
1657 /* Calculate the penalty for leaving DR0 unaligned (by
1658 aligning the FIRST_STORE). */
1659 store_inside_penalty = load_inside_cost;
1660 store_outside_penalty = load_outside_cost;
1661 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1662 (vinfo_for_stmt (DR_STMT (dr0))),
1663 i, dr);
1664 i++)
1665 if (DR_IS_READ (dr))
1667 store_inside_penalty += load_inside_cost;
1668 store_outside_penalty += load_outside_cost;
1670 else
1672 store_inside_penalty += store_inside_cost;
1673 store_outside_penalty += store_outside_cost;
1676 if (load_inside_penalty > store_inside_penalty
1677 || (load_inside_penalty == store_inside_penalty
1678 && load_outside_penalty > store_outside_penalty))
1679 dr0 = first_store;
1682 /* In case there are only loads with different unknown misalignments, use
1683 peeling only if it may help to align other accesses in the loop. */
1684 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1685 (vinfo_for_stmt (DR_STMT (dr0))))
1686 && vect_supportable_dr_alignment (dr0, false)
1687 != dr_unaligned_supported)
1688 do_peeling = false;
1691 if (do_peeling && !dr0)
1693 /* Peeling is possible, but no data access requires alignment in order to be
1694 supported. So we try to choose the best possible peeling. */
1696 /* We should get here only if there are drs with known misalignment. */
1697 gcc_assert (!all_misalignments_unknown);
1699 /* Choose the best peeling from the hash table. */
1700 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1701 if (!dr0 || !npeel)
1702 do_peeling = false;
1705 if (do_peeling)
1707 stmt = DR_STMT (dr0);
1708 stmt_info = vinfo_for_stmt (stmt);
1709 vectype = STMT_VINFO_VECTYPE (stmt_info);
1710 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1712 if (known_alignment_for_access_p (dr0))
1714 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1715 size_zero_node) < 0;
1716 if (!npeel)
1718 /* Since it's known at compile time, compute the number of
1719 iterations in the peeled loop (the peeling factor) for use in
1720 updating DR_MISALIGNMENT values. The peeling factor is the
1721 vectorization factor minus the misalignment as an element
1722 count. */
1723 mis = DR_MISALIGNMENT (dr0);
1724 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1725 npeel = ((negative ? mis - nelements : nelements - mis)
1726 & (nelements - 1));
1729 /* For interleaved data access every iteration accesses all the
1730 members of the group, therefore we divide the number of iterations
1731 by the group size. */
1732 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1733 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1734 npeel /= DR_GROUP_SIZE (stmt_info);
1736 if (vect_print_dump_info (REPORT_DETAILS))
1737 fprintf (vect_dump, "Try peeling by %d", npeel);
1740 /* Ensure that all data refs can be vectorized after the peel. */
1741 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1743 int save_misalignment;
1745 if (dr == dr0)
1746 continue;
1748 stmt = DR_STMT (dr);
1749 stmt_info = vinfo_for_stmt (stmt);
1750 /* For interleaving, only the alignment of the first access
1751 matters. */
1752 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1753 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1754 continue;
1756 save_misalignment = DR_MISALIGNMENT (dr);
1757 vect_update_misalignment_for_peel (dr, dr0, npeel);
1758 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1759 SET_DR_MISALIGNMENT (dr, save_misalignment);
1761 if (!supportable_dr_alignment)
1763 do_peeling = false;
1764 break;
1768 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1770 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1771 if (!stat)
1772 do_peeling = false;
1773 else
1774 return stat;
1777 if (do_peeling)
1779 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1780 If the misalignment of DR_i is identical to that of dr0 then set
1781 DR_MISALIGNMENT (DR_i) to zero. If the misalignments of DR_i and
1782 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1783 by the peeling factor times the element size of DR_i (MOD the
1784 vectorization factor times the size). Otherwise, the
1785 misalignment of DR_i must be set to unknown. */
1786 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1787 if (dr != dr0)
1788 vect_update_misalignment_for_peel (dr, dr0, npeel);
1790 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1791 if (npeel)
1792 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1793 else
1794 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1795 SET_DR_MISALIGNMENT (dr0, 0);
1796 if (vect_print_dump_info (REPORT_ALIGNMENT))
1797 fprintf (vect_dump, "Alignment of access forced using peeling.");
1799 if (vect_print_dump_info (REPORT_DETAILS))
1800 fprintf (vect_dump, "Peeling for alignment will be applied.");
1802 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1803 gcc_assert (stat);
1804 return stat;
1809 /* (2) Versioning to force alignment. */
1811 /* Try versioning if:
1812 1) flag_tree_vect_loop_version is TRUE
1813 2) the loop is being optimized for speed
1814 3) there is at least one unsupported misaligned data ref with an unknown
1815 misalignment, and
1816 4) all misaligned data refs with a known misalignment are supported, and
1817 5) the number of runtime alignment checks is within reason. */
1819 do_versioning =
1820 flag_tree_vect_loop_version
1821 && optimize_loop_nest_for_speed_p (loop)
1822 && (!loop->inner); /* FORNOW */
1824 if (do_versioning)
1826 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1828 stmt = DR_STMT (dr);
1829 stmt_info = vinfo_for_stmt (stmt);
1831 /* For interleaving, only the alignment of the first access
1832 matters. */
1833 if (aligned_access_p (dr)
1834 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1835 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1836 continue;
1838 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1840 if (!supportable_dr_alignment)
1842 gimple stmt;
1843 int mask;
1844 tree vectype;
1846 if (known_alignment_for_access_p (dr)
1847 || VEC_length (gimple,
1848 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1849 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1851 do_versioning = false;
1852 break;
1855 stmt = DR_STMT (dr);
1856 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1857 gcc_assert (vectype);
1859 /* The rightmost bits of an aligned address must be zeros.
1860 Construct the mask needed for this test. For example,
1861 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1862 mask must be 15 = 0xf. */
1863 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1865 /* FORNOW: use the same mask to test all potentially unaligned
1866 references in the loop. The vectorizer currently supports
1867 a single vector size, see the reference to
1868 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1869 vectorization factor is computed. */
1870 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1871 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1872 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1873 VEC_safe_push (gimple, heap,
1874 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1875 DR_STMT (dr));
1879 /* Versioning requires at least one misaligned data reference. */
1880 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1881 do_versioning = false;
1882 else if (!do_versioning)
1883 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1886 if (do_versioning)
1888 VEC(gimple,heap) *may_misalign_stmts
1889 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1890 gimple stmt;
1892 /* It can now be assumed that the data references in the statements
1893 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1894 of the loop being vectorized. */
1895 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1897 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1898 dr = STMT_VINFO_DATA_REF (stmt_info);
1899 SET_DR_MISALIGNMENT (dr, 0);
1900 if (vect_print_dump_info (REPORT_ALIGNMENT))
1901 fprintf (vect_dump, "Alignment of access forced using versioning.");
1904 if (vect_print_dump_info (REPORT_DETAILS))
1905 fprintf (vect_dump, "Versioning for alignment will be applied.");
1907 /* Peeling and versioning can't be done together at this time. */
1908 gcc_assert (! (do_peeling && do_versioning));
1910 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1911 gcc_assert (stat);
1912 return stat;
1915 /* This point is reached if neither peeling nor versioning is being done. */
1916 gcc_assert (! (do_peeling || do_versioning));
1918 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1919 return stat;
1923 /* Function vect_find_same_alignment_drs.
1925 Update group and alignment relations according to the chosen
1926 vectorization factor. */
1928 static void
1929 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1930 loop_vec_info loop_vinfo)
1932 unsigned int i;
1933 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1934 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1935 struct data_reference *dra = DDR_A (ddr);
1936 struct data_reference *drb = DDR_B (ddr);
1937 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1938 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1939 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1940 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1941 lambda_vector dist_v;
1942 unsigned int loop_depth;
1944 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1945 return;
1947 if (dra == drb)
1948 return;
1950 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1951 return;
1953 /* Loop-based vectorization and known data dependence. */
1954 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1955 return;
1957 /* Data-dependence analysis reports a distance vector of zero
1958 for data-references that overlap only in the first iteration
1959 but have steps of different sign (see PR45764).
1960 So as a sanity check require equal DR_STEP. */
1961 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1962 return;
1964 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1965 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1967 int dist = dist_v[loop_depth];
1969 if (vect_print_dump_info (REPORT_DR_DETAILS))
1970 fprintf (vect_dump, "dependence distance = %d.", dist);
1972 /* Same loop iteration. */
1973 if (dist == 0
1974 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1976 /* Two references with distance zero have the same alignment. */
1977 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1978 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1979 if (vect_print_dump_info (REPORT_ALIGNMENT))
1980 fprintf (vect_dump, "accesses have the same alignment.");
1981 if (vect_print_dump_info (REPORT_DR_DETAILS))
1983 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1984 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1985 fprintf (vect_dump, " and ");
1986 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1993 /* Function vect_analyze_data_refs_alignment
1995 Analyze the alignment of the data-references in the loop.
1996 Return FALSE if a data reference is found that cannot be vectorized. */
1998 bool
1999 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2000 bb_vec_info bb_vinfo)
2002 if (vect_print_dump_info (REPORT_DETAILS))
2003 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2005 /* Mark groups of data references with the same alignment using
2006 data dependence information. */
2007 if (loop_vinfo)
2009 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2010 struct data_dependence_relation *ddr;
2011 unsigned int i;
2013 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2014 vect_find_same_alignment_drs (ddr, loop_vinfo);
2017 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2019 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2020 fprintf (vect_dump,
2021 "not vectorized: can't calculate alignment for data ref.");
2022 return false;
2025 return true;
2029 /* Analyze groups of strided accesses: check that DR belongs to a group of
2030 strided accesses of legal size, step, etc. Detect gaps, single element
2031 interleaving, and other special cases. Set strided access info.
2032 Collect groups of strided stores for further use in SLP analysis. */
2034 static bool
2035 vect_analyze_group_access (struct data_reference *dr)
2037 tree step = DR_STEP (dr);
2038 tree scalar_type = TREE_TYPE (DR_REF (dr));
2039 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2040 gimple stmt = DR_STMT (dr);
2041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2042 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2043 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2044 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2045 HOST_WIDE_INT stride;
2046 bool slp_impossible = false;
2048 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2049 interleaving group (including gaps). */
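/* E.g., a step of 16 bytes over 4-byte elements gives a group size of 4. */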
2050 stride = dr_step / type_size;
2052 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2053 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2055 /* Check if this DR is part of an interleaving group, and is a single
2056 element of the group that is accessed in the loop. */
2058 /* Gaps are supported only for loads. STEP must be a multiple of the type
2059 size. The size of the group must be a power of 2. */
2060 if (DR_IS_READ (dr)
2061 && (dr_step % type_size) == 0
2062 && stride > 0
2063 && exact_log2 (stride) != -1)
2065 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2066 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2067 if (vect_print_dump_info (REPORT_DR_DETAILS))
2069 fprintf (vect_dump, "Detected single element interleaving ");
2070 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2071 fprintf (vect_dump, " step ");
2072 print_generic_expr (vect_dump, step, TDF_SLIM);
2074 return true;
2077 if (vect_print_dump_info (REPORT_DETAILS))
2079 fprintf (vect_dump, "not consecutive access ");
2080 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2083 if (bb_vinfo)
2085 /* Mark the statement as unvectorizable. */
2086 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2087 return true;
2090 return false;
2093 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2095 /* First stmt in the interleaving chain. Check the chain. */
2096 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2097 struct data_reference *data_ref = dr;
2098 unsigned int count = 1;
2099 tree next_step;
2100 tree prev_init = DR_INIT (data_ref);
2101 gimple prev = stmt;
2102 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2104 while (next)
2106 /* Skip same data-refs. If two or more stmts share a data-ref
2107 (supported only for loads), we vectorize only the first
2108 stmt, and the rest get their vectorized loads from the first
2109 one. */
2110 if (!tree_int_cst_compare (DR_INIT (data_ref),
2111 DR_INIT (STMT_VINFO_DATA_REF (
2112 vinfo_for_stmt (next)))))
2114 if (DR_IS_WRITE (data_ref))
2116 if (vect_print_dump_info (REPORT_DETAILS))
2117 fprintf (vect_dump, "Two store stmts share the same dr.");
2118 return false;
2121 /* Check that there are no load-store dependences for these loads,
2122 to prevent a load-store-load sequence to the same location. */
2123 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2124 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2126 if (vect_print_dump_info (REPORT_DETAILS))
2127 fprintf (vect_dump,
2128 "READ_WRITE dependence in interleaving.");
2129 return false;
2132 /* For a load, record the earlier stmt that shares its data-ref so the vectorized load can be reused. */
2133 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2135 prev = next;
2136 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2137 continue;
2139 prev = next;
2141 /* Check that all the accesses have the same STEP. */
2142 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2143 if (tree_int_cst_compare (step, next_step))
2145 if (vect_print_dump_info (REPORT_DETAILS))
2146 fprintf (vect_dump, "not consecutive access in interleaving");
2147 return false;
2150 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2151 /* Check that the distance between two accesses is equal to the type
2152 size. Otherwise, we have gaps. */
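/* E.g., inits of 0 and 8 with a 4-byte type give diff == 2, i.e. a gap of
one element between the two members. */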
2153 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2154 - TREE_INT_CST_LOW (prev_init)) / type_size;
2155 if (diff != 1)
2157 /* FORNOW: SLP of accesses with gaps is not supported. */
2158 slp_impossible = true;
2159 if (DR_IS_WRITE (data_ref))
2161 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "interleaved store with gaps");
2163 return false;
2166 gaps += diff - 1;
2169 /* Store the gap from the previous member of the group. If there is no
2170 gap in the access, DR_GROUP_GAP is always 1. */
2171 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2173 prev_init = DR_INIT (data_ref);
2174 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2175 /* Count the number of data-refs in the chain. */
2176 count++;
2179 /* COUNT is the number of accesses found; we multiply it by the size of
2180 the type to get COUNT_IN_BYTES. */
2181 count_in_bytes = type_size * count;
2183 /* Check that the size of the interleaving (including gaps) is not
2184 greater than STEP. */
2185 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2187 if (vect_print_dump_info (REPORT_DETAILS))
2189 fprintf (vect_dump, "interleaving size is greater than step for ");
2190 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2192 return false;
2195 /* Check that the size of the interleaving is equal to STEP for stores,
2196 i.e., that there are no gaps. */
2197 if (dr_step && dr_step != count_in_bytes)
2199 if (DR_IS_READ (dr))
2201 slp_impossible = true;
2202 /* There is a gap after the last load in the group. This gap is the
2203 difference between the stride and the number of elements. When
2204 there is no gap, this difference is 0. */
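/* E.g., a group of 3 loads over a stride of 4 elements leaves a trailing
gap of 1 element. */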
2205 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2207 else
2209 if (vect_print_dump_info (REPORT_DETAILS))
2210 fprintf (vect_dump, "interleaved store with gaps");
2211 return false;
2215 /* Check that STEP is a multiple of type size. */
2216 if (dr_step && (dr_step % type_size) != 0)
2218 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "step is not a multiple of type size: step ");
2221 print_generic_expr (vect_dump, step, TDF_SLIM);
2222 fprintf (vect_dump, " size ");
2223 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2224 TDF_SLIM);
2226 return false;
2229 /* FORNOW: we handle only interleaving that is a power of 2.
2231 We don't fail here if it may still be possible to vectorize the
2231 group using SLP. If not, the size of the group will be checked in
2232 vect_analyze_operations, and the vectorization will fail. */
2233 if (exact_log2 (stride) == -1)
2235 if (vect_print_dump_info (REPORT_DETAILS))
2236 fprintf (vect_dump, "interleaving is not a power of 2");
2238 if (slp_impossible)
2239 return false;
2242 if (stride == 0)
2243 stride = count;
2245 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2246 if (vect_print_dump_info (REPORT_DETAILS))
2247 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2249 /* SLP: create an SLP data structure for every interleaving group of
2250 stores for further analysis in vect_analyze_slp. */
2251 if (DR_IS_WRITE (dr) && !slp_impossible)
2253 if (loop_vinfo)
2254 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2255 stmt);
2256 if (bb_vinfo)
2257 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2258 stmt);
2262 return true;
2266 /* Analyze the access pattern of the data-reference DR.
2267 In case of non-consecutive accesses call vect_analyze_group_access() to
2268 analyze groups of strided accesses. */
2270 static bool
2271 vect_analyze_data_ref_access (struct data_reference *dr)
2273 tree step = DR_STEP (dr);
2274 tree scalar_type = TREE_TYPE (DR_REF (dr));
2275 gimple stmt = DR_STMT (dr);
2276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2277 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2278 struct loop *loop = NULL;
2279 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2281 if (loop_vinfo)
2282 loop = LOOP_VINFO_LOOP (loop_vinfo);
2284 if (loop_vinfo && !step)
2286 if (vect_print_dump_info (REPORT_DETAILS))
2287 fprintf (vect_dump, "bad data-ref access in loop");
2288 return false;
2291 /* Don't allow invariant accesses in loops. */
2292 if (loop_vinfo && dr_step == 0)
2293 return false;
2295 if (loop && nested_in_vect_loop_p (loop, stmt))
2297 /* Interleaved accesses are not yet supported within outer-loop
2298 vectorization for references in the inner-loop. */
2299 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2301 /* For the rest of the analysis we use the outer-loop step. */
2302 step = STMT_VINFO_DR_STEP (stmt_info);
2303 dr_step = TREE_INT_CST_LOW (step);
2305 if (dr_step == 0)
2307 if (vect_print_dump_info (REPORT_ALIGNMENT))
2308 fprintf (vect_dump, "zero step in outer loop.");
2309 if (DR_IS_READ (dr))
2310 return true;
2311 else
2312 return false;
2316 /* Consecutive? */
2317 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2318 || (dr_step < 0
2319 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2321 /* Mark that it is not interleaving. */
2322 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2323 return true;
2326 if (loop && nested_in_vect_loop_p (loop, stmt))
2328 if (vect_print_dump_info (REPORT_ALIGNMENT))
2329 fprintf (vect_dump, "strided access in outer loop.");
2330 return false;
2333 /* Not a consecutive access - check whether it is part of an interleaving group. */
2334 return vect_analyze_group_access (dr);
2338 /* Function vect_analyze_data_ref_accesses.
2340 Analyze the access pattern of all the data references in the loop.
2342 FORNOW: the only access pattern that is considered vectorizable is a
2343 simple step 1 (consecutive) access.
2345 FORNOW: handle only arrays and pointer accesses. */
2347 bool
2348 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2350 unsigned int i;
2351 VEC (data_reference_p, heap) *datarefs;
2352 struct data_reference *dr;
2354 if (vect_print_dump_info (REPORT_DETAILS))
2355 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2357 if (loop_vinfo)
2358 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2359 else
2360 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2362 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2363 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2364 && !vect_analyze_data_ref_access (dr))
2366 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2367 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2369 if (bb_vinfo)
2371 /* Mark the statement as not vectorizable. */
2372 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2373 continue;
2375 else
2376 return false;
2379 return true;
2382 /* Function vect_prune_runtime_alias_test_list.
2384 Prune a list of ddrs to be tested at run-time by versioning for alias.
2385 Return FALSE if the resulting list of ddrs is longer than allowed by
2386 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2388 bool
2389 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2391 VEC (ddr_p, heap) * ddrs =
2392 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2393 unsigned i, j;
2395 if (vect_print_dump_info (REPORT_DETAILS))
2396 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
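/* Drop any ddr whose run-time alias-check ranges duplicate those of an
earlier ddr in the list, so each pair of ranges is checked only once. */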
2398 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2400 bool found;
2401 ddr_p ddr_i;
2403 ddr_i = VEC_index (ddr_p, ddrs, i);
2404 found = false;
2406 for (j = 0; j < i; j++)
2408 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2410 if (vect_vfa_range_equal (ddr_i, ddr_j))
2412 if (vect_print_dump_info (REPORT_DR_DETAILS))
2414 fprintf (vect_dump, "found equal ranges ");
2415 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2416 fprintf (vect_dump, ", ");
2417 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2418 fprintf (vect_dump, " and ");
2419 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2420 fprintf (vect_dump, ", ");
2421 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2423 found = true;
2424 break;
2428 if (found)
2430 VEC_ordered_remove (ddr_p, ddrs, i);
2431 continue;
2433 i++;
2436 if (VEC_length (ddr_p, ddrs) >
2437 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2439 if (vect_print_dump_info (REPORT_DR_DETAILS))
2441 fprintf (vect_dump,
2442 "disable versioning for alias - max number of generated "
2443 "checks exceeded.");
2446 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2448 return false;
2451 return true;
2455 /* Function vect_analyze_data_refs.
2457 Find all the data references in the loop or basic block.
2459 The general structure of the analysis of data refs in the vectorizer is as
2460 follows:
2461 1- vect_analyze_data_refs(loop/bb): call
2462 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2463 in the loop/bb and their dependences.
2464 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2465 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2466 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2470 bool
2471 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2472 bb_vec_info bb_vinfo,
2473 int *min_vf)
2475 struct loop *loop = NULL;
2476 basic_block bb = NULL;
2477 unsigned int i;
2478 VEC (data_reference_p, heap) *datarefs;
2479 struct data_reference *dr;
2480 tree scalar_type;
2481 bool res;
2483 if (vect_print_dump_info (REPORT_DETAILS))
2484 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2486 if (loop_vinfo)
2488 loop = LOOP_VINFO_LOOP (loop_vinfo);
2489 res = compute_data_dependences_for_loop
2490 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2491 &LOOP_VINFO_DDRS (loop_vinfo));
2493 if (!res)
2495 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2496 fprintf (vect_dump, "not vectorized: loop contains function calls"
2497 " or data references that cannot be analyzed");
2498 return false;
2501 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2503 else
2505 bb = BB_VINFO_BB (bb_vinfo);
2506 res = compute_data_dependences_for_bb (bb, true,
2507 &BB_VINFO_DATAREFS (bb_vinfo),
2508 &BB_VINFO_DDRS (bb_vinfo));
2509 if (!res)
2511 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2512 fprintf (vect_dump, "not vectorized: basic block contains function"
2513 " calls or data references that cannot be analyzed");
2514 return false;
2517 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2520 /* Go through the data-refs, check that the analysis succeeded. Update
2521 pointer from stmt_vec_info struct to DR and vectype. */
2523 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2525 gimple stmt;
2526 stmt_vec_info stmt_info;
2527 tree base, offset, init;
2528 int vf;
2530 if (!dr || !DR_REF (dr))
2532 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2533 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2534 return false;
2537 stmt = DR_STMT (dr);
2538 stmt_info = vinfo_for_stmt (stmt);
2540 /* Check that analysis of the data-ref succeeded. */
2541 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2542 || !DR_STEP (dr))
2544 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2546 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2547 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2550 if (bb_vinfo)
2552 /* Mark the statement as not vectorizable. */
2553 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2554 continue;
2556 else
2557 return false;
2560 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2562 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2563 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2564 "constant");
2565 if (bb_vinfo)
2567 /* Mark the statement as not vectorizable. */
2568 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2569 continue;
2571 else
2572 return false;
2575 base = unshare_expr (DR_BASE_ADDRESS (dr));
2576 offset = unshare_expr (DR_OFFSET (dr));
2577 init = unshare_expr (DR_INIT (dr));
2579 if (stmt_could_throw_p (stmt))
2581 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2583 fprintf (vect_dump, "not vectorized: statement can throw an "
2584 "exception ");
2585 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2587 return false;
2590 /* Update DR field in stmt_vec_info struct. */
2592 /* If the dataref is in an inner-loop of the loop that is considered
2593 for vectorization, we also want to analyze the access relative to
2594 the outer-loop (DR contains information only relative to the
2595 inner-most enclosing loop). We do that by building a reference to the
2596 first location accessed by the inner-loop, and analyze it relative to
2597 the outer-loop. */
2598 if (loop && nested_in_vect_loop_p (loop, stmt))
2600 tree outer_step, outer_base, outer_init;
2601 HOST_WIDE_INT pbitsize, pbitpos;
2602 tree poffset;
2603 enum machine_mode pmode;
2604 int punsignedp, pvolatilep;
2605 affine_iv base_iv, offset_iv;
2606 tree dinit;
2608 /* Build a reference to the first location accessed by the
2609 inner-loop: *(BASE+INIT). (The first location is actually
2610 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2611 tree inner_base = build_fold_indirect_ref
2612 (fold_build2 (POINTER_PLUS_EXPR,
2613 TREE_TYPE (base), base,
2614 fold_convert (sizetype, init)));
2616 if (vect_print_dump_info (REPORT_DETAILS))
2618 fprintf (vect_dump, "analyze in outer-loop: ");
2619 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2622 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2623 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2624 gcc_assert (outer_base != NULL_TREE);
2626 if (pbitpos % BITS_PER_UNIT != 0)
2628 if (vect_print_dump_info (REPORT_DETAILS))
2629 fprintf (vect_dump, "failed: bit offset alignment.\n");
2630 return false;
2633 outer_base = build_fold_addr_expr (outer_base);
2634 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2635 &base_iv, false))
2637 if (vect_print_dump_info (REPORT_DETAILS))
2638 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2639 return false;
2642 if (offset)
2644 if (poffset)
2645 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2646 poffset);
2647 else
2648 poffset = offset;
2651 if (!poffset)
2653 offset_iv.base = ssize_int (0);
2654 offset_iv.step = ssize_int (0);
2656 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2657 &offset_iv, false))
2659 if (vect_print_dump_info (REPORT_DETAILS))
2660 fprintf (vect_dump, "evolution of offset is not affine.\n");
2661 return false;
2664 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2665 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2666 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2667 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2668 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2670 outer_step = size_binop (PLUS_EXPR,
2671 fold_convert (ssizetype, base_iv.step),
2672 fold_convert (ssizetype, offset_iv.step));
2674 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2675 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2676 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2677 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2678 STMT_VINFO_DR_OFFSET (stmt_info) =
2679 fold_convert (ssizetype, offset_iv.base);
2680 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2681 size_int (highest_pow2_factor (offset_iv.base));
2683 if (vect_print_dump_info (REPORT_DETAILS))
2685 fprintf (vect_dump, "\touter base_address: ");
2686 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2687 fprintf (vect_dump, "\n\touter offset from base address: ");
2688 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2689 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2690 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2691 fprintf (vect_dump, "\n\touter step: ");
2692 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2693 fprintf (vect_dump, "\n\touter aligned to: ");
2694 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2698 if (STMT_VINFO_DATA_REF (stmt_info))
2700 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2702 fprintf (vect_dump,
2703 "not vectorized: more than one data ref in stmt: ");
2704 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2706 return false;
2709 STMT_VINFO_DATA_REF (stmt_info) = dr;
2711 /* Set vectype for STMT. */
2712 scalar_type = TREE_TYPE (DR_REF (dr));
2713 STMT_VINFO_VECTYPE (stmt_info) =
2714 get_vectype_for_scalar_type (scalar_type);
2715 if (!STMT_VINFO_VECTYPE (stmt_info))
2717 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2719 fprintf (vect_dump,
2720 "not vectorized: no vectype for stmt: ");
2721 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2722 fprintf (vect_dump, " scalar_type: ");
2723 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2726 if (bb_vinfo)
2728 /* Mark the statement as not vectorizable. */
2729 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2730 continue;
2732 else
2733 return false;
2736 /* Adjust the minimal vectorization factor according to the
2737 vector type. */
2738 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2739 if (vf > *min_vf)
2740 *min_vf = vf;
2743 return true;
2747 /* Function vect_get_new_vect_var.
2749 Returns a name for a new variable. The current naming scheme prepends the
2750 prefix "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND)
2751 to the name of vectorizer-generated variables, with NAME used as the
2752 suffix if provided. */
2754 tree
2755 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2757 const char *prefix;
2758 tree new_vect_var;
2760 switch (var_kind)
2762 case vect_simple_var:
2763 prefix = "vect_";
2764 break;
2765 case vect_scalar_var:
2766 prefix = "stmp_";
2767 break;
2768 case vect_pointer_var:
2769 prefix = "vect_p";
2770 break;
2771 default:
2772 gcc_unreachable ();
2775 if (name)
2777 char* tmp = concat (prefix, name, NULL);
2778 new_vect_var = create_tmp_var (type, tmp);
2779 free (tmp);
2781 else
2782 new_vect_var = create_tmp_var (type, prefix);
2784 /* Mark vector typed variable as a gimple register variable. */
2785 if (TREE_CODE (type) == VECTOR_TYPE)
2786 DECL_GIMPLE_REG_P (new_vect_var) = true;
2788 return new_vect_var;
2792 /* Function vect_create_addr_base_for_vector_ref.
2794 Create an expression that computes the address of the first memory location
2795 that will be accessed for a data reference.
2797 Input:
2798 STMT: The statement containing the data reference.
2799 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2800 OFFSET: Optional. If supplied, it is added to the initial address.
2801 LOOP: Specify relative to which loop-nest the address should be computed.
2802 For example, when the dataref is in an inner-loop nested in an
2803 outer-loop that is now being vectorized, LOOP can be either the
2804 outer-loop, or the inner-loop. The first memory location accessed
2805 by the following dataref ('in' points to short):
2807 for (i=0; i<N; i++)
2808 for (j=0; j<M; j++)
2809 s += in[i+j]
2811 is as follows:
2812 if LOOP=i_loop: &in (relative to i_loop)
2813 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2815 Output:
2816 1. Return an SSA_NAME whose value is the address of the memory location of
2817 the first vector of the data reference.
2818 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2819 these statement(s) which define the returned SSA_NAME.
2821 FORNOW: We are only handling array accesses with step 1. */
2823 tree
2824 vect_create_addr_base_for_vector_ref (gimple stmt,
2825 gimple_seq *new_stmt_list,
2826 tree offset,
2827 struct loop *loop)
2829 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2830 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2831 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2832 tree base_name;
2833 tree data_ref_base_var;
2834 tree vec_stmt;
2835 tree addr_base, addr_expr;
2836 tree dest;
2837 gimple_seq seq = NULL;
2838 tree base_offset = unshare_expr (DR_OFFSET (dr));
2839 tree init = unshare_expr (DR_INIT (dr));
2840 tree vect_ptr_type;
2841 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2843 tree base;
2845 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2847 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2849 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2851 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2852 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2853 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2856 if (loop_vinfo)
2857 base_name = build_fold_indirect_ref (data_ref_base);
2858 else
2860 base_offset = ssize_int (0);
2861 init = ssize_int (0);
2862 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2865 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2866 add_referenced_var (data_ref_base_var);
2867 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2868 data_ref_base_var);
2869 gimple_seq_add_seq (new_stmt_list, seq);
2871 /* Create base_offset */
2872 base_offset = size_binop (PLUS_EXPR,
2873 fold_convert (sizetype, base_offset),
2874 fold_convert (sizetype, init));
2875 dest = create_tmp_var (sizetype, "base_off");
2876 add_referenced_var (dest);
2877 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2878 gimple_seq_add_seq (new_stmt_list, seq);
2880 if (offset)
2882 tree tmp = create_tmp_var (sizetype, "offset");
2884 add_referenced_var (tmp);
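/* Note: OFFSET is scaled by the scalar element size (STEP) below, i.e. it
is interpreted as a count of elements rather than bytes. */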
2885 offset = fold_build2 (MULT_EXPR, sizetype,
2886 fold_convert (sizetype, offset), step);
2887 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2888 base_offset, offset);
2889 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2890 gimple_seq_add_seq (new_stmt_list, seq);
2893 /* base + base_offset */
2894 if (loop_vinfo)
2895 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2896 data_ref_base, base_offset);
2897 else
2899 addr_base = build1 (ADDR_EXPR,
2900 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2901 unshare_expr (DR_REF (dr)));
2904 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2905 base = get_base_address (DR_REF (dr));
2906 if (base
2907 && TREE_CODE (base) == MEM_REF)
2908 vect_ptr_type
2909 = build_qualified_type (vect_ptr_type,
2910 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2912 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2913 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2914 get_name (base_name));
2915 add_referenced_var (addr_expr);
2916 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2917 gimple_seq_add_seq (new_stmt_list, seq);
2919 if (DR_PTR_INFO (dr)
2920 && TREE_CODE (vec_stmt) == SSA_NAME)
2922 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2923 if (offset)
2925 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2926 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2930 if (vect_print_dump_info (REPORT_DETAILS))
2932 fprintf (vect_dump, "created ");
2933 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2936 return vec_stmt;
2940 /* Function vect_create_data_ref_ptr.
2942 Create a new pointer to vector type (vp), that points to the first location
2943 accessed in the loop by STMT, along with the def-use update chain to
2944 appropriately advance the pointer through the loop iterations. Also set
2945 aliasing information for the pointer. This vector pointer is used by the
2946 callers to this function to create a memory reference expression for vector
2947 load/store access.
2949 Input:
2950 1. STMT: a stmt that references memory. Expected to be of the form
2951 GIMPLE_ASSIGN <name, data-ref> or
2952 GIMPLE_ASSIGN <data-ref, name>.
2953 2. AT_LOOP: the loop where the vector memref is to be created.
2954 3. OFFSET (optional): an offset to be added to the initial address accessed
2955 by the data-ref in STMT.
2956 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2957 pointing to the initial address.
2958 5. TYPE: if not NULL indicates the required type of the data-ref.
2960 Output:
2961 1. Declare a new ptr to vector_type, and have it point to the base of the
2962 data reference (the initial address accessed by the data reference).
2963 For example, for vector of type V8HI, the following code is generated:
2965 v8hi *vp;
2966 vp = (v8hi *)initial_address;
2968 if OFFSET is not supplied:
2969 initial_address = &a[init];
2970 if OFFSET is supplied:
2971 initial_address = &a[init + OFFSET];
2973 Return the initial_address in INITIAL_ADDRESS.
2975 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2976 update the pointer in each iteration of the loop.
2978 Return the increment stmt that updates the pointer in PTR_INCR.
2980 3. Set INV_P to true if the access pattern of the data reference in the
2981 vectorized loop is invariant. Set it to false otherwise.
2983 4. Return the pointer. */
2985 tree
2986 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2987 tree offset, tree *initial_address, gimple *ptr_incr,
2988 bool only_init, bool *inv_p)
2990 tree base_name;
2991 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2992 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2993 struct loop *loop = NULL;
2994 bool nested_in_vect_loop = false;
2995 struct loop *containing_loop = NULL;
2996 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2997 tree vect_ptr_type;
2998 tree vect_ptr;
2999 tree new_temp;
3000 gimple vec_stmt;
3001 gimple_seq new_stmt_list = NULL;
3002 edge pe = NULL;
3003 basic_block new_bb;
3004 tree vect_ptr_init;
3005 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3006 tree vptr;
3007 gimple_stmt_iterator incr_gsi;
3008 bool insert_after;
3009 bool negative;
3010 tree indx_before_incr, indx_after_incr;
3011 gimple incr;
3012 tree step;
3013 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3014 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3015 tree base;
3017 if (loop_vinfo)
3019 loop = LOOP_VINFO_LOOP (loop_vinfo);
3020 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3021 containing_loop = (gimple_bb (stmt))->loop_father;
3022 pe = loop_preheader_edge (loop);
3024 else
3026 gcc_assert (bb_vinfo);
3027 only_init = true;
3028 *ptr_incr = NULL;
3031 /* Check the step (evolution) of the load in LOOP, and record
3032 whether it's invariant. */
3033 if (nested_in_vect_loop)
3034 step = STMT_VINFO_DR_STEP (stmt_info);
3035 else
3036 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3038 if (tree_int_cst_compare (step, size_zero_node) == 0)
3039 *inv_p = true;
3040 else
3041 *inv_p = false;
3042 negative = tree_int_cst_compare (step, size_zero_node) < 0;
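/* A negative step means the access walks backwards through memory; the
vector pointer is then decremented by the vector size each iteration
(see the NEGATE_EXPR below). */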
3044 /* Create an expression for the first address accessed by this load
3045 in LOOP. */
3046 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3048 if (vect_print_dump_info (REPORT_DETAILS))
3050 tree data_ref_base = base_name;
3051 fprintf (vect_dump, "create vector-pointer variable to type: ");
3052 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3053 if (TREE_CODE (data_ref_base) == VAR_DECL
3054 || TREE_CODE (data_ref_base) == ARRAY_REF)
3055 fprintf (vect_dump, " vectorizing an array ref: ");
3056 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3057 fprintf (vect_dump, " vectorizing a record based array ref: ");
3058 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3059 fprintf (vect_dump, " vectorizing a pointer ref: ");
3060 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3063 /* (1) Create the new vector-pointer variable. */
3064 vect_ptr_type = build_pointer_type (vectype);
3065 base = get_base_address (DR_REF (dr));
3066 if (base
3067 && TREE_CODE (base) == MEM_REF)
3068 vect_ptr_type
3069 = build_qualified_type (vect_ptr_type,
3070 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3071 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3072 get_name (base_name));
3074 /* Vector types inherit the alias set of their component type by default so
3075 we need to use a ref-all pointer if the data reference does not conflict
3076 with the created vector data reference because it is not addressable. */
3077 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3078 get_alias_set (DR_REF (dr))))
3080 vect_ptr_type
3081 = build_pointer_type_for_mode (vectype,
3082 TYPE_MODE (vect_ptr_type), true);
3083 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3084 get_name (base_name));
3087 /* Likewise for any of the data references in the stmt group. */
3088 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3090 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3093 tree lhs = gimple_assign_lhs (orig_stmt);
3094 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3095 get_alias_set (lhs)))
3097 vect_ptr_type
3098 = build_pointer_type_for_mode (vectype,
3099 TYPE_MODE (vect_ptr_type), true);
3100 vect_ptr
3101 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3102 get_name (base_name));
3103 break;
3106 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3108 while (orig_stmt);
3111 add_referenced_var (vect_ptr);
3113 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3114 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3115 def-use update cycles for the pointer: one relative to the outer-loop
3116 (LOOP), which is what steps (3) and (4) below do. The other is relative
3117 to the inner-loop (which is the inner-most loop containing the dataref),
3118 and this is done by step (5) below.
3120 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3121 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3122 redundant. Steps (3),(4) create the following:
3124 vp0 = &base_addr;
3125 LOOP: vp1 = phi(vp0,vp2)
3128 vp2 = vp1 + step
3129 goto LOOP
3131 If there is an inner-loop nested in loop, then step (5) will also be
3132 applied, and an additional update in the inner-loop will be created:
3134 vp0 = &base_addr;
3135 LOOP: vp1 = phi(vp0,vp2)
3137 inner: vp3 = phi(vp1,vp4)
3138 vp4 = vp3 + inner_step
3139 if () goto inner
3141 vp2 = vp1 + step
3142 if () goto LOOP */
3144 /* (2) Calculate the initial address of the vector-pointer, and set
3145 the vector-pointer to point to it before the loop. */
3147 /* Create: &(base[init_val+offset]) in the loop preheader. */
3149 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3150 offset, loop);
3151 if (new_stmt_list)
3153 if (pe)
3155 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3156 gcc_assert (!new_bb);
3158 else
3159 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3162 *initial_address = new_temp;
3164 /* Create: p = (vectype *) initial_base */
3165 if (TREE_CODE (new_temp) != SSA_NAME
3166 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3168 vec_stmt = gimple_build_assign (vect_ptr,
3169 fold_convert (vect_ptr_type, new_temp));
3170 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3171 /* Copy the points-to information if it exists. */
3172 if (DR_PTR_INFO (dr))
3173 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3174 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3175 if (pe)
3177 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3178 gcc_assert (!new_bb);
3180 else
3181 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3183 else
3184 vect_ptr_init = new_temp;
3186 /* (3) Handle the updating of the vector-pointer inside the loop.
3187 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3188 inner-loop nested in LOOP (during outer-loop vectorization). */
3190 /* No update in loop is required. */
3191 if (only_init && (!loop_vinfo || at_loop == loop))
3192 vptr = vect_ptr_init;
3193 else
3195 /* The step of the vector pointer is the Vector Size. */
3196 tree step = TYPE_SIZE_UNIT (vectype);
3197 /* One exception to the above is when the scalar step of the load in
3198 LOOP is zero. In this case the step here is also zero. */
3199 if (*inv_p)
3200 step = size_zero_node;
3201 else if (negative)
3202 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3204 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3206 create_iv (vect_ptr_init,
3207 fold_convert (vect_ptr_type, step),
3208 vect_ptr, loop, &incr_gsi, insert_after,
3209 &indx_before_incr, &indx_after_incr);
3210 incr = gsi_stmt (incr_gsi);
3211 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3213 /* Copy the points-to information if it exists. */
3214 if (DR_PTR_INFO (dr))
3216 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3217 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3219 if (ptr_incr)
3220 *ptr_incr = incr;
3222 vptr = indx_before_incr;
3225 if (!nested_in_vect_loop || only_init)
3226 return vptr;
3229 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3230 nested in LOOP, if it exists. */
3232 gcc_assert (nested_in_vect_loop);
3233 if (!only_init)
3235 standard_iv_increment_position (containing_loop, &incr_gsi,
3236 &insert_after);
3237 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3238 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3239 &indx_after_incr);
3240 incr = gsi_stmt (incr_gsi);
3241 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3243 /* Copy the points-to information if it exists. */
3244 if (DR_PTR_INFO (dr))
3246 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3247 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3249 if (ptr_incr)
3250 *ptr_incr = incr;
3252 return indx_before_incr;
3254 else
3255 gcc_unreachable ();
3259 /* Function bump_vector_ptr
3261 Increment a pointer (to a vector type) by vector-size. If requested,
3262 i.e. if PTR-INCR is given, then also connect the new increment stmt
3263 to the existing def-use update-chain of the pointer, by modifying
3264 the PTR_INCR as illustrated below:
3266 The pointer def-use update-chain before this function:
3267 DATAREF_PTR = phi (p_0, p_2)
3268 ....
3269 PTR_INCR: p_2 = DATAREF_PTR + step
3271 The pointer def-use update-chain after this function:
3272 DATAREF_PTR = phi (p_0, p_2)
3273 ....
3274 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3275 ....
3276 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3278 Input:
3279 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3280 in the loop.
3281 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3282 the loop. The increment amount across iterations is expected
3283 to be vector_size.
3284 BSI - location where the new update stmt is to be placed.
3285 STMT - the original scalar memory-access stmt that is being vectorized.
3286 BUMP - optional. The offset by which to bump the pointer. If not given,
3287 the offset is assumed to be vector_size.
3289 Output: Return NEW_DATAREF_PTR as illustrated above.
3293 tree
3294 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3295 gimple stmt, tree bump)
3297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3298 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3299 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3300 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3301 tree update = TYPE_SIZE_UNIT (vectype);
3302 gimple incr_stmt;
3303 ssa_op_iter iter;
3304 use_operand_p use_p;
3305 tree new_dataref_ptr;
3307 if (bump)
3308 update = bump;
3310 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3311 dataref_ptr, update);
3312 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3313 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3314 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3316 /* Copy the points-to information if it exists. */
3317 if (DR_PTR_INFO (dr))
3319 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3320 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3321 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3324 if (!ptr_incr)
3325 return new_dataref_ptr;
3327 /* Update the vector-pointer's cross-iteration increment. */
3328 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3330 tree use = USE_FROM_PTR (use_p);
3332 if (use == dataref_ptr)
3333 SET_USE (use_p, new_dataref_ptr);
3334 else
3335 gcc_assert (tree_int_cst_compare (use, update) == 0);
3338 return new_dataref_ptr;
3342 /* Function vect_create_destination_var.
3344 Create a new temporary of type VECTYPE. */
3346 tree
3347 vect_create_destination_var (tree scalar_dest, tree vectype)
3349 tree vec_dest;
3350 const char *new_name;
3351 tree type;
3352 enum vect_var_kind kind;
3354 kind = vectype ? vect_simple_var : vect_scalar_var;
3355 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3357 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3359 new_name = get_name (scalar_dest);
3360 if (!new_name)
3361 new_name = "var_";
3362 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3363 add_referenced_var (vec_dest);
3365 return vec_dest;
3368 /* Function vect_strided_store_supported.
3370 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3371 and FALSE otherwise. */
3373 bool
3374 vect_strided_store_supported (tree vectype)
3376 optab interleave_high_optab, interleave_low_optab;
3377 enum machine_mode mode;
3379 mode = TYPE_MODE (vectype);
3381 /* Check that the operation is supported. */
3382 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3383 vectype, optab_default);
3384 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3385 vectype, optab_default);
3386 if (!interleave_high_optab || !interleave_low_optab)
3388 if (vect_print_dump_info (REPORT_DETAILS))
3389 fprintf (vect_dump, "no optab for interleave.");
3390 return false;
3393 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3394 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3396 if (vect_print_dump_info (REPORT_DETAILS))
3397 fprintf (vect_dump, "interleave op not supported by target.");
3398 return false;
3401 return true;
3405 /* Function vect_permute_store_chain.
3407 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3408 a power of 2, generate interleave_high/low stmts to reorder the data
3409 correctly for the stores. Return the final references for stores in
3410 RESULT_CHAIN.
3412 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3413 The input is 4 vectors each containing 8 elements. We assign a number to
3414 each element, the input sequence is:
3416 1st vec: 0 1 2 3 4 5 6 7
3417 2nd vec: 8 9 10 11 12 13 14 15
3418 3rd vec: 16 17 18 19 20 21 22 23
3419 4th vec: 24 25 26 27 28 29 30 31
3421 The output sequence should be:
3423 1st vec: 0 8 16 24 1 9 17 25
3424 2nd vec: 2 10 18 26 3 11 19 27
3425 3rd vec: 4 12 20 28 5 13 21 29
3426 4th vec: 6 14 22 30 7 15 23 31
3428 i.e., we interleave the contents of the four vectors in their order.
3430 We use interleave_high/low instructions to create such output. The input of
3431 each interleave_high/low operation is two vectors:
3432 1st vec 2nd vec
3433 0 1 2 3 4 5 6 7
3434 the even elements of the result vector are obtained left-to-right from the
3435 high/low elements of the first vector. The odd elements of the result are
3436 obtained left-to-right from the high/low elements of the second vector.
3437 The output of interleave_high will be: 0 4 1 5
3438 and of interleave_low: 2 6 3 7
3441 The permutation is done in log LENGTH stages. In each stage interleave_high
3442 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3443 where the first argument is taken from the first half of DR_CHAIN and the
3444 second argument from its second half.
3445 In our example,
3447 I1: interleave_high (1st vec, 3rd vec)
3448 I2: interleave_low (1st vec, 3rd vec)
3449 I3: interleave_high (2nd vec, 4th vec)
3450 I4: interleave_low (2nd vec, 4th vec)
3452 The output for the first stage is:
3454 I1: 0 16 1 17 2 18 3 19
3455 I2: 4 20 5 21 6 22 7 23
3456 I3: 8 24 9 25 10 26 11 27
3457 I4: 12 28 13 29 14 30 15 31
3459 The output of the second stage, i.e. the final result is:
3461 I1: 0 8 16 24 1 9 17 25
3462 I2: 2 10 18 26 3 11 19 27
3463 I3: 4 12 20 28 5 13 21 29
3464 I4: 6 14 22 30 7 15 23 31. */
3466 bool
3467 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3468 unsigned int length,
3469 gimple stmt,
3470 gimple_stmt_iterator *gsi,
3471 VEC(tree,heap) **result_chain)
3473 tree perm_dest, vect1, vect2, high, low;
3474 gimple perm_stmt;
3475 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3476 int i;
3477 unsigned int j;
3478 enum tree_code high_code, low_code;
3480 /* Check that the operation is supported. */
3481 if (!vect_strided_store_supported (vectype))
3482 return false;
3484 *result_chain = VEC_copy (tree, heap, dr_chain);
3486 for (i = 0; i < exact_log2 (length); i++)
3488 for (j = 0; j < length/2; j++)
3490 vect1 = VEC_index (tree, dr_chain, j);
3491 vect2 = VEC_index (tree, dr_chain, j+length/2);
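/* Each stage pairs element J with element J + LENGTH/2 of the current
chain; with LENGTH == 4 that is vector 0 with vector 2 and vector 1
with vector 3, matching the example above. */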
3493 /* Create interleaving stmt:
3494 in the case of big endian:
3495 high = interleave_high (vect1, vect2)
3496 and in the case of little endian:
3497 high = interleave_low (vect1, vect2). */
3498 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3499 DECL_GIMPLE_REG_P (perm_dest) = 1;
3500 add_referenced_var (perm_dest);
3501 if (BYTES_BIG_ENDIAN)
3503 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3504 low_code = VEC_INTERLEAVE_LOW_EXPR;
3506 else
3508 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3509 high_code = VEC_INTERLEAVE_LOW_EXPR;
3511 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3512 vect1, vect2);
3513 high = make_ssa_name (perm_dest, perm_stmt);
3514 gimple_assign_set_lhs (perm_stmt, high);
3515 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3516 VEC_replace (tree, *result_chain, 2*j, high);
3518 /* Create interleaving stmt:
3519 in the case of big endian:
3520 low = interleave_low (vect1, vect2)
3521 and in the case of little endian:
3522 low = interleave_high (vect1, vect2). */
3523 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3524 DECL_GIMPLE_REG_P (perm_dest) = 1;
3525 add_referenced_var (perm_dest);
3526 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3527 vect1, vect2);
3528 low = make_ssa_name (perm_dest, perm_stmt);
3529 gimple_assign_set_lhs (perm_stmt, low);
3530 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3531 VEC_replace (tree, *result_chain, 2*j+1, low);
3533 dr_chain = VEC_copy (tree, heap, *result_chain);
3535 return true;
3538 /* Function vect_setup_realignment
3540 This function is called when vectorizing an unaligned load using
3541 the dr_explicit_realign[_optimized] scheme.
3542 This function generates the following code at the loop prolog:
3544 p = initial_addr;
3545 x msq_init = *(floor(p)); # prolog load
3546 realignment_token = call target_builtin;
3547 loop:
3548 x msq = phi (msq_init, ---)
3550 The stmts marked with x are generated only for the case of
3551 dr_explicit_realign_optimized.
3553 The code above sets up a new (vector) pointer, pointing to the first
3554 location accessed by STMT, and a "floor-aligned" load using that pointer.
3555 It also generates code to compute the "realignment-token" (if the relevant
3556 target hook was defined), and creates a phi-node at the loop-header bb
3557 whose arguments are the result of the prolog-load (created by this
3558 function) and the result of a load that takes place in the loop (to be
3559 created by the caller to this function).
3561 For the case of dr_explicit_realign_optimized:
3562 The caller to this function uses the phi-result (msq) to create the
3563 realignment code inside the loop, and sets up the missing phi argument,
3564 as follows:
3565 loop:
3566 msq = phi (msq_init, lsq)
3567 lsq = *(floor(p')); # load in loop
3568 result = realign_load (msq, lsq, realignment_token);
3570 For the case of dr_explicit_realign:
3571 loop:
3572 msq = *(floor(p)); # load in loop
3573 p' = p + (VS-1);
3574 lsq = *(floor(p')); # load in loop
3575 result = realign_load (msq, lsq, realignment_token);
3577 Input:
3578 STMT - (scalar) load stmt to be vectorized. This load accesses
3579 a memory location that may be unaligned.
3580 BSI - place where new code is to be inserted.
3581 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3582 is used.
3584 Output:
3585 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3586 target hook, if defined.
3587 Return value - the result of the loop-header phi node. */
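/* For illustration (a sketch, assuming 16-byte vectors and an initial
   address p that is misaligned by 4 bytes): floor(p) is p - 4, so the
   prolog load msq_init fetches the aligned bytes p-4 .. p+11, the load
   generated inside the loop fetches the next aligned vector, and

       result = realign_load (msq, lsq, realignment_token);

   extracts bytes p .. p+15 from the two aligned vectors.  The realignment
   token encodes the misalignment (4 bytes here), typically as the
   permutation mask returned by the builtin_mask_for_load hook.  */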
3589 tree
3590 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3591 tree *realignment_token,
3592 enum dr_alignment_support alignment_support_scheme,
3593 tree init_addr,
3594 struct loop **at_loop)
3596 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3597 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3598 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3599 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3600 struct loop *loop = NULL;
3601 edge pe = NULL;
3602 tree scalar_dest = gimple_assign_lhs (stmt);
3603 tree vec_dest;
3604 gimple inc;
3605 tree ptr;
3606 tree data_ref;
3607 gimple new_stmt;
3608 basic_block new_bb;
3609 tree msq_init = NULL_TREE;
3610 tree new_temp;
3611 gimple phi_stmt;
3612 tree msq = NULL_TREE;
3613 gimple_seq stmts = NULL;
3614 bool inv_p;
3615 bool compute_in_loop = false;
3616 bool nested_in_vect_loop = false;
3617 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3618 struct loop *loop_for_initial_load = NULL;
3620 if (loop_vinfo)
3622 loop = LOOP_VINFO_LOOP (loop_vinfo);
3623 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3626 gcc_assert (alignment_support_scheme == dr_explicit_realign
3627 || alignment_support_scheme == dr_explicit_realign_optimized);
3629 /* We need to generate three things:
3630 1. the misalignment computation
3631 2. the extra vector load (for the optimized realignment scheme).
3632 3. the phi node for the two vectors from which the realignment is
3633 done (for the optimized realignment scheme). */
3635 /* 1. Determine where to generate the misalignment computation.
3637 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3638 calculation will be generated by this function, outside the loop (in the
3639 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3640 caller, inside the loop.
3642 Background: If the misalignment remains fixed throughout the iterations of
3643 the loop, then both realignment schemes are applicable, and also the
3644 misalignment computation can be done outside LOOP. This is because we are
3645 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3646 are a multiple of VS (the Vector Size), and therefore the misalignment in
3647 different vectorized LOOP iterations is always the same.
3648 The problem arises only if the memory access is in an inner-loop nested
3649 inside LOOP, which is now being vectorized using outer-loop vectorization.
3650 This is the only case when the misalignment of the memory access may not
3651 remain fixed throughout the iterations of the inner-loop (as explained in
3652 detail in vect_supportable_dr_alignment). In this case, not only is the
3653 optimized realignment scheme not applicable, but also the misalignment
3654 computation (and generation of the realignment token that is passed to
3655 REALIGN_LOAD) have to be done inside the loop.
3657 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3658 or not, which in turn determines if the misalignment is computed inside
3659 the inner-loop, or outside LOOP. */
3661 if (init_addr != NULL_TREE || !loop_vinfo)
3663 compute_in_loop = true;
3664 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3668 /* 2. Determine where to generate the extra vector load.
3670 For the optimized realignment scheme, instead of generating two vector
3671 loads in each iteration, we generate a single extra vector load in the
3672 preheader of the loop, and in each iteration reuse the result of the
3673 vector load from the previous iteration. In case the memory access is in
3674 an inner-loop nested inside LOOP, which is now being vectorized using
3675 outer-loop vectorization, we need to determine whether this initial vector
3676 load should be generated at the preheader of the inner-loop, or can be
3677 generated at the preheader of LOOP. If the memory access has no evolution
3678 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3679 to be generated inside LOOP (in the preheader of the inner-loop). */
3681 if (nested_in_vect_loop)
3683 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3684 bool invariant_in_outerloop =
3685 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3686 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3688 else
3689 loop_for_initial_load = loop;
3690 if (at_loop)
3691 *at_loop = loop_for_initial_load;
3693 if (loop_for_initial_load)
3694 pe = loop_preheader_edge (loop_for_initial_load);
3696 /* 3. For the case of the optimized realignment, create the first vector
3697 load at the loop preheader. */
3699 if (alignment_support_scheme == dr_explicit_realign_optimized)
3701 /* Create msq_init = *(floor(p1)) in the loop preheader */
3703 gcc_assert (!compute_in_loop);
3704 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3705 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3706 &init_addr, &inc, true, &inv_p);
3707 new_stmt = gimple_build_assign_with_ops
3708 (BIT_AND_EXPR, NULL_TREE, ptr,
3709 build_int_cst (TREE_TYPE (ptr),
3710 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3711 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3712 gimple_assign_set_lhs (new_stmt, new_temp);
3713 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3714 gcc_assert (!new_bb);
3715 data_ref
3716 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3717 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3718 new_stmt = gimple_build_assign (vec_dest, data_ref);
3719 new_temp = make_ssa_name (vec_dest, new_stmt);
3720 gimple_assign_set_lhs (new_stmt, new_temp);
3721 mark_symbols_for_renaming (new_stmt);
3722 if (pe)
3724 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3725 gcc_assert (!new_bb);
3727 else
3728 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3730 msq_init = gimple_assign_lhs (new_stmt);
3733 /* 4. Create realignment token using a target builtin, if available.
3734 It is done either inside the containing loop, or before LOOP (as
3735 determined above). */
3737 if (targetm.vectorize.builtin_mask_for_load)
3739 tree builtin_decl;
3741 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3742 if (!init_addr)
3744 /* Generate the INIT_ADDR computation outside LOOP. */
3745 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3746 NULL_TREE, loop);
3747 if (loop)
3749 pe = loop_preheader_edge (loop);
3750 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3751 gcc_assert (!new_bb);
3753 else
3754 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3757 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3758 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3759 vec_dest =
3760 vect_create_destination_var (scalar_dest,
3761 gimple_call_return_type (new_stmt));
3762 new_temp = make_ssa_name (vec_dest, new_stmt);
3763 gimple_call_set_lhs (new_stmt, new_temp);
3765 if (compute_in_loop)
3766 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3767 else
3769 /* Generate the misalignment computation outside LOOP. */
3770 pe = loop_preheader_edge (loop);
3771 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3772 gcc_assert (!new_bb);
3775 *realignment_token = gimple_call_lhs (new_stmt);
3777 /* The result of the CALL_EXPR to this builtin is determined from
3778 the value of the parameter alone, and no global variables are
3779 touched, which makes the builtin a "const" function. Requiring
3780 the builtin to have the "const" attribute makes it unnecessary
3781 to call mark_call_clobbered. */
3782 gcc_assert (TREE_READONLY (builtin_decl));
3785 if (alignment_support_scheme == dr_explicit_realign)
3786 return msq;
3788 gcc_assert (!compute_in_loop);
3789 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3792 /* 5. Create msq = phi <msq_init, lsq> in loop */
3794 pe = loop_preheader_edge (containing_loop);
3795 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3796 msq = make_ssa_name (vec_dest, NULL);
3797 phi_stmt = create_phi_node (msq, containing_loop->header);
3798 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3799 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3801 return msq;
3805 /* Function vect_strided_load_supported.
3807 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3808 and FALSE otherwise. */
3810 bool
3811 vect_strided_load_supported (tree vectype)
3813 optab perm_even_optab, perm_odd_optab;
3814 enum machine_mode mode;
3816 mode = TYPE_MODE (vectype);
3818 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3819 optab_default);
3820 if (!perm_even_optab)
3822 if (vect_print_dump_info (REPORT_DETAILS))
3823 fprintf (vect_dump, "no optab for perm_even.");
3824 return false;
3827 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3829 if (vect_print_dump_info (REPORT_DETAILS))
3830 fprintf (vect_dump, "perm_even op not supported by target.");
3831 return false;
3834 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3835 optab_default);
3836 if (!perm_odd_optab)
3838 if (vect_print_dump_info (REPORT_DETAILS))
3839 fprintf (vect_dump, "no optab for perm_odd.");
3840 return false;
3843 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3845 if (vect_print_dump_info (REPORT_DETAILS))
3846 fprintf (vect_dump, "perm_odd op not supported by target.");
3847 return false;
3849 return true;
3853 /* Function vect_permute_load_chain.
3855 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3856 a power of 2, generate extract_even/odd stmts to reorder the input data
3857 correctly. Return the final references for loads in RESULT_CHAIN.
3859 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3860 The input is 4 vectors each containing 8 elements. We assign a number to each
3861 element; the input sequence is:
3863 1st vec: 0 1 2 3 4 5 6 7
3864 2nd vec: 8 9 10 11 12 13 14 15
3865 3rd vec: 16 17 18 19 20 21 22 23
3866 4th vec: 24 25 26 27 28 29 30 31
3868 The output sequence should be:
3870 1st vec: 0 4 8 12 16 20 24 28
3871 2nd vec: 1 5 9 13 17 21 25 29
3872 3rd vec: 2 6 10 14 18 22 26 30
3873 4th vec: 3 7 11 15 19 23 27 31
3875 i.e., the first output vector should contain the first elements of each
3876 interleaving group, etc.
3878 We use extract_even/odd instructions to create such output. The input of
3879 each extract_even/odd operation is two vectors
3880 1st vec 2nd vec
3881 0 1 2 3 4 5 6 7
3883 and the output is the vector of extracted even/odd elements. The output of
3884 extract_even will be: 0 2 4 6
3885 and of extract_odd: 1 3 5 7
3888 The permutation is done in log LENGTH stages. In each stage extract_even
3889 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3890 their order. In our example,
3892 E1: extract_even (1st vec, 2nd vec)
3893 E2: extract_odd (1st vec, 2nd vec)
3894 E3: extract_even (3rd vec, 4th vec)
3895 E4: extract_odd (3rd vec, 4th vec)
3897 The output for the first stage will be:
3899 E1: 0 2 4 6 8 10 12 14
3900 E2: 1 3 5 7 9 11 13 15
3901 E3: 16 18 20 22 24 26 28 30
3902 E4: 17 19 21 23 25 27 29 31
3904 In order to proceed and create the correct sequence for the next stage (or
3905 for the correct output, if the second stage is the last one, as in our
3906 example), we first put the output of the extract_even operation and then the
3907 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3908 The input for the second stage is:
3910 1st vec (E1): 0 2 4 6 8 10 12 14
3911 2nd vec (E3): 16 18 20 22 24 26 28 30
3912 3rd vec (E2): 1 3 5 7 9 11 13 15
3913 4th vec (E4): 17 19 21 23 25 27 29 31
3915 The output of the second stage:
3917 E1: 0 4 8 12 16 20 24 28
3918 E2: 2 6 10 14 18 22 26 30
3919 E3: 1 5 9 13 17 21 25 29
3920 E4: 3 7 11 15 19 23 27 31
3922 And RESULT_CHAIN after reordering:
3924 1st vec (E1): 0 4 8 12 16 20 24 28
3925 2nd vec (E3): 1 5 9 13 17 21 25 29
3926 3rd vec (E2): 2 6 10 14 18 22 26 30
3927 4th vec (E4): 3 7 11 15 19 23 27 31. */
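/* A sketch of the permutation above, written over plain arrays for
   clarity (the code below operates on the DR_CHAIN/RESULT_CHAIN VECs):

       for (stage = 0; stage < log2 (LENGTH); stage++)
         {
           for (j = 0; j < LENGTH; j += 2)
             {
               result[j/2]            = extract_even (chain[j], chain[j+1]);
               result[j/2 + LENGTH/2] = extract_odd  (chain[j], chain[j+1]);
             }
           chain = result;
         }  */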
3929 bool
3930 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3931 unsigned int length,
3932 gimple stmt,
3933 gimple_stmt_iterator *gsi,
3934 VEC(tree,heap) **result_chain)
3936 tree perm_dest, data_ref, first_vect, second_vect;
3937 gimple perm_stmt;
3938 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3939 int i;
3940 unsigned int j;
3942 /* Check that the operation is supported. */
3943 if (!vect_strided_load_supported (vectype))
3944 return false;
3946 *result_chain = VEC_copy (tree, heap, dr_chain);
3947 for (i = 0; i < exact_log2 (length); i++)
3949 for (j = 0; j < length; j +=2)
3951 first_vect = VEC_index (tree, dr_chain, j);
3952 second_vect = VEC_index (tree, dr_chain, j+1);
3954 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3955 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3956 DECL_GIMPLE_REG_P (perm_dest) = 1;
3957 add_referenced_var (perm_dest);
3959 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3960 perm_dest, first_vect,
3961 second_vect);
3963 data_ref = make_ssa_name (perm_dest, perm_stmt);
3964 gimple_assign_set_lhs (perm_stmt, data_ref);
3965 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3966 mark_symbols_for_renaming (perm_stmt);
3968 VEC_replace (tree, *result_chain, j/2, data_ref);
3970 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3971 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3972 DECL_GIMPLE_REG_P (perm_dest) = 1;
3973 add_referenced_var (perm_dest);
3975 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3976 perm_dest, first_vect,
3977 second_vect);
3978 data_ref = make_ssa_name (perm_dest, perm_stmt);
3979 gimple_assign_set_lhs (perm_stmt, data_ref);
3980 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3981 mark_symbols_for_renaming (perm_stmt);
3983 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3985 dr_chain = VEC_copy (tree, heap, *result_chain);
3987 return true;
3991 /* Function vect_transform_strided_load.
3993 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3994 to perform their permutation and attach the resulting vectorized statements
3995 to the scalar statements. */
3998 bool
3999 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4000 gimple_stmt_iterator *gsi)
4002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4003 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4004 gimple next_stmt, new_stmt;
4005 VEC(tree,heap) *result_chain = NULL;
4006 unsigned int i, gap_count;
4007 tree tmp_data_ref;
4009 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4010 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
4011 vectors that are ready for vector computation. */
4012 result_chain = VEC_alloc (tree, heap, size);
4013 /* Permute. */
4014 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
4015 return false;
4017 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4018 Since we scan the chain starting from its first node, their order
4019 corresponds to the order of data-refs in RESULT_CHAIN. */
4020 next_stmt = first_stmt;
4021 gap_count = 1;
4022 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4024 if (!next_stmt)
4025 break;
4027 /* Skip the gaps. Loads created for the gaps will be removed by a dead
4028 code elimination pass later. No need to check for the first stmt in
4029 the group, since it always exists.
4030 DR_GROUP_GAP is the number of steps in elements from the previous
4031 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4032 correspond to the gaps. */
4033 if (next_stmt != first_stmt
4034 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4036 gap_count++;
4037 continue;
4040 while (next_stmt)
4042 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4043 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4044 copies, and we put the new vector statement in the first available
4045 RELATED_STMT. */
4046 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4047 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4048 else
4050 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4052 gimple prev_stmt =
4053 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4054 gimple rel_stmt =
4055 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4056 while (rel_stmt)
4058 prev_stmt = rel_stmt;
4059 rel_stmt =
4060 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4063 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4064 new_stmt;
4068 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4069 gap_count = 1;
4070 /* If NEXT_STMT accesses the same DR as the previous statement,
4071 put the same TMP_DATA_REF as its vectorized statement; otherwise
4072 get the next data-ref from RESULT_CHAIN. */
4073 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4074 break;
4078 VEC_free (tree, heap, result_chain);
4079 return true;
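/* A rough example of the gap handling above: for an interleaving group in
   which the second load accesses an element two steps after the first
   (DR_GROUP_GAP of the second statement is 2), the permuted vector that
   holds the in-between elements has no scalar statement to attach to; the
   gap_count test skips it, and the statements that produced it are later
   removed as dead code.  */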
4082 /* Function vect_can_force_dr_alignment_p.
4084 Returns whether the alignment of a DECL can be forced to be aligned
4085 on an ALIGNMENT-bit boundary. */
4087 bool
4088 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4090 if (TREE_CODE (decl) != VAR_DECL)
4091 return false;
4093 if (DECL_EXTERNAL (decl))
4094 return false;
4096 if (TREE_ASM_WRITTEN (decl))
4097 return false;
4099 if (TREE_STATIC (decl))
4100 return (alignment <= MAX_OFILE_ALIGNMENT);
4101 else
4102 return (alignment <= MAX_STACK_ALIGNMENT);
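/* For example, a variable defined in this compilation unit, such as

       static float a[256];

   can usually have its alignment raised (up to MAX_OFILE_ALIGNMENT) so
   that vector accesses to it become aligned, whereas a DECL_EXTERNAL or
   already emitted (TREE_ASM_WRITTEN) variable cannot be changed, since
   its storage is laid out elsewhere.  Automatic variables are limited by
   MAX_STACK_ALIGNMENT instead.  */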
4106 /* Return whether the data reference DR is supported with respect to its
4107 alignment.
4108 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4109 if it is aligned, i.e., check if it is possible to vectorize it with different
4110 alignment. */
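/* Roughly, the possible return values are:

       dr_aligned                     the access is already aligned;
       dr_explicit_realign            unaligned load implemented with two
                                      aligned loads and a REALIGN_LOAD per
                                      access;
       dr_explicit_realign_optimized  as above, but one of the aligned loads
                                      is reused from the previous iteration;
       dr_unaligned_supported         the target handles the misaligned
                                      access directly;
       dr_unaligned_unsupported       the access cannot be vectorized.

   The comment inside the function explains when the optimized realignment
   scheme is applicable.  */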
4112 enum dr_alignment_support
4113 vect_supportable_dr_alignment (struct data_reference *dr,
4114 bool check_aligned_accesses)
4116 gimple stmt = DR_STMT (dr);
4117 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4118 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4119 enum machine_mode mode = TYPE_MODE (vectype);
4120 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4121 struct loop *vect_loop = NULL;
4122 bool nested_in_vect_loop = false;
4124 if (aligned_access_p (dr) && !check_aligned_accesses)
4125 return dr_aligned;
4127 if (loop_vinfo)
4129 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4130 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4133 /* Possibly unaligned access. */
4135 /* We can choose between using the implicit realignment scheme (generating
4136 a misaligned_move stmt) and the explicit realignment scheme (generating
4137 aligned loads with a REALIGN_LOAD). There are two variants to the
4138 explicit realignment scheme: optimized, and unoptimized.
4139 We can optimize the realignment only if the step between consecutive
4140 vector loads is equal to the vector size. Since the vector memory
4141 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4142 is guaranteed that the misalignment amount remains the same throughout the
4143 execution of the vectorized loop. Therefore, we can create the
4144 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4145 at the loop preheader.
4147 However, in the case of outer-loop vectorization, when vectorizing a
4148 memory access in the inner-loop nested within the LOOP that is now being
4149 vectorized, while it is guaranteed that the misalignment of the
4150 vectorized memory access will remain the same in different outer-loop
4151 iterations, it is *not* guaranteed that it will remain the same throughout
4152 the execution of the inner-loop. This is because the inner-loop advances
4153 with the original scalar step (and not in steps of VS). If the inner-loop
4154 step happens to be a multiple of VS, then the misalignment remains fixed
4155 and we can use the optimized realignment scheme. For example:
4157 for (i=0; i<N; i++)
4158 for (j=0; j<M; j++)
4159 s += a[i+j];
4161 When vectorizing the i-loop in the above example, the step between
4162 consecutive vector loads is 1, and so the misalignment does not remain
4163 fixed across the execution of the inner-loop, and the realignment cannot
4164 be optimized (as illustrated in the following pseudo vectorized loop):
4166 for (i=0; i<N; i+=4)
4167 for (j=0; j<M; j++){
4168 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4169 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4170 // (assuming that we start from an aligned address).
4173 We therefore have to use the unoptimized realignment scheme:
4175 for (i=0; i<N; i+=4)
4176 for (j=k; j<M; j+=4)
4177 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4178 // that the misalignment of the initial address is
4179 // 0).
4181 The loop can then be vectorized as follows:
4183 for (k=0; k<4; k++){
4184 rt = get_realignment_token (&vp[k]);
4185 for (i=0; i<N; i+=4){
4186 v1 = vp[i+k];
4187 for (j=k; j<M; j+=4){
4188 v2 = vp[i+j+VS-1];
4189 va = REALIGN_LOAD <v1,v2,rt>;
4190 vs += va;
4191 v1 = v2;
4194 } */
4196 if (DR_IS_READ (dr))
4198 bool is_packed = false;
4199 tree type = (TREE_TYPE (DR_REF (dr)));
4201 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4202 && (!targetm.vectorize.builtin_mask_for_load
4203 || targetm.vectorize.builtin_mask_for_load ()))
4205 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4206 if ((nested_in_vect_loop
4207 && (TREE_INT_CST_LOW (DR_STEP (dr))
4208 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4209 || !loop_vinfo)
4210 return dr_explicit_realign;
4211 else
4212 return dr_explicit_realign_optimized;
4214 if (!known_alignment_for_access_p (dr))
4216 tree ba = DR_BASE_OBJECT (dr);
4218 if (ba)
4219 is_packed = contains_packed_reference (ba);
4222 if (targetm.vectorize.
4223 support_vector_misalignment (mode, type,
4224 DR_MISALIGNMENT (dr), is_packed))
4225 /* Can't software pipeline the loads, but can at least do them. */
4226 return dr_unaligned_supported;
4228 else
4230 bool is_packed = false;
4231 tree type = (TREE_TYPE (DR_REF (dr)));
4233 if (!known_alignment_for_access_p (dr))
4235 tree ba = DR_BASE_OBJECT (dr);
4237 if (ba)
4238 is_packed = contains_packed_reference (ba);
4241 if (targetm.vectorize.
4242 support_vector_misalignment (mode, type,
4243 DR_MISALIGNMENT (dr), is_packed))
4244 return dr_unaligned_supported;
4247 /* Unsupported. */
4248 return dr_unaligned_unsupported;