/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "rtl.h"
#include "expr.h"
#include "optabs.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
                              tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  limit_p = !targetm.array_mode_supported_p (mode, count);
  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
                              MODE_INT, limit_p);

  if (array_mode == BLKmode)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
                 GET_MODE_NAME (mode), count);
      return false;
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cannot use %s<%s><%s>",
                 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "can use %s<%s><%s>",
             name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));

  return true;
}
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different from the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */
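/* For example, for a widening statement such as 'int_x = (int) short_y' the
   lhs size is 4 bytes and the rhs size is 2 bytes; the 2-byte type is
   returned, so the vectorization factor is derived from the narrower
   elements.  */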
tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
          GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list. Insert here.  */
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
  GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
}
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT(DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
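/* For instance, if DRA's chain starts at init 8 and DRB's chain starts at
   init 0, case 4 applies: DRB's chain (the one with the smaller FIRST_DR
   init) is kept, and the nodes of DRA's chain are merged into it in init
   order.  */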
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
    {
      GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
      GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
      GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
    {
      GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
    {
      gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                          old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
          GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
  first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = GROUP_FIRST_ELEMENT (stmtinfo_a);
      prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = GROUP_FIRST_ELEMENT (stmtinfo_b);
      prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
              GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list. Insert here.  */
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
      node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
    }
}
/* Check dependence between DRA and DRB for basic block vectorization.
   If the accesses share same bases and offsets, we can compare their initial
   constant offsets to decide whether they differ or not.  In case of a read-
   write dependence we check that the load is before the store to ensure that
   vectorization will not change the order of the accesses.  */
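/* For instance, for 'x = a[0]; ... a[0] = y;' within one basic block the
   load comes before the store, so the accesses are treated as independent;
   if the store came first, the read would depend on it and the pair would
   be rejected.  */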
static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we verify
     it here.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static bool
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb)
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return false;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is greater than the difference between
        data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return false;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
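      /* For instance, for the two stores
           a[2*i]     = ...;   (DRB: init 0)
           a[2*i + 1] = ...;   (DRA: init 4, with 4-byte elements)
         step is 8, type_size is 4 and init_a - init_b is 4, so
         diff_mod_size is 0 and the two accesses form an interleaving
         group with DRB accessed first.  */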
      diff_mod_size = (init_a - init_b) % type_size_a;

      if (step && (init_a - init_b) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if (step && (init_b - init_a) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }

  return false;
}
/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
          && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
          && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
              == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  /* FORNOW: We don't support creating runtime alias tests for non-constant
     step.  */
  if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
      || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for non-constant "
                 "step");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct loop *loop = NULL;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Don't bother to analyze statements marked as unvectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      gimple earlier_stmt;

      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                       "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }
      /* When vectorizing a basic block unknown dependence can still mean
         grouped access.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      /* Read-read is OK (we need this check here, after checking for
         interleaving).  */
      if (DR_IS_READ (dra) && DR_IS_READ (drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check that it's not a load-after-store dependence.  */
      earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
        return true;

      return false;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is inapplicable, hence, in case of known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Do not vectorize basic blocks with write-write dependences.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check if this dependence is allowed in basic block vectorization.  */
      return vect_drs_dependent_in_basic_block (dra, drb);
    }
  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is store.  */
          if (DR_IS_READ (dra))
            GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
                     *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
                                   bb_vec_info bb_vinfo, int *max_vf)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = NULL;
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (loop_vinfo)
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  else
    ddrs = BB_VINFO_DDRS (bb_vinfo);

  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  /* Strided loads perform only component accesses, misalignment information
     is irrelevant for them.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    return true;

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  /* Similarly, if we're doing basic-block vectorization, we can only use
     base and misalignment information relative to an innermost loop if the
     misalignment stays the same throughout the execution of the loop.
     As above, this is the case if the stride of the dataref evenly divides
     by the vector size.  */
  else
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "SLP: step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }
  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                      TREE_TYPE (base_addr)))),
                                   alignment) >= 0)
      || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables here if
         flag_section_anchors is enabled as we already generated
         RTL for other functions.  Most global variables should
         have been aligned during the IPA increase_alignment pass.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "force alignment of ");
          print_generic_expr (vect_dump, ref, TDF_SLIM);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
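      /* For instance, for a V4SI access with DR_STEP -4 the offset is
         3 * -4 = -12, i.e. the first element of the vector access lies
         12 bytes before the address recorded in the DR.  */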
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
          }
        else
          return false;
      }

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
  if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
    dr_peel_size *= GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
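      /* E.g., peeling npeel = 2 iterations of 4-byte accesses moves DR by
         8 bytes; a DR that was 8 bytes misaligned w.r.t. a 16-byte vector
         then becomes (8 + 8) & 15 == 0, i.e. aligned.  */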
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");

              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }

  return true;
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
         number of the interleaved accesses.  */
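      /* E.g., for a V4SI access misaligned by one element the prologue
         needs 4 - 1 = 3 scalar iterations; with an interleaving group of
         size 2 that is not a multiple of the group size, so peeling is
         not attempted.  */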
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      bool is_packed = contains_packed_reference (DR_REF (dr));

      if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
        is_packed = true;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;
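  /* E.g., with a vectorization factor of 8 and a V4SI data reference
     (4 elements per vector), ncopies is 2: each vectorized iteration
     issues two vector loads or stores for this data reference.  */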
  if (DR_IS_READ (dr))
    vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
  else
    vect_get_store_cost (dr, ncopies, inside_cost);

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
             "outside_cost = %d.", *inside_cost, *outside_cost);
}
static hashval_t
vect_peeling_hash (const void *elem)
{
  const struct _vect_peel_info *peel_info;

  peel_info = (const struct _vect_peel_info *) elem;
  return (hashval_t) peel_info->npeel;
}

static int
vect_peeling_hash_eq (const void *elem1, const void *elem2)
{
  const struct _vect_peel_info *a, *b;

  a = (const struct _vect_peel_info *) elem1;
  b = (const struct _vect_peel_info *) elem2;
  return (a->npeel == b->npeel);
}
/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  void **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                                     &elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
                                 INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && !flag_vect_cost_model)
    slot->count += VECT_MAX_COST;
}
/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

static int
vect_peeling_hash_get_most_frequent (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info max = (vect_peel_extended_info) data;

  if (elem->count > max->peel_info.count
      || (elem->count == max->peel_info.count
          && max->peel_info.npeel > elem->npeel))
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}
/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

static int
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info min = (vect_peel_extended_info) data;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
                         vect_get_single_scalar_iteration_cost (loop_vinfo));

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }

  return 1;
}
/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr = NULL;

  if (flag_vect_cost_model)
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_lowest_cost, &res);
    }
  else
    {
      res.peel_info.count = 0;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_most_frequent, &res);
    }

  *npeel = res.peel_info.npeel;
  return res.peel_info.dr;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') which alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */
  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations include:
      + How many accesses will become aligned due to the peeling
      - How many accesses will become unaligned due to the peeling,
        and the cost of misaligned accesses.
      - The cost of peeling (the extra runtime checks, the increase
        in code size).  */

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      /* FORNOW: Any strided load prevents peeling.  The induction
         variable analysis will fail when the prologue loop is generated,
         and so we can't generate the new base for the pointer.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "strided load prevents peeling");
          do_peeling = false;
          break;
        }

      /* For invariant accesses there is nothing to enhance.  */
      if (integer_zerop (DR_STEP (dr)))
        continue;

      /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
                LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
                          htab_create (1, vect_peeling_hash,
                                       vect_peeling_hash_eq, free);

              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (negative
                           ? (mis - nelements) : (nelements - mis))
                          & (nelements - 1);

              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  Vectorization factor will be 8.  If
                 both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But the first one will be aligned also by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add full vector size, while we don't exceed
                 vectorization factor.
                 We do this automatically for cost model, since we calculate
                 cost for every peeling option.  */
              if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some other
                 access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (!flag_vect_cost_model)
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }

              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since we
                 have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know all the misalignment values, we prefer
                 peeling for data-ref that has maximum number of data-refs
                 with the same alignment, unless the target prefers to align
                 stores over loads.  */
              if (all_misalignments_unknown)
                {
                  if (same_align_drs_max < VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
                      || !dr0)
                    {
                      same_align_drs_max = VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
                      dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in the
                 loop, we choose peeling amount according to the known
                 accesses.  */
              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          if (!aligned_access_p (dr))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector alignment may not be reachable");

              break;
            }
        }
    }

  vect_versioning_for_alias_required
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling && all_misalignments_unknown
      && vect_supportable_dr_alignment (dr0, false))
    {
      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      if (first_store && DR_IS_READ (dr0))
        {
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;

          vect_get_data_access_cost (dr0, &load_inside_cost,
                                     &load_outside_cost);
          vect_get_data_access_cost (first_store, &store_inside_cost,
                                     &store_outside_cost);

          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
             aligning the load DR0).  */
          load_inside_penalty = store_inside_cost;
          load_outside_penalty = store_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (first_store))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                load_inside_penalty += load_inside_cost;
                load_outside_penalty += load_outside_cost;
              }
            else
              {
                load_inside_penalty += store_inside_cost;
                load_outside_penalty += store_outside_cost;
              }

          /* Calculate the penalty for leaving DR0 unaligned (by
             aligning the FIRST_STORE).  */
          store_inside_penalty = load_inside_cost;
          store_outside_penalty = load_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (dr0))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                store_inside_penalty += load_inside_cost;
                store_outside_penalty += load_outside_cost;
              }
            else
              {
                store_inside_penalty += store_inside_cost;
                store_outside_penalty += store_outside_cost;
              }

          if (load_inside_penalty > store_inside_penalty
              || (load_inside_penalty == store_inside_penalty
                  && load_outside_penalty > store_outside_penalty))
            dr0 = first_store;
        }

      /* In case there are only loads with different unknown misalignments, use
         peeling only if it may help to align other accesses in the loop.  */
      if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                             (vinfo_for_stmt (DR_STMT (dr0))))
          && vect_supportable_dr_alignment (dr0, false)
             != dr_unaligned_supported)
        do_peeling = false;
    }

  if (do_peeling && !dr0)
    {
      /* Peeling is possible, but there is no data access that is not supported
         unless aligned.  So we try to choose the best possible peeling.  */

      /* We should get here only if there are drs with known misalignment.  */
      gcc_assert (!all_misalignments_unknown);

      /* Choose the best peeling from the hash table.  */
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
    }
  if (do_peeling)
    {
      stmt = DR_STMT (dr0);
      stmt_info = vinfo_for_stmt (stmt);
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          bool negative = tree_int_cst_compare (DR_STEP (dr0),
                                                size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 vectorization factor minus the misalignment as an element
                 count.  */
              mis = DR_MISALIGNMENT (dr0);
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
              npeel = ((negative ? mis - nelements : nelements - mis)
                       & (nelements - 1));
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
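          /* E.g., for V4SI accesses with DR_MISALIGNMENT (dr0) == 8 bytes,
             mis is 2 elements and npeel is 4 - 2 = 2; for an interleaving
             group of size 2 the division below makes npeel 1.  */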
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
            npeel /= GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
              && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
            continue;

          /* Strided loads perform only component accesses, alignment is
             irrelevant for them.  */
          if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
        {
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          if (!stat)
            do_peeling = false;
          else
            return stat;
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          if (npeel)
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
          return stat;
        }
    }
  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
        flag_tree_vect_loop_version
        && optimize_loop_nest_for_speed_p (loop)
        && (!loop->inner); /* FORNOW */
1898 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
1900 stmt
= DR_STMT (dr
);
1901 stmt_info
= vinfo_for_stmt (stmt
);
1903 /* For interleaving, only the alignment of the first access
1905 if (aligned_access_p (dr
)
1906 || (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1907 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
))
1910 /* Strided loads perform only component accesses, alignment is
1911 irrelevant for them. */
1912 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1915 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1917 if (!supportable_dr_alignment
)
1923 if (known_alignment_for_access_p (dr
)
1924 || VEC_length (gimple
,
1925 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1926 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
1928 do_versioning
= false;
1932 stmt
= DR_STMT (dr
);
1933 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1934 gcc_assert (vectype
);
          /* The rightmost bits of an aligned address must be zeros.
             Construct the mask needed for this test.  For example,
             GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
             mask must be 15 = 0xf.  */
          mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
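          /* Illustrative sketch of the run-time guard this mask feeds
             (hypothetical names; the real test is generated later from
             LOOP_VINFO_MAY_MISALIGN_STMTS and LOOP_VINFO_PTR_MASK):  */
#if 0
          /* For V4SI, mask == 0xf; the vector loop is entered only if
             every potentially misaligned address has its low bits clear.  */
          take_vector_loop = (((addr_a | addr_b) & mask) == 0);
#endif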
          /* FORNOW: use the same mask to test all potentially unaligned
             references in the loop.  The vectorizer currently supports
             a single vector size, see the reference to
             GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
             vectorization factor is computed.  */
          gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                      || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
          LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1950 VEC_safe_push (gimple
, heap
,
1951 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
1956 /* Versioning requires at least one misaligned data reference. */
1957 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
))
1958 do_versioning
= false;
1959 else if (!do_versioning
)
1960 VEC_truncate (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
1965 VEC(gimple
,heap
) *may_misalign_stmts
1966 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1969 /* It can now be assumed that the data references in the statements
1970 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1971 of the loop being vectorized. */
1972 FOR_EACH_VEC_ELT (gimple
, may_misalign_stmts
, i
, stmt
)
1974 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1975 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1976 SET_DR_MISALIGNMENT (dr
, 0);
1977 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1978 fprintf (vect_dump
, "Alignment of access forced using versioning.");
1981 if (vect_print_dump_info (REPORT_DETAILS
))
1982 fprintf (vect_dump
, "Versioning for alignment will be applied.");
1984 /* Peeling and versioning can't be done together at this time. */
1985 gcc_assert (! (do_peeling
&& do_versioning
));
1987 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1992 /* This point is reached if neither peeling nor versioning is being done. */
1993 gcc_assert (! (do_peeling
|| do_versioning
));
1995 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
/* Function vect_find_same_alignment_drs.

   Update group and alignment relations according to the chosen
   vectorization factor.  */

static void
vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
                              loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return;

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    return;
  /* Data-dependence analysis reports a distance vector of zero
     for data-references that overlap only in the first iteration
     but have different sign step (see PR45764).
     So as a sanity check require equal DR_STEP.  */
  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
    return;

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist == 0
          || (dist % vectorization_factor == 0 && dra_size == drb_size))
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
        }
    }
}
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  /* Mark groups of data references with same alignment using
     data dependence information.  */
  if (loop_vinfo)
    {
      VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
      struct data_dependence_relation *ddr;
      unsigned int i;

      FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
        vect_find_same_alignment_drs (ddr, loop_vinfo);
    }

  if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  return true;
}
/* Analyze groups of accesses: check that DR belongs to a group of
   accesses of legal size, step, etc.  Detect gaps, single element
   interleaving, and other special cases.  Set grouped access info.
   Collect groups of strided stores for further use in SLP analysis.  */
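/* Illustrative example (plain C, not vectorizer code) of a group this
   routine classifies: the three stores below form one interleaved group
   of size 3, each with DR_STEP == 3 * sizeof (int) and consecutive
   DR_INITs.  */
#if 0
void
example_interleaved_store (int *a, const int *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      a[3 * i]     = b[i];       /* first element of the group  */
      a[3 * i + 1] = b[i] + 1;   /* second element, no gap      */
      a[3 * i + 2] = b[i] + 2;   /* third element               */
    }
}
#endif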
static bool
vect_analyze_group_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  HOST_WIDE_INT groupsize, last_accessed_element = 1;
  bool slp_impossible = false;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
     size of the interleaving group (including gaps).  */
  groupsize = dr_step / type_size;
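  /* Worked example (illustrative numbers only): for the int store group
     a[3*i], a[3*i+1], a[3*i+2] sketched above, DR_STEP is 12 bytes and
     the scalar type size is 4, so:  */
#if 0
  groupsize = 12 / 4;   /* == 3, the group size including any gaps */
#endif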
2133 /* Not consecutive access is possible only if it is a part of interleaving. */
2134 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2136 /* Check if it this DR is a part of interleaving, and is a single
2137 element of the group that is accessed in the loop. */
2139 /* Gaps are supported only for loads. STEP must be a multiple of the type
2140 size. The size of the group must be a power of 2. */
2142 && (dr_step
% type_size
) == 0
2144 && exact_log2 (groupsize
) != -1)
2146 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = stmt
;
2147 GROUP_SIZE (vinfo_for_stmt (stmt
)) = groupsize
;
2148 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2150 fprintf (vect_dump
, "Detected single element interleaving ");
2151 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2152 fprintf (vect_dump
, " step ");
2153 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2158 if (vect_print_dump_info (REPORT_DETAILS
))
2159 fprintf (vect_dump
, "Data access with gaps requires scalar "
2163 if (vect_print_dump_info (REPORT_DETAILS
))
2164 fprintf (vect_dump
, "Peeling for outer loop is not"
2169 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2175 if (vect_print_dump_info (REPORT_DETAILS
))
2177 fprintf (vect_dump
, "not consecutive access ");
2178 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2183 /* Mark the statement as unvectorizable. */
2184 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
2191 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
)
2193 /* First stmt in the interleaving chain. Check the chain. */
2194 gimple next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
2195 struct data_reference
*data_ref
= dr
;
2196 unsigned int count
= 1;
2198 tree prev_init
= DR_INIT (data_ref
);
2200 HOST_WIDE_INT diff
, count_in_bytes
, gaps
= 0;
2204 /* Skip same data-refs. In case that two or more stmts share
2205 data-ref (supported only for loads), we vectorize only the first
2206 stmt, and the rest get their vectorized loads from the first
2208 if (!tree_int_cst_compare (DR_INIT (data_ref
),
2209 DR_INIT (STMT_VINFO_DATA_REF (
2210 vinfo_for_stmt (next
)))))
2212 if (DR_IS_WRITE (data_ref
))
2214 if (vect_print_dump_info (REPORT_DETAILS
))
2215 fprintf (vect_dump
, "Two store stmts share the same dr.");
2219 /* Check that there is no load-store dependencies for this loads
2220 to prevent a case of load-store-load to the same location. */
2221 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
2222 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
2224 if (vect_print_dump_info (REPORT_DETAILS
))
2226 "READ_WRITE dependence in interleaving.");
2230 /* For load use the same data-ref load. */
2231 GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2234 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2240 /* Check that all the accesses have the same STEP. */
2241 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
2242 if (tree_int_cst_compare (step
, next_step
))
2244 if (vect_print_dump_info (REPORT_DETAILS
))
2245 fprintf (vect_dump
, "not consecutive access in interleaving");
2249 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2250 /* Check that the distance between two accesses is equal to the type
2251 size. Otherwise, we have gaps. */
2252 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2253 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2256 /* FORNOW: SLP of accesses with gaps is not supported. */
2257 slp_impossible
= true;
2258 if (DR_IS_WRITE (data_ref
))
2260 if (vect_print_dump_info (REPORT_DETAILS
))
2261 fprintf (vect_dump
, "interleaved store with gaps");
2268 last_accessed_element
+= diff
;
2270 /* Store the gap from the previous member of the group. If there is no
2271 gap in the access, GROUP_GAP is always 1. */
2272 GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2274 prev_init
= DR_INIT (data_ref
);
2275 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2276 /* Count the number of data-refs in the chain. */
2280 /* COUNT is the number of accesses found, we multiply it by the size of
2281 the type to get COUNT_IN_BYTES. */
2282 count_in_bytes
= type_size
* count
;
2284 /* Check that the size of the interleaving (including gaps) is not
2285 greater than STEP. */
2286 if (dr_step
&& dr_step
< count_in_bytes
+ gaps
* type_size
)
2288 if (vect_print_dump_info (REPORT_DETAILS
))
2290 fprintf (vect_dump
, "interleaving size is greater than step for ");
2291 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2296 /* Check that the size of the interleaving is equal to STEP for stores,
2297 i.e., that there are no gaps. */
2298 if (dr_step
&& dr_step
!= count_in_bytes
)
2300 if (DR_IS_READ (dr
))
2302 slp_impossible
= true;
2303 /* There is a gap after the last load in the group. This gap is a
2304 difference between the groupsize and the number of elements.
2305 When there is no gap, this difference should be 0. */
2306 GROUP_GAP (vinfo_for_stmt (stmt
)) = groupsize
- count
;
2310 if (vect_print_dump_info (REPORT_DETAILS
))
2311 fprintf (vect_dump
, "interleaved store with gaps");
2316 /* Check that STEP is a multiple of type size. */
2317 if (dr_step
&& (dr_step
% type_size
) != 0)
2319 if (vect_print_dump_info (REPORT_DETAILS
))
2321 fprintf (vect_dump
, "step is not a multiple of type size: step ");
2322 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2323 fprintf (vect_dump
, " size ");
2324 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
2333 GROUP_SIZE (vinfo_for_stmt (stmt
)) = groupsize
;
2334 if (vect_print_dump_info (REPORT_DETAILS
))
2335 fprintf (vect_dump
, "Detected interleaving of size %d", (int)groupsize
);
2337 /* SLP: create an SLP data structure for every interleaving group of
2338 stores for further analysis in vect_analyse_slp. */
2339 if (DR_IS_WRITE (dr
) && !slp_impossible
)
2342 VEC_safe_push (gimple
, heap
, LOOP_VINFO_GROUPED_STORES (loop_vinfo
),
2345 VEC_safe_push (gimple
, heap
, BB_VINFO_GROUPED_STORES (bb_vinfo
),
2349 /* There is a gap in the end of the group. */
2350 if (groupsize
- last_accessed_element
> 0 && loop_vinfo
)
2352 if (vect_print_dump_info (REPORT_DETAILS
))
2353 fprintf (vect_dump
, "Data access with gaps requires scalar "
2357 if (vect_print_dump_info (REPORT_DETAILS
))
2358 fprintf (vect_dump
, "Peeling for outer loop is not supported");
2362 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2370 /* Analyze the access pattern of the data-reference DR.
2371 In case of non-consecutive accesses call vect_analyze_group_access() to
2372 analyze groups of accesses. */
2375 vect_analyze_data_ref_access (struct data_reference
*dr
)
2377 tree step
= DR_STEP (dr
);
2378 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2379 gimple stmt
= DR_STMT (dr
);
2380 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2381 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2382 struct loop
*loop
= NULL
;
2385 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2387 if (loop_vinfo
&& !step
)
2389 if (vect_print_dump_info (REPORT_DETAILS
))
2390 fprintf (vect_dump
, "bad data-ref access in loop");
2394 /* Allow invariant loads in loops. */
2395 if (loop_vinfo
&& integer_zerop (step
))
2397 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2398 return DR_IS_READ (dr
);
2401 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2403 /* Interleaved accesses are not yet supported within outer-loop
2404 vectorization for references in the inner-loop. */
2405 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2407 /* For the rest of the analysis we use the outer-loop step. */
2408 step
= STMT_VINFO_DR_STEP (stmt_info
);
2409 if (integer_zerop (step
))
2411 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2412 fprintf (vect_dump
, "zero step in outer loop.");
2413 if (DR_IS_READ (dr
))
2421 if (TREE_CODE (step
) == INTEGER_CST
)
2423 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2424 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
))
2426 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type
), -dr_step
)))
2428 /* Mark that it is not interleaving. */
2429 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2434 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2436 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2437 fprintf (vect_dump
, "grouped access in outer loop.");
2441 /* Assume this is a DR handled by non-constant strided load case. */
2442 if (TREE_CODE (step
) != INTEGER_CST
)
2443 return STMT_VINFO_STRIDE_LOAD_P (stmt_info
);
2445 /* Not consecutive access - check if it's a part of interleaving group. */
2446 return vect_analyze_group_access (dr
);
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_analyze_data_ref_access (dr))
      {
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
          fprintf (vect_dump, "not vectorized: complicated access pattern.");

        if (bb_vinfo)
          {
            /* Mark the statement as not vectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}
/* Function vect_prune_runtime_alias_test_list.

   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Return FALSE if the resulting list of ddrs is longer than allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */
2501 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo
)
2503 VEC (ddr_p
, heap
) * ddrs
=
2504 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
);
2507 if (vect_print_dump_info (REPORT_DETAILS
))
2508 fprintf (vect_dump
, "=== vect_prune_runtime_alias_test_list ===");
2510 for (i
= 0; i
< VEC_length (ddr_p
, ddrs
); )
2515 ddr_i
= VEC_index (ddr_p
, ddrs
, i
);
2518 for (j
= 0; j
< i
; j
++)
2520 ddr_p ddr_j
= VEC_index (ddr_p
, ddrs
, j
);
2522 if (vect_vfa_range_equal (ddr_i
, ddr_j
))
2524 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2526 fprintf (vect_dump
, "found equal ranges ");
2527 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_i
)), TDF_SLIM
);
2528 fprintf (vect_dump
, ", ");
2529 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_i
)), TDF_SLIM
);
2530 fprintf (vect_dump
, " and ");
2531 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_j
)), TDF_SLIM
);
2532 fprintf (vect_dump
, ", ");
2533 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_j
)), TDF_SLIM
);
2542 VEC_ordered_remove (ddr_p
, ddrs
, i
);
2548 if (VEC_length (ddr_p
, ddrs
) >
2549 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
2551 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2554 "disable versioning for alias - max number of generated "
2555 "checks exceeded.");
2558 VEC_truncate (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), 0);
2566 /* Check whether a non-affine read in stmt is suitable for gather load
2567 and if so, return a builtin decl for that operation. */
2570 vect_check_gather (gimple stmt
, loop_vec_info loop_vinfo
, tree
*basep
,
2571 tree
*offp
, int *scalep
)
2573 HOST_WIDE_INT scale
= 1, pbitpos
, pbitsize
;
2574 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2575 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2576 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2577 tree offtype
= NULL_TREE
;
2578 tree decl
, base
, off
;
2579 enum machine_mode pmode
;
2580 int punsignedp
, pvolatilep
;
  /* The gather builtins need address of the form
       loop_invariant + vector * {1, 2, 4, 8}
     or
       loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
     Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
     of loop invariants/SSA_NAMEs defined in the loop, with casts,
     multiplications and additions in it.  To get a vector, we need
     a single SSA_NAME that will be defined in the loop and will
     contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
     SSA_NAME OFF and put the loop invariants into a tree BASE
     that can be gimplified before the loop.  */
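  /* Illustrative example (hypothetical source, not vectorizer code): for
     a loop reading p[idx[i]] with loop-invariant P, the decomposition
     below yields BASE == p, OFF == idx[i] (an SSA_NAME defined in the
     loop) and SCALE == sizeof (*p), matching the form required above:  */
#if 0
  for (i = 0; i < n; i++)
    s += p[idx[i]];     /* gatherable: invariant base, in-loop index */
#endif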
2594 base
= get_inner_reference (DR_REF (dr
), &pbitsize
, &pbitpos
, &off
,
2595 &pmode
, &punsignedp
, &pvolatilep
, false);
2596 gcc_assert (base
!= NULL_TREE
&& (pbitpos
% BITS_PER_UNIT
) == 0);
2598 if (TREE_CODE (base
) == MEM_REF
)
2600 if (!integer_zerop (TREE_OPERAND (base
, 1)))
2602 if (off
== NULL_TREE
)
2604 double_int moff
= mem_ref_offset (base
);
2605 off
= double_int_to_tree (sizetype
, moff
);
2608 off
= size_binop (PLUS_EXPR
, off
,
2609 fold_convert (sizetype
, TREE_OPERAND (base
, 1)));
2611 base
= TREE_OPERAND (base
, 0);
2614 base
= build_fold_addr_expr (base
);
2616 if (off
== NULL_TREE
)
2617 off
= size_zero_node
;
2619 /* If base is not loop invariant, either off is 0, then we start with just
2620 the constant offset in the loop invariant BASE and continue with base
2621 as OFF, otherwise give up.
2622 We could handle that case by gimplifying the addition of base + off
2623 into some SSA_NAME and use that as off, but for now punt. */
2624 if (!expr_invariant_in_loop_p (loop
, base
))
2626 if (!integer_zerop (off
))
2629 base
= size_int (pbitpos
/ BITS_PER_UNIT
);
2631 /* Otherwise put base + constant offset into the loop invariant BASE
2632 and continue with OFF. */
2635 base
= fold_convert (sizetype
, base
);
2636 base
= size_binop (PLUS_EXPR
, base
, size_int (pbitpos
/ BITS_PER_UNIT
));
2639 /* OFF at this point may be either a SSA_NAME or some tree expression
2640 from get_inner_reference. Try to peel off loop invariants from it
2641 into BASE as long as possible. */
2643 while (offtype
== NULL_TREE
)
2645 enum tree_code code
;
2646 tree op0
, op1
, add
= NULL_TREE
;
2648 if (TREE_CODE (off
) == SSA_NAME
)
2650 gimple def_stmt
= SSA_NAME_DEF_STMT (off
);
2652 if (expr_invariant_in_loop_p (loop
, off
))
2655 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
2658 op0
= gimple_assign_rhs1 (def_stmt
);
2659 code
= gimple_assign_rhs_code (def_stmt
);
2660 op1
= gimple_assign_rhs2 (def_stmt
);
2664 if (get_gimple_rhs_class (TREE_CODE (off
)) == GIMPLE_TERNARY_RHS
)
2666 code
= TREE_CODE (off
);
2667 extract_ops_from_tree (off
, &code
, &op0
, &op1
);
2671 case POINTER_PLUS_EXPR
:
2673 if (expr_invariant_in_loop_p (loop
, op0
))
2678 add
= fold_convert (sizetype
, add
);
2680 add
= size_binop (MULT_EXPR
, add
, size_int (scale
));
2681 base
= size_binop (PLUS_EXPR
, base
, add
);
2684 if (expr_invariant_in_loop_p (loop
, op1
))
2692 if (expr_invariant_in_loop_p (loop
, op1
))
2694 add
= fold_convert (sizetype
, op1
);
2695 add
= size_binop (MINUS_EXPR
, size_zero_node
, add
);
2701 if (scale
== 1 && host_integerp (op1
, 0))
2703 scale
= tree_low_cst (op1
, 0);
2712 if (!POINTER_TYPE_P (TREE_TYPE (op0
))
2713 && !INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
2715 if (TYPE_PRECISION (TREE_TYPE (op0
))
2716 == TYPE_PRECISION (TREE_TYPE (off
)))
2721 if (TYPE_PRECISION (TREE_TYPE (op0
))
2722 < TYPE_PRECISION (TREE_TYPE (off
)))
2725 offtype
= TREE_TYPE (off
);
2736 /* If at the end OFF still isn't a SSA_NAME or isn't
2737 defined in the loop, punt. */
2738 if (TREE_CODE (off
) != SSA_NAME
2739 || expr_invariant_in_loop_p (loop
, off
))
2742 if (offtype
== NULL_TREE
)
2743 offtype
= TREE_TYPE (off
);
2745 decl
= targetm
.vectorize
.builtin_gather (STMT_VINFO_VECTYPE (stmt_info
),
2747 if (decl
== NULL_TREE
)
/* Check whether a non-affine load in STMT (being in the loop referred to
   in LOOP_VINFO) is suitable for handling as a strided load.  That is the
   case if its address is a simple induction variable.  If so, return the
   base of that induction variable in *BASEP and the (loop-invariant) step
   in *STEPP, both only when that pointer is non-zero.

   This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
   base pointer) only.  */
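/* Illustrative example (hypothetical source, not vectorizer code) of an
   access this predicate accepts: the index below is a simple induction
   variable with base 0 and loop-invariant step STRIDE, even though
   DR_STEP of the reference is not an INTEGER_CST.  */
#if 0
int
example_strided_load (const int *a, int stride, int n)
{
  int i, sum = 0;
  for (i = 0; i < n; i++)
    sum += a[i * stride];
  return sum;
}
#endif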
2769 vect_check_strided_load (gimple stmt
, loop_vec_info loop_vinfo
, tree
*basep
,
2772 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2773 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2774 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2778 if (!DR_IS_READ (dr
))
2783 if (TREE_CODE (base
) == ARRAY_REF
)
2785 off
= TREE_OPERAND (base
, 1);
2786 base
= TREE_OPERAND (base
, 0);
2788 else if (TREE_CODE (base
) == MEM_REF
)
2790 off
= TREE_OPERAND (base
, 0);
2791 base
= TREE_OPERAND (base
, 1);
2796 if (TREE_CODE (off
) != SSA_NAME
)
2799 if (!expr_invariant_in_loop_p (loop
, base
)
2800 || !simple_iv (loop
, loop_containing_stmt (stmt
), off
, &iv
, true))
2810 /* Function vect_analyze_data_refs.
2812 Find all the data references in the loop or basic block.
2814 The general structure of the analysis of data refs in the vectorizer is as
2816 1- vect_analyze_data_refs(loop/bb): call
2817 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2818 in the loop/bb and their dependences.
2819 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2820 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2821 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2826 vect_analyze_data_refs (loop_vec_info loop_vinfo
,
2827 bb_vec_info bb_vinfo
,
2830 struct loop
*loop
= NULL
;
2831 basic_block bb
= NULL
;
2833 VEC (data_reference_p
, heap
) *datarefs
;
2834 struct data_reference
*dr
;
2836 bool res
, stop_bb_analysis
= false;
2838 if (vect_print_dump_info (REPORT_DETAILS
))
2839 fprintf (vect_dump
, "=== vect_analyze_data_refs ===\n");
2843 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2844 res
= compute_data_dependences_for_loop
2846 &LOOP_VINFO_LOOP_NEST (loop_vinfo
),
2847 &LOOP_VINFO_DATAREFS (loop_vinfo
),
2848 &LOOP_VINFO_DDRS (loop_vinfo
));
2852 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2853 fprintf (vect_dump
, "not vectorized: loop contains function calls"
2854 " or data references that cannot be analyzed");
2858 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2862 gimple_stmt_iterator gsi
;
2864 bb
= BB_VINFO_BB (bb_vinfo
);
2865 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2867 gimple stmt
= gsi_stmt (gsi
);
2868 if (!find_data_references_in_stmt (NULL
, stmt
,
2869 &BB_VINFO_DATAREFS (bb_vinfo
)))
2871 /* Mark the rest of the basic-block as unvectorizable. */
2872 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
2874 stmt
= gsi_stmt (gsi
);
2875 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)) = false;
2880 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo
),
2881 &BB_VINFO_DDRS (bb_vinfo
), NULL
, true))
2883 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2884 fprintf (vect_dump
, "not vectorized: basic block contains function"
2885 " calls or data references that cannot be analyzed");
2889 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
2892 /* Go through the data-refs, check that the analysis succeeded. Update
2893 pointer from stmt_vec_info struct to DR and vectype. */
2895 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
2898 stmt_vec_info stmt_info
;
2899 tree base
, offset
, init
;
2900 bool gather
= false;
2903 if (!dr
|| !DR_REF (dr
))
2905 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2906 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
2911 stmt
= DR_STMT (dr
);
2912 stmt_info
= vinfo_for_stmt (stmt
);
2914 if (stop_bb_analysis
)
2916 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2920 /* Check that analysis of the data-ref succeeded. */
2921 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
2924 /* If target supports vector gather loads, see if they can't
2928 && !TREE_THIS_VOLATILE (DR_REF (dr
))
2929 && targetm
.vectorize
.builtin_gather
!= NULL
2930 && !nested_in_vect_loop_p (loop
, stmt
))
2932 struct data_reference
*newdr
2933 = create_data_ref (NULL
, loop_containing_stmt (stmt
),
2934 DR_REF (dr
), stmt
, true);
2935 gcc_assert (newdr
!= NULL
&& DR_REF (newdr
));
2936 if (DR_BASE_ADDRESS (newdr
)
2937 && DR_OFFSET (newdr
)
2940 && integer_zerop (DR_STEP (newdr
)))
2946 free_data_ref (newdr
);
2951 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2953 fprintf (vect_dump
, "not vectorized: data ref analysis "
2955 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2960 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2961 stop_bb_analysis
= true;
2969 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
2971 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2972 fprintf (vect_dump
, "not vectorized: base addr of dr is a "
2977 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2978 stop_bb_analysis
= true;
2987 if (TREE_THIS_VOLATILE (DR_REF (dr
)))
2989 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2991 fprintf (vect_dump
, "not vectorized: volatile type ");
2992 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2997 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2998 stop_bb_analysis
= true;
3005 if (stmt_can_throw_internal (stmt
))
3007 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
3009 fprintf (vect_dump
, "not vectorized: statement can throw an "
3011 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3016 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
3017 stop_bb_analysis
= true;
3026 if (TREE_CODE (DR_REF (dr
)) == COMPONENT_REF
3027 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr
), 1)))
3029 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
3031 fprintf (vect_dump
, "not vectorized: statement is bitfield "
3033 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3038 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
3039 stop_bb_analysis
= true;
3048 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3049 offset
= unshare_expr (DR_OFFSET (dr
));
3050 init
= unshare_expr (DR_INIT (dr
));
3052 if (is_gimple_call (stmt
))
3054 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
3056 fprintf (vect_dump
, "not vectorized: dr in a call ");
3057 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3062 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
3063 stop_bb_analysis
= true;
3072 /* Update DR field in stmt_vec_info struct. */
3074 /* If the dataref is in an inner-loop of the loop that is considered for
3075 for vectorization, we also want to analyze the access relative to
3076 the outer-loop (DR contains information only relative to the
3077 inner-most enclosing loop). We do that by building a reference to the
3078 first location accessed by the inner-loop, and analyze it relative to
3080 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
3082 tree outer_step
, outer_base
, outer_init
;
3083 HOST_WIDE_INT pbitsize
, pbitpos
;
3085 enum machine_mode pmode
;
3086 int punsignedp
, pvolatilep
;
3087 affine_iv base_iv
, offset_iv
;
3090 /* Build a reference to the first location accessed by the
3091 inner-loop: *(BASE+INIT). (The first location is actually
3092 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3093 tree inner_base
= build_fold_indirect_ref
3094 (fold_build_pointer_plus (base
, init
));
3096 if (vect_print_dump_info (REPORT_DETAILS
))
3098 fprintf (vect_dump
, "analyze in outer-loop: ");
3099 print_generic_expr (vect_dump
, inner_base
, TDF_SLIM
);
3102 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
3103 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
3104 gcc_assert (outer_base
!= NULL_TREE
);
3106 if (pbitpos
% BITS_PER_UNIT
!= 0)
3108 if (vect_print_dump_info (REPORT_DETAILS
))
3109 fprintf (vect_dump
, "failed: bit offset alignment.\n");
3113 outer_base
= build_fold_addr_expr (outer_base
);
3114 if (!simple_iv (loop
, loop_containing_stmt (stmt
), outer_base
,
3117 if (vect_print_dump_info (REPORT_DETAILS
))
3118 fprintf (vect_dump
, "failed: evolution of base is not affine.\n");
3125 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
,
3133 offset_iv
.base
= ssize_int (0);
3134 offset_iv
.step
= ssize_int (0);
3136 else if (!simple_iv (loop
, loop_containing_stmt (stmt
), poffset
,
3139 if (vect_print_dump_info (REPORT_DETAILS
))
3140 fprintf (vect_dump
, "evolution of offset is not affine.\n");
3144 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
3145 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
3146 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3147 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
3148 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3150 outer_step
= size_binop (PLUS_EXPR
,
3151 fold_convert (ssizetype
, base_iv
.step
),
3152 fold_convert (ssizetype
, offset_iv
.step
));
3154 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
3155 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3156 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
3157 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
3158 STMT_VINFO_DR_OFFSET (stmt_info
) =
3159 fold_convert (ssizetype
, offset_iv
.base
);
3160 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
3161 size_int (highest_pow2_factor (offset_iv
.base
));
3163 if (vect_print_dump_info (REPORT_DETAILS
))
3165 fprintf (vect_dump
, "\touter base_address: ");
3166 print_generic_expr (vect_dump
, STMT_VINFO_DR_BASE_ADDRESS (stmt_info
), TDF_SLIM
);
3167 fprintf (vect_dump
, "\n\touter offset from base address: ");
3168 print_generic_expr (vect_dump
, STMT_VINFO_DR_OFFSET (stmt_info
), TDF_SLIM
);
3169 fprintf (vect_dump
, "\n\touter constant offset from base address: ");
3170 print_generic_expr (vect_dump
, STMT_VINFO_DR_INIT (stmt_info
), TDF_SLIM
);
3171 fprintf (vect_dump
, "\n\touter step: ");
3172 print_generic_expr (vect_dump
, STMT_VINFO_DR_STEP (stmt_info
), TDF_SLIM
);
3173 fprintf (vect_dump
, "\n\touter aligned to: ");
3174 print_generic_expr (vect_dump
, STMT_VINFO_DR_ALIGNED_TO (stmt_info
), TDF_SLIM
);
3178 if (STMT_VINFO_DATA_REF (stmt_info
))
3180 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
3183 "not vectorized: more than one data ref in stmt: ");
3184 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3189 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
3190 stop_bb_analysis
= true;
3199 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
3201 /* Set vectype for STMT. */
3202 scalar_type
= TREE_TYPE (DR_REF (dr
));
3203 STMT_VINFO_VECTYPE (stmt_info
) =
3204 get_vectype_for_scalar_type (scalar_type
);
3205 if (!STMT_VINFO_VECTYPE (stmt_info
))
3207 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
3210 "not vectorized: no vectype for stmt: ");
3211 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3212 fprintf (vect_dump
, " scalar_type: ");
3213 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
3218 /* Mark the statement as not vectorizable. */
3219 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
3220 stop_bb_analysis
= true;
3226 STMT_VINFO_DATA_REF (stmt_info
) = NULL
;
3232 /* Adjust the minimal vectorization factor according to the
3234 vf
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
3240 unsigned int j
, k
, n
;
3241 struct data_reference
*olddr
3242 = VEC_index (data_reference_p
, datarefs
, i
);
3243 VEC (ddr_p
, heap
) *ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
3244 struct data_dependence_relation
*ddr
, *newddr
;
3247 VEC (loop_p
, heap
) *nest
= LOOP_VINFO_LOOP_NEST (loop_vinfo
);
3249 gather
= 0 != vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
3251 && get_vectype_for_scalar_type (TREE_TYPE (off
)) == NULL_TREE
)
3255 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
3258 "not vectorized: not suitable for gather load ");
3259 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3264 n
= VEC_length (data_reference_p
, datarefs
) - 1;
3265 for (j
= 0, k
= i
- 1; j
< i
; j
++)
3267 ddr
= VEC_index (ddr_p
, ddrs
, k
);
3268 gcc_assert (DDR_B (ddr
) == olddr
);
3269 newddr
= initialize_data_dependence_relation (DDR_A (ddr
), dr
,
3271 VEC_replace (ddr_p
, ddrs
, k
, newddr
);
3272 free_dependence_relation (ddr
);
3274 && DR_IS_WRITE (DDR_A (newddr
))
3275 && DDR_ARE_DEPENDENT (newddr
) != chrec_known
)
3281 n
= k
+ VEC_length (data_reference_p
, datarefs
) - i
- 1;
3284 ddr
= VEC_index (ddr_p
, ddrs
, k
);
3285 gcc_assert (DDR_A (ddr
) == olddr
);
3286 newddr
= initialize_data_dependence_relation (dr
, DDR_B (ddr
),
3288 VEC_replace (ddr_p
, ddrs
, k
, newddr
);
3289 free_dependence_relation (ddr
);
3291 && DR_IS_WRITE (DDR_B (newddr
))
3292 && DDR_ARE_DEPENDENT (newddr
) != chrec_known
)
3296 k
= VEC_length (ddr_p
, ddrs
)
3297 - VEC_length (data_reference_p
, datarefs
) + i
;
3298 ddr
= VEC_index (ddr_p
, ddrs
, k
);
3299 gcc_assert (DDR_A (ddr
) == olddr
&& DDR_B (ddr
) == olddr
);
3300 newddr
= initialize_data_dependence_relation (dr
, dr
, nest
);
3301 VEC_replace (ddr_p
, ddrs
, k
, newddr
);
3302 free_dependence_relation (ddr
);
3303 VEC_replace (data_reference_p
, datarefs
, i
, dr
);
3307 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
3310 "not vectorized: data dependence conflict"
3311 " prevents gather load");
3312 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3317 STMT_VINFO_GATHER_P (stmt_info
) = true;
3320 && TREE_CODE (DR_STEP (dr
)) != INTEGER_CST
)
3322 bool strided_load
= false;
3323 if (!nested_in_vect_loop_p (loop
, stmt
))
3325 = vect_check_strided_load (stmt
, loop_vinfo
, NULL
, NULL
);
3328 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
3331 "not vectorized: not suitable for strided load ");
3332 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
3336 STMT_VINFO_STRIDE_LOAD_P (stmt_info
) = true;
3344 /* Function vect_get_new_vect_var.
3346 Returns a name for a new variable. The current naming scheme appends the
3347 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3348 the name of vectorizer generated variables, and appends that to NAME if
3352 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
3359 case vect_simple_var
:
3362 case vect_scalar_var
:
3365 case vect_pointer_var
:
3374 char* tmp
= concat (prefix
, name
, NULL
);
3375 new_vect_var
= create_tmp_var (type
, tmp
);
3379 new_vect_var
= create_tmp_var (type
, prefix
);
3381 /* Mark vector typed variable as a gimple register variable. */
3382 if (TREE_CODE (type
) == VECTOR_TYPE
)
3383 DECL_GIMPLE_REG_P (new_vect_var
) = true;
3385 return new_vect_var
;
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP: Specify relative to which loop-nest should the address be computed.
         For example, when the dataref is in an inner-loop nested in an
         outer-loop that is now being vectorized, LOOP can be either the
         outer-loop, or the inner-loop.  The first memory location accessed
         by the following dataref ('in' points to short):

         if LOOP=i_loop: &in      (relative to i_loop)
         if LOOP=j_loop: &in+i*2B (relative to j_loop)

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */
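/* Sketch of the address computation this function emits, in C terms
   (illustrative only; the names are hypothetical and the real output is
   a gimple sequence returned through NEW_STMT_LIST):  */
#if 0
  /* base_off = DR_OFFSET + DR_INIT (+ OFFSET * step, if OFFSET is given);
     result   = (vectype *) (DR_BASE_ADDRESS + base_off);  */
  size_t base_off = dr_offset + dr_init + offset * step;
  char *addr_base = (char *) data_ref_base + base_off;
#endif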
3421 vect_create_addr_base_for_vector_ref (gimple stmt
,
3422 gimple_seq
*new_stmt_list
,
3426 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3427 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3428 tree data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3430 tree data_ref_base_var
;
3432 tree addr_base
, addr_expr
;
3434 gimple_seq seq
= NULL
;
3435 tree base_offset
= unshare_expr (DR_OFFSET (dr
));
3436 tree init
= unshare_expr (DR_INIT (dr
));
3438 tree step
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
3439 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3442 if (loop_vinfo
&& loop
&& loop
!= (gimple_bb (stmt
))->loop_father
)
3444 struct loop
*outer_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3446 gcc_assert (nested_in_vect_loop_p (outer_loop
, stmt
));
3448 data_ref_base
= unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
3449 base_offset
= unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info
));
3450 init
= unshare_expr (STMT_VINFO_DR_INIT (stmt_info
));
3454 base_name
= build_fold_indirect_ref (data_ref_base
);
3457 base_offset
= ssize_int (0);
3458 init
= ssize_int (0);
3459 base_name
= build_fold_indirect_ref (unshare_expr (DR_REF (dr
)));
3462 data_ref_base_var
= create_tmp_var (TREE_TYPE (data_ref_base
), "batmp");
3463 add_referenced_var (data_ref_base_var
);
3464 data_ref_base
= force_gimple_operand (data_ref_base
, &seq
, true,
3466 gimple_seq_add_seq (new_stmt_list
, seq
);
3468 /* Create base_offset */
3469 base_offset
= size_binop (PLUS_EXPR
,
3470 fold_convert (sizetype
, base_offset
),
3471 fold_convert (sizetype
, init
));
3472 dest
= create_tmp_var (sizetype
, "base_off");
3473 add_referenced_var (dest
);
3474 base_offset
= force_gimple_operand (base_offset
, &seq
, true, dest
);
3475 gimple_seq_add_seq (new_stmt_list
, seq
);
3479 tree tmp
= create_tmp_var (sizetype
, "offset");
3481 add_referenced_var (tmp
);
3482 offset
= fold_build2 (MULT_EXPR
, sizetype
,
3483 fold_convert (sizetype
, offset
), step
);
3484 base_offset
= fold_build2 (PLUS_EXPR
, sizetype
,
3485 base_offset
, offset
);
3486 base_offset
= force_gimple_operand (base_offset
, &seq
, false, tmp
);
3487 gimple_seq_add_seq (new_stmt_list
, seq
);
3490 /* base + base_offset */
3492 addr_base
= fold_build_pointer_plus (data_ref_base
, base_offset
);
3495 addr_base
= build1 (ADDR_EXPR
,
3496 build_pointer_type (TREE_TYPE (DR_REF (dr
))),
3497 unshare_expr (DR_REF (dr
)));
3500 vect_ptr_type
= build_pointer_type (STMT_VINFO_VECTYPE (stmt_info
));
3501 base
= get_base_address (DR_REF (dr
));
3503 && TREE_CODE (base
) == MEM_REF
)
3505 = build_qualified_type (vect_ptr_type
,
3506 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base
, 0))));
3508 vec_stmt
= fold_convert (vect_ptr_type
, addr_base
);
3509 addr_expr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
3510 get_name (base_name
));
3511 add_referenced_var (addr_expr
);
3512 vec_stmt
= force_gimple_operand (vec_stmt
, &seq
, false, addr_expr
);
3513 gimple_seq_add_seq (new_stmt_list
, seq
);
3515 if (DR_PTR_INFO (dr
)
3516 && TREE_CODE (vec_stmt
) == SSA_NAME
)
3518 duplicate_ssa_name_ptr_info (vec_stmt
, DR_PTR_INFO (dr
));
3520 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt
));
3523 if (vect_print_dump_info (REPORT_DETAILS
))
3525 fprintf (vect_dump
, "created ");
3526 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
/* Function vect_create_data_ref_ptr.

   Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
   location accessed in the loop by STMT, along with the def-use update
   chain to appropriately advance the pointer through the loop iterations.
   Also set aliasing information for the pointer.  This pointer is used by
   the callers to this function to create a memory reference expression for
   vector load/store access.

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
         GIMPLE_ASSIGN <name, data-ref> or
         GIMPLE_ASSIGN <data-ref, name>.
   2. AGGR_TYPE: the type of the reference, which should be either a vector
        or an array.
   3. AT_LOOP: the loop where the vector memref is to be created.
   4. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   5. BSI: location where the new stmts are to be placed if there is no loop.
   6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
        pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

        ap = (v8hi *) initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Set INV_P to true if the access pattern of the data reference in the
      vectorized loop is invariant.  Set it to false otherwise.

   4. Return the pointer.  */
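/* C-level analogy of the pointer created here and its per-iteration
   update (illustrative only; the real IV is built with create_iv and the
   names below are made up, reusing the v8hi example from the comment):  */
#if 0
  v8hi *ap = (v8hi *) initial_address;   /* set in the loop preheader  */
  for (;;)
    {
      v8hi v = *ap;                      /* vector access through ap   */
      ap = ap + 1;                       /* advance by one vector each */
                                         /* iteration                  */
    }
#endif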
3581 vect_create_data_ref_ptr (gimple stmt
, tree aggr_type
, struct loop
*at_loop
,
3582 tree offset
, tree
*initial_address
,
3583 gimple_stmt_iterator
*gsi
, gimple
*ptr_incr
,
3584 bool only_init
, bool *inv_p
)
3587 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3588 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3589 struct loop
*loop
= NULL
;
3590 bool nested_in_vect_loop
= false;
3591 struct loop
*containing_loop
= NULL
;
3596 gimple_seq new_stmt_list
= NULL
;
3600 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3602 gimple_stmt_iterator incr_gsi
;
3605 tree indx_before_incr
, indx_after_incr
;
3608 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3611 gcc_assert (TREE_CODE (aggr_type
) == ARRAY_TYPE
3612 || TREE_CODE (aggr_type
) == VECTOR_TYPE
);
3616 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3617 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
3618 containing_loop
= (gimple_bb (stmt
))->loop_father
;
3619 pe
= loop_preheader_edge (loop
);
3623 gcc_assert (bb_vinfo
);
3628 /* Check the step (evolution) of the load in LOOP, and record
3629 whether it's invariant. */
3630 if (nested_in_vect_loop
)
3631 step
= STMT_VINFO_DR_STEP (stmt_info
);
3633 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
3635 if (tree_int_cst_compare (step
, size_zero_node
) == 0)
3639 negative
= tree_int_cst_compare (step
, size_zero_node
) < 0;
3641 /* Create an expression for the first address accessed by this load
3643 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
3645 if (vect_print_dump_info (REPORT_DETAILS
))
3647 tree data_ref_base
= base_name
;
3648 fprintf (vect_dump
, "create %s-pointer variable to type: ",
3649 tree_code_name
[(int) TREE_CODE (aggr_type
)]);
3650 print_generic_expr (vect_dump
, aggr_type
, TDF_SLIM
);
3651 if (TREE_CODE (data_ref_base
) == VAR_DECL
3652 || TREE_CODE (data_ref_base
) == ARRAY_REF
)
3653 fprintf (vect_dump
, " vectorizing an array ref: ");
3654 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
3655 fprintf (vect_dump
, " vectorizing a record based array ref: ");
3656 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
3657 fprintf (vect_dump
, " vectorizing a pointer ref: ");
3658 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
3661 /* (1) Create the new aggregate-pointer variable. */
3662 aggr_ptr_type
= build_pointer_type (aggr_type
);
3663 base
= get_base_address (DR_REF (dr
));
3665 && TREE_CODE (base
) == MEM_REF
)
3667 = build_qualified_type (aggr_ptr_type
,
3668 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base
, 0))));
3669 aggr_ptr
= vect_get_new_vect_var (aggr_ptr_type
, vect_pointer_var
,
3670 get_name (base_name
));
3672 /* Vector and array types inherit the alias set of their component
3673 type by default so we need to use a ref-all pointer if the data
3674 reference does not conflict with the created aggregated data
3675 reference because it is not addressable. */
3676 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr
),
3677 get_alias_set (DR_REF (dr
))))
3680 = build_pointer_type_for_mode (aggr_type
,
3681 TYPE_MODE (aggr_ptr_type
), true);
3682 aggr_ptr
= vect_get_new_vect_var (aggr_ptr_type
, vect_pointer_var
,
3683 get_name (base_name
));
3686 /* Likewise for any of the data references in the stmt group. */
3687 else if (STMT_VINFO_GROUP_SIZE (stmt_info
) > 1)
3689 gimple orig_stmt
= STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
);
3692 tree lhs
= gimple_assign_lhs (orig_stmt
);
3693 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr
),
3694 get_alias_set (lhs
)))
3697 = build_pointer_type_for_mode (aggr_type
,
3698 TYPE_MODE (aggr_ptr_type
), true);
3700 = vect_get_new_vect_var (aggr_ptr_type
, vect_pointer_var
,
3701 get_name (base_name
));
3705 orig_stmt
= STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt
));
3710 add_referenced_var (aggr_ptr
);
3712 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3713 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3714 def-use update cycles for the pointer: one relative to the outer-loop
3715 (LOOP), which is what steps (3) and (4) below do. The other is relative
3716 to the inner-loop (which is the inner-most loop containing the dataref),
3717 and this is done be step (5) below.
3719 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3720 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3721 redundant. Steps (3),(4) create the following:
3724 LOOP: vp1 = phi(vp0,vp2)
3730 If there is an inner-loop nested in loop, then step (5) will also be
3731 applied, and an additional update in the inner-loop will be created:
3734 LOOP: vp1 = phi(vp0,vp2)
3736 inner: vp3 = phi(vp1,vp4)
3737 vp4 = vp3 + inner_step
3743 /* (2) Calculate the initial address of the aggregate-pointer, and set
3744 the aggregate-pointer to point to it before the loop. */
3746 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3748 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
3754 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmt_list
);
3755 gcc_assert (!new_bb
);
3758 gsi_insert_seq_before (gsi
, new_stmt_list
, GSI_SAME_STMT
);
3761 *initial_address
= new_temp
;
3763 /* Create: p = (aggr_type *) initial_base */
3764 if (TREE_CODE (new_temp
) != SSA_NAME
3765 || !useless_type_conversion_p (aggr_ptr_type
, TREE_TYPE (new_temp
)))
3767 vec_stmt
= gimple_build_assign (aggr_ptr
,
3768 fold_convert (aggr_ptr_type
, new_temp
));
3769 aggr_ptr_init
= make_ssa_name (aggr_ptr
, vec_stmt
);
3770 /* Copy the points-to information if it exists. */
3771 if (DR_PTR_INFO (dr
))
3772 duplicate_ssa_name_ptr_info (aggr_ptr_init
, DR_PTR_INFO (dr
));
3773 gimple_assign_set_lhs (vec_stmt
, aggr_ptr_init
);
3776 new_bb
= gsi_insert_on_edge_immediate (pe
, vec_stmt
);
3777 gcc_assert (!new_bb
);
3780 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
3783 aggr_ptr_init
= new_temp
;
3785 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3786 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3787 inner-loop nested in LOOP (during outer-loop vectorization). */
3789 /* No update in loop is required. */
3790 if (only_init
&& (!loop_vinfo
|| at_loop
== loop
))
3791 aptr
= aggr_ptr_init
;
3794 /* The step of the aggregate pointer is the type size. */
3795 tree step
= TYPE_SIZE_UNIT (aggr_type
);
3796 /* One exception to the above is when the scalar step of the load in
3797 LOOP is zero. In this case the step here is also zero. */
3799 step
= size_zero_node
;
3801 step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
3803 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
3805 create_iv (aggr_ptr_init
,
3806 fold_convert (aggr_ptr_type
, step
),
3807 aggr_ptr
, loop
, &incr_gsi
, insert_after
,
3808 &indx_before_incr
, &indx_after_incr
);
3809 incr
= gsi_stmt (incr_gsi
);
3810 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3812 /* Copy the points-to information if it exists. */
3813 if (DR_PTR_INFO (dr
))
3815 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3816 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3821 aptr
= indx_before_incr
;
3824 if (!nested_in_vect_loop
|| only_init
)
3828 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3829 nested in LOOP, if exists. */
3831 gcc_assert (nested_in_vect_loop
);
3834 standard_iv_increment_position (containing_loop
, &incr_gsi
,
3836 create_iv (aptr
, fold_convert (aggr_ptr_type
, DR_STEP (dr
)), aggr_ptr
,
3837 containing_loop
, &incr_gsi
, insert_after
, &indx_before_incr
,
3839 incr
= gsi_stmt (incr_gsi
);
3840 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3842 /* Copy the points-to information if it exists. */
3843 if (DR_PTR_INFO (dr
))
3845 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3846 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3851 return indx_before_incr
;
/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size.  If requested,
   i.e. if PTR_INCR is given, then also connect the new increment stmt
   to the existing def-use update-chain of the pointer, by modifying
   the PTR_INCR as illustrated below:

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + BUMP
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - optional.  The stmt that updates the pointer in each iteration of
              the loop.  The increment amount across iterations is expected
              to be vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.
   BUMP - optional.  The offset by which to bump the pointer.  If not given,
          the offset is assumed to be vector_size.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */
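/* Minimal sketch of the rewiring described above (illustrative only;
   the names are hypothetical):  */
#if 0
  /* Emitted at GSI:                 new_p = dataref_ptr + bump;
     PTR_INCR's use is redirected:   p_2   = new_p + step;
     so the IV still advances by STEP per iteration, with the extra
     intra-iteration offset BUMP applied from this point on.  */
  new_p = dataref_ptr + bump;
#endif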
3893 bump_vector_ptr (tree dataref_ptr
, gimple ptr_incr
, gimple_stmt_iterator
*gsi
,
3894 gimple stmt
, tree bump
)
3896 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3897 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3898 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3899 tree ptr_var
= SSA_NAME_VAR (dataref_ptr
);
3900 tree update
= TYPE_SIZE_UNIT (vectype
);
3903 use_operand_p use_p
;
3904 tree new_dataref_ptr
;
3909 incr_stmt
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, ptr_var
,
3910 dataref_ptr
, update
);
3911 new_dataref_ptr
= make_ssa_name (ptr_var
, incr_stmt
);
3912 gimple_assign_set_lhs (incr_stmt
, new_dataref_ptr
);
3913 vect_finish_stmt_generation (stmt
, incr_stmt
, gsi
);
3915 /* Copy the points-to information if it exists. */
3916 if (DR_PTR_INFO (dr
))
3918 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
3919 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr
));
3923 return new_dataref_ptr
;
3925 /* Update the vector-pointer's cross-iteration increment. */
3926 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
3928 tree use
= USE_FROM_PTR (use_p
);
3930 if (use
== dataref_ptr
)
3931 SET_USE (use_p
, new_dataref_ptr
);
3933 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
3936 return new_dataref_ptr
;
/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE.  */

tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  new_name = get_name (scalar_dest);
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
  add_referenced_var (vec_dest);

  return vec_dest;
}
/* Function vect_grouped_store_supported.

   Returns TRUE if interleave high and interleave low permutations
   are supported, and FALSE otherwise.  */

bool
vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode = TYPE_MODE (vectype);

  /* vect_permute_store_chain requires the group size to be a power of two.  */
  if (exact_log2 (count) == -1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "the size of the group of accesses"
                 " is not a power of 2");
      return false;
    }

  /* Check that the permutation is supported.  */
  if (VECTOR_MODE_P (mode))
    {
      unsigned int i, nelt = GET_MODE_NUNITS (mode);
      unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

      /* Build the interleave-high selector {0, nelt, 1, nelt+1, ...}.  */
      for (i = 0; i < nelt / 2; i++)
        {
          sel[i * 2] = i;
          sel[i * 2 + 1] = i + nelt;
        }
      if (can_vec_perm_p (mode, false, sel))
        {
          /* Shift it into the interleave-low selector
             {nelt/2, nelt*3/2, nelt/2+1, ...}.  */
          for (i = 0; i < nelt; i++)
            sel[i] += nelt / 2;
          if (can_vec_perm_p (mode, false, sel))
            return true;
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "interleave op not supported by target.");
  return false;
}
/* Return TRUE if vec_store_lanes is available for COUNT vectors of
   type VECTYPE.  */

bool
vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  return vect_lanes_optab_supported_p ("vec_store_lanes",
                                       vec_store_lanes_optab,
                                       vectype, count);
}
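/* A store-lanes operation writes COUNT vectors to memory in interleaved
   element order as a single operation, so no separate permutation stmts are
   needed; on targets that provide it (ARM NEON's vst2/vst3/vst4 are one
   example) it allows the interleaving scheme below to be skipped.  */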
/* Function vect_permute_store_chain.

   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
   a power of 2, generate interleave_high/low stmts to reorder the data
   correctly for the stores.  Return the final references for stores in
   RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number to
   each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0  8 16 24  1  9 17 25
   2nd vec:  2 10 18 26  3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 29
   4th vec:  6 14 22 30  7 15 23 31

   i.e., we interleave the contents of the four vectors in their order.

   We use interleave_high/low instructions to create such output.  The input of
   each interleave_high/low operation is two vectors:
   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   the even elements of the result vector are obtained left-to-right from the
   high/low elements of the first vector.  The odd elements of the result are
   obtained left-to-right from the high/low elements of the second vector.
   The output of interleave_high will be:  0 4 1 5
   and of interleave_low:                  2 6 3 7

   The permutation is done in log LENGTH stages.  In each stage interleave_high
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.  In our example,

   I1: interleave_high (1st vec, 3rd vec)
   I2: interleave_low  (1st vec, 3rd vec)
   I3: interleave_high (2nd vec, 4th vec)
   I4: interleave_low  (2nd vec, 4th vec)

   The output for the first stage is:

   I1:  0 16  1 17  2 18  3 19
   I2:  4 20  5 21  6 22  7 23
   I3:  8 24  9 25 10 26 11 27
   I4: 12 28 13 29 14 30 15 31

   The output of the second stage, i.e. the final result is:

   I1:  0  8 16 24  1  9 17 25
   I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 29
   I4:  6 14 22 30  7 15 23 31.  */
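/* For the 8-element example above, the selector built for the high
   interleave is {0, 8, 1, 9, 2, 10, 3, 11} and, after adding nelt/2 to every
   element, the selector for the low interleave is
   {4, 12, 5, 13, 6, 14, 7, 15}; these are the constant masks of the two
   VEC_PERM_EXPRs created in each stage below.  */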
void
vect_permute_store_chain (VEC(tree,heap) *dr_chain,
                          unsigned int length,
                          gimple stmt,
                          gimple_stmt_iterator *gsi,
                          VEC(tree,heap) **result_chain)
{
  tree perm_dest, vect1, vect2, high, low;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  tree perm_mask_low, perm_mask_high;
  unsigned int i, n;
  unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

  *result_chain = VEC_copy (tree, heap, dr_chain);

  for (i = 0, n = nelt / 2; i < n; i++)
    {
      sel[i * 2] = i;
      sel[i * 2 + 1] = i + nelt;
    }
  perm_mask_high = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_high != NULL);

  for (i = 0; i < nelt; i++)
    sel[i] += nelt / 2;
  perm_mask_low = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_low != NULL);

  for (i = 0, n = exact_log2 (length); i < n; i++)
    {
      for (j = 0; j < length/2; j++)
        {
          vect1 = VEC_index (tree, dr_chain, j);
          vect2 = VEC_index (tree, dr_chain, j+length/2);

          /* Create interleaving stmt:
             high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}>  */
          perm_dest = create_tmp_var (vectype, "vect_inter_high");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
          high = make_ssa_name (perm_dest, NULL);
          perm_stmt
            = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
                                             vect1, vect2, perm_mask_high);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          VEC_replace (tree, *result_chain, 2*j, high);

          /* Create interleaving stmt:
             low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
                                                 nelt*3/2+1, ...}>  */
          perm_dest = create_tmp_var (vectype, "vect_inter_low");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
          low = make_ssa_name (perm_dest, NULL);
          perm_stmt
            = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
                                             vect1, vect2, perm_mask_low);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          VEC_replace (tree, *result_chain, 2*j+1, low);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }
}
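/* A minimal scalar model of the interleaving network above, for illustration
   only; it is not part of the vectorizer and the name is purely
   illustrative.  CHAIN holds LENGTH blocks of NELT elements each, and the
   same log2(LENGTH) stages of high/low interleaving are applied to plain
   integer arrays (alloca via XALLOCAVEC and memcpy come from the file's
   existing includes).  */

static void
model_permute_store_chain (int *chain, unsigned int length, unsigned int nelt)
{
  unsigned int stage, j, i;
  int *tmp = XALLOCAVEC (int, length * nelt);

  for (stage = 1; stage < length; stage *= 2)
    {
      for (j = 0; j < length / 2; j++)
        {
          int *vect1 = chain + j * nelt;
          int *vect2 = chain + (j + length / 2) * nelt;
          int *high = tmp + (2 * j) * nelt;
          int *low = tmp + (2 * j + 1) * nelt;

          for (i = 0; i < nelt / 2; i++)
            {
              /* high = VEC_PERM <vect1, vect2, {0, nelt, 1, nelt+1, ...}>  */
              high[2 * i] = vect1[i];
              high[2 * i + 1] = vect2[i];
              /* low = the same pattern shifted by nelt/2.  */
              low[2 * i] = vect1[i + nelt / 2];
              low[2 * i + 1] = vect2[i + nelt / 2];
            }
        }
      memcpy (chain, tmp, length * nelt * sizeof (int));
    }
}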
/* Function vect_setup_realignment

   This function is called when vectorizing an unaligned load using
   the dr_explicit_realign[_optimized] scheme.
   This function generates the following code at the loop prolog:

      p = initial_addr;
   x  msq_init = *(floor(p));   # prolog load
      realignment_token = call target_builtin;
    loop:
   x  msq = phi (msq_init, ---)

   The stmts marked with x are generated only for the case of
   dr_explicit_realign_optimized.

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT, and a "floor-aligned" load using that pointer.
   It also generates code to compute the "realignment-token" (if the relevant
   target hook was defined), and creates a phi-node at the loop-header bb
   whose arguments are the result of the prolog-load (created by this
   function) and the result of a load that takes place in the loop (to be
   created by the caller to this function).

   For the case of dr_explicit_realign_optimized:
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:
    loop:
      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   For the case of dr_explicit_realign:
    loop:
      msq = *(floor(p));         # load in loop
      p' = p + (VS-1);
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   Input:
   STMT - (scalar) load stmt to be vectorized.  This load accesses
          a memory location that may be unaligned.
   BSI - place where new code is to be inserted.
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
                              is used.

   Output:
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
                       target hook, if defined.
   Return value - the result of the loop-header phi node.  */
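/* As a concrete sketch: with 16-byte vectors and p misaligned by 4 bytes,
   floor(p) loads bytes [p-4, p+12) and floor(p + 15) loads bytes
   [p+12, p+28); realign_load then uses the realignment token (derived from
   the low bits of p) to pick out bytes [p, p+16) from the two aligned
   loads.  */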
tree
vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
                        tree *realignment_token,
                        enum dr_alignment_support alignment_support_scheme,
                        tree init_addr,
                        struct loop **at_loop)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = NULL;
  edge pe = NULL;
  tree scalar_dest = gimple_assign_lhs (stmt);
  tree vec_dest;
  gimple inc;
  tree ptr;
  tree data_ref;
  gimple new_stmt;
  basic_block new_bb;
  tree msq_init = NULL_TREE;
  tree new_temp;
  gimple phi_stmt;
  tree msq = NULL_TREE;
  gimple_seq stmts = NULL;
  bool inv_p;
  bool compute_in_loop = false;
  bool nested_in_vect_loop = false;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  struct loop *loop_for_initial_load = NULL;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
    }

  gcc_assert (alignment_support_scheme == dr_explicit_realign
              || alignment_support_scheme == dr_explicit_realign_optimized);

  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
      done (for the optimized realignment scheme).  */

  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP.  This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment).  In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) have to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP.  */

  if (init_addr != NULL_TREE || !loop_vinfo)
    {
      compute_in_loop = true;
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
    }

  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration.  In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP.  If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP.  Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop).  */

  if (nested_in_vect_loop)
    {
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      bool invariant_in_outerloop =
            (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    }
  else
    loop_for_initial_load = loop;
  if (at_loop)
    *at_loop = loop_for_initial_load;

  if (loop_for_initial_load)
    pe = loop_preheader_edge (loop_for_initial_load);

  /* 3. For the case of the optimized realignment, create the first vector
      load at the loop preheader.  */

  if (alignment_support_scheme == dr_explicit_realign_optimized)
    {
      /* Create msq_init = *(floor(p1)) in the loop preheader.  */

      gcc_assert (!compute_in_loop);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
                                      NULL_TREE, &init_addr, NULL, &inc,
                                      &inv_p);
      new_stmt = gimple_build_assign_with_ops
                   (BIT_AND_EXPR, NULL_TREE, ptr,
                    build_int_cst (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
      new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      data_ref
        = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
                  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
      new_stmt = gimple_build_assign (vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      if (pe)
        {
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);

      msq_init = gimple_assign_lhs (new_stmt);
    }

  /* 4. Create realignment token using a target builtin, if available.
      It is done either inside the containing loop, or before LOOP (as
      determined above).  */

  if (targetm.vectorize.builtin_mask_for_load)
    {
      tree builtin_decl;

      /* Compute INIT_ADDR - the initial addressed accessed by this memref.  */
      if (!init_addr)
        {
          /* Generate the INIT_ADDR computation outside LOOP.  */
          init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
                                                            NULL_TREE, loop);
          if (loop)
            {
              pe = loop_preheader_edge (loop);
              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
              gcc_assert (!new_bb);
            }
          else
            gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
        }

      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
      vec_dest =
        vect_create_destination_var (scalar_dest,
                                     gimple_call_return_type (new_stmt));
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);

      if (compute_in_loop)
        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      else
        {
          /* Generate the misalignment computation outside LOOP.  */
          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }

      *realignment_token = gimple_call_lhs (new_stmt);

      /* The result of the CALL_EXPR to this builtin is determined from
         the value of the parameter and no global variables are touched
         which makes the builtin a "const" function.  Requiring the
         builtin to have the "const" attribute makes it unnecessary
         to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  if (alignment_support_scheme == dr_explicit_realign)
    return msq;

  gcc_assert (!compute_in_loop);
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);

  /* 5. Create msq = phi <msq_init, lsq> in loop.  */

  pe = loop_preheader_edge (containing_loop);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest, NULL);
  phi_stmt = create_phi_node (msq, containing_loop->header);
  SSA_NAME_DEF_STMT (msq) = phi_stmt;
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);

  return msq;
}
/* Function vect_grouped_load_supported.

   Returns TRUE if even and odd permutations are supported,
   and FALSE otherwise.  */

bool
vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode = TYPE_MODE (vectype);

  /* vect_permute_load_chain requires the group size to be a power of two.  */
  if (exact_log2 (count) == -1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "the size of the group of accesses"
                 " is not a power of 2");
      return false;
    }

  /* Check that the permutation is supported.  */
  if (VECTOR_MODE_P (mode))
    {
      unsigned int i, nelt = GET_MODE_NUNITS (mode);
      unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

      /* Build the extract-even selector {0, 2, 4, ...}.  */
      for (i = 0; i < nelt; i++)
        sel[i] = i * 2;
      if (can_vec_perm_p (mode, false, sel))
        {
          /* And the extract-odd selector {1, 3, 5, ...}.  */
          for (i = 0; i < nelt; i++)
            sel[i] = i * 2 + 1;
          if (can_vec_perm_p (mode, false, sel))
            return true;
        }
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "extract even/odd not supported by target");
  return false;
}
/* Return TRUE if vec_load_lanes is available for COUNT vectors of
   type VECTYPE.  */

bool
vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  return vect_lanes_optab_supported_p ("vec_load_lanes",
                                       vec_load_lanes_optab,
                                       vectype, count);
}
/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly.  Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number to
   each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0 4  8 12 16 20 24 28
   2nd vec:  1 5  9 13 17 21 25 29
   3rd vec:  2 6 10 14 18 22 26 30
   4th vec:  3 7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output.  The input of
   each extract_even/odd operation is two vectors:
   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   and the output is the vector of extracted even/odd elements.  The output of
   extract_even will be:  0 2 4 6
   and of extract_odd:    1 3 5 7

   The permutation is done in log LENGTH stages.  In each stage extract_even
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   their order.  In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd  (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd  (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:  0  2  4  6  8 10 12 14
   E2:  1  3  5  7  9 11 13 15
   E3: 16 18 20 22 24 26 28 30
   E4: 17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of extract_even operation and then the
   output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
   The input for the second stage is:

   1st vec (E1):  0  2  4  6  8 10 12 14
   2nd vec (E3): 16 18 20 22 24 26 28 30
   3rd vec (E2):  1  3  5  7  9 11 13 15
   4th vec (E4): 17 19 21 23 25 27 29 31

   The output of the second stage:

   E1: 0 4  8 12 16 20 24 28
   E2: 2 6 10 14 18 22 26 30
   E3: 1 5  9 13 17 21 25 29
   E4: 3 7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1): 0 4  8 12 16 20 24 28
   2nd vec (E3): 1 5  9 13 17 21 25 29
   3rd vec (E2): 2 6 10 14 18 22 26 30
   4th vec (E4): 3 7 11 15 19 23 27 31.  */
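/* For the 8-element example above, the extract-even selector built below is
   {0, 2, 4, 6, 8, 10, 12, 14} and the extract-odd selector is
   {1, 3, 5, 7, 9, 11, 13, 15}, both indexing into the concatenation of the
   two input vectors; they are the constant masks of the VEC_PERM_EXPRs
   created in each stage.  */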
static void
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
                         unsigned int length,
                         gimple stmt,
                         gimple_stmt_iterator *gsi,
                         VEC(tree,heap) **result_chain)
{
  tree perm_dest, data_ref, first_vect, second_vect;
  tree perm_mask_even, perm_mask_odd;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  unsigned int i, j, log_length = exact_log2 (length);
  unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

  *result_chain = VEC_copy (tree, heap, dr_chain);

  for (i = 0; i < nelt; ++i)
    sel[i] = i * 2;
  perm_mask_even = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_even != NULL);

  for (i = 0; i < nelt; ++i)
    sel[i] = i * 2 + 1;
  perm_mask_odd = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_odd != NULL);

  for (i = 0; i < log_length; i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = VEC_index (tree, dr_chain, j);
          second_vect = VEC_index (tree, dr_chain, j+1);

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
                                                     first_vect, second_vect,
                                                     perm_mask_even);

          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);

          VEC_replace (tree, *result_chain, j/2, data_ref);

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
                                                     first_vect, second_vect,
                                                     perm_mask_odd);

          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);

          VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }
}
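/* A minimal scalar model of the extract-even/odd network above, for
   illustration only; it is not part of the vectorizer and the name is purely
   illustrative.  CHAIN holds LENGTH blocks of NELT elements each, and
   log2(LENGTH) stages of even/odd extraction are applied to plain integer
   arrays, placing the even result at index j/2 and the odd result at
   index j/2 + length/2, as the loop above does.  */

static void
model_permute_load_chain (int *chain, unsigned int length, unsigned int nelt)
{
  unsigned int stage, j, i;
  int *tmp = XALLOCAVEC (int, length * nelt);

  for (stage = 1; stage < length; stage *= 2)
    {
      for (j = 0; j < length; j += 2)
        {
          int *first = chain + j * nelt;
          int *second = chain + (j + 1) * nelt;
          int *even = tmp + (j / 2) * nelt;
          int *odd = tmp + (j / 2 + length / 2) * nelt;

          for (i = 0; i < nelt / 2; i++)
            {
              /* even = VEC_PERM <first, second, {0, 2, 4, ...}>  */
              even[i] = first[2 * i];
              even[i + nelt / 2] = second[2 * i];
              /* odd = VEC_PERM <first, second, {1, 3, 5, ...}>  */
              odd[i] = first[2 * i + 1];
              odd[i + nelt / 2] = second[2 * i + 1];
            }
        }
      memcpy (chain, tmp, length * nelt * sizeof (int));
    }
}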
/* Function vect_transform_grouped_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the result vectorized statements to
   the scalar statements.  */

void
vect_transform_grouped_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
                             gimple_stmt_iterator *gsi)
{
  VEC(tree,heap) *result_chain = NULL;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
     vectors, that are ready for vector computation.  */
  result_chain = VEC_alloc (tree, heap, size);
  vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
  vect_record_grouped_load_vectors (stmt, result_chain);
  VEC_free (tree, heap, result_chain);
}
/* RESULT_CHAIN contains the output of a group of grouped loads that were
   generated as part of the vectorization of STMT.  Assign the statement
   for each vector to the associated scalar statement.  */

void
vect_record_grouped_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  gimple next_stmt, new_stmt;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps.  Loads created for the gaps will be removed by dead
         code elimination pass later.  No need to check for the first stmt in
         the group, since it always exists.
         GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
      if (next_stmt != first_stmt
          && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of multiple
             copies, and we put the new vector statement in the first available
             RELATED_STMT field.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else
            {
              if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
                {
                  gimple prev_stmt =
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
                  gimple rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
                  while (rel_stmt)
                    {
                      prev_stmt = rel_stmt;
                      rel_stmt =
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                    }

                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
                    new_stmt;
                }
            }

          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }
}
/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
   on ALIGNMENT bit boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  /* We cannot change alignment of common or external symbols as another
     translation unit may contain a definition with lower alignment.
     The rules of common symbol linking mean that the definition
     will override the common symbol.  */
  if (DECL_EXTERNAL (decl)
      || DECL_COMMON (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  /* Do not override the alignment as specified by the ABI when the used
     attribute is set.  */
  if (DECL_PRESERVE_P (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}
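/* For example, a file-scope "static int a[256];" can have its alignment
   raised to a vector boundary (up to MAX_OFILE_ALIGNMENT), whereas an
   "extern" declaration or a common symbol cannot, since the defining
   translation unit may have laid it out with a smaller alignment.  */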
/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
                               bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      }  */

  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          if ((nested_in_vect_loop
               && (TREE_INT_CST_LOW (DR_STEP (dr))
                   != GET_MODE_SIZE (TYPE_MODE (vectype))))
              || !loop_vinfo)
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }
      if (!known_alignment_for_access_p (dr))
        is_packed = contains_packed_reference (DR_REF (dr));

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr))
        is_packed = contains_packed_reference (DR_REF (dr));

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
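/* Summarizing the decision above for a read: the possible results, in the
   order they are tried, are dr_aligned (the access is already aligned and
   CHECK_ALIGNED_ACCESSES is false), dr_explicit_realign_optimized or
   dr_explicit_realign (REALIGN_LOAD is available), dr_unaligned_supported
   (the target handles the misaligned access directly), and finally
   dr_unaligned_unsupported.  */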