/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
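/* For instance, for COUNT == 3 vectors of mode V16QI the whole group needs
   a 384-bit "array" mode; if the target defines no integer mode of that
   width, mode_for_size below returns BLKmode and the lanes optab cannot
   be used.  */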
static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
                              tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  limit_p = !targetm.array_mode_supported_p (mode, count);
  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
                              MODE_INT, limit_p);

  if (array_mode == BLKmode)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
                         GET_MODE_NAME (mode), count);
      return false;
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot use %s<%s><%s>", name,
                         GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "can use %s<%s><%s>", name, GET_MODE_NAME (array_mode),
                     GET_MODE_NAME (mode));

  return true;
}
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.

   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */
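/* For example, in 'short_prod = (short) char_a * (short) char_b' recognized
   as WIDEN_MULT_EXPR, the relevant scalar type is the one-byte char of the
   rhs rather than the two-byte result, which doubles the VF that can be
   used (16 instead of 8 for 128-bit vectors).  */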
tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
          && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
          && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
              == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}
/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark for run-time aliasing test between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not yet supported for outer-loops.");
      return false;
    }

  /* FORNOW: We don't support creating runtime alias tests for non-constant
     step.  */
  if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
      || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not yet supported for non-constant "
                         "step");
      return false;
    }

  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
  return true;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */
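/* For example, for 'for (i = 0; i < n; i++) a[i + 4] = a[i] + 1;' the
   dependence distance is 4, so *MAX_VF is capped at 4: executing at most
   4 iterations per vector iteration cannot violate the dependence.  */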
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* In loop analysis all data references should be vectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    gcc_unreachable ();

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb
      || (DR_IS_READ (dra) && DR_IS_READ (drb)))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (STMT_VINFO_GATHER_P (stmtinfo_a)
          || STMT_VINFO_GATHER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "versioning for alias not supported for: "
                               "can't determine dependence between ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dra));
              dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (drb));
            }
          return true;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "versioning for alias required: "
                           "can't determine dependence between ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             DR_REF (drb));
        }

      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  /* Known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (STMT_VINFO_GATHER_P (stmtinfo_a)
          || STMT_VINFO_GATHER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "versioning for alias not supported for: "
                               "bad dist vector for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dra));
              dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (drb));
            }
          return true;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "versioning for alias required: "
                           "bad dist vector for ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
        }

      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "dependence distance == 0 between ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
              dump_printf (MSG_NOTE, " and ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
            }

          /* When we perform grouped accesses and perform implicit CSE
             by detecting equal accesses and doing disambiguation with
             runtime alias tests like for
                .. = a[i];
                .. = a[i+1];
                a[i] = ..;
                a[i+1] = ..;
                *p = ..;
                .. = a[i];
                .. = a[i+1];
             where we will end up loading { a[i], a[i+1] } once, make
             sure that inserting group loads before the first load and
             stores after the last store will do the right thing.  */
          if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
               && GROUP_SAME_DR_STMT (stmtinfo_a))
              || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)
                  && GROUP_SAME_DR_STMT (stmtinfo_b)))
            {
              gimple earlier_stmt;
              earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
              if (DR_IS_WRITE
                    (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "READ_WRITE dependence in interleaving.");
                  return true;
                }
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "adjusting maximal vectorization factor to %i",
                             *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "dependence distance >= VF.");
          continue;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized, possible dependence "
                           "between data-refs ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_NOTE, " and ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
        }

      return true;
    }

  return false;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_ref_dependences ===");

  if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
                                &LOOP_VINFO_DDRS (loop_vinfo),
                                LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
    return false;

  FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
      return false;

  return true;
}
/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB that prohibits SLP vectorization of the
   containing basic block.  */
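/* For example, a basic block containing 'x = a[0]; a[0] = y;' is fine:
   the load comes before the store, so SLP vectorization preserves the
   scalar order.  With the store first, the function returns true and the
   block is not vectorized.  */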
static bool
vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  /* We need to check dependences of statements marked as unvectorizable
     as well, they still can prohibit vectorization.  */

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb)
    return false;

  /* Read-read is OK.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    return false;

  /* If dra and drb are part of the same interleaving chain consider
     them independent.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
      && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
          == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      gimple earlier_stmt;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "can't determine dependence between ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check that it's not a load-after-store dependence.  */
      earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
        return true;

      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "determined dependence between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
    }

  /* Do not vectorize basic blocks with write-write dependences.  */
  if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
    return true;

  /* Check dependence between DRA and DRB for basic block vectorization.
     If the accesses share same bases and offsets, we can compare their initial
     constant offsets to decide whether they differ or not.  In case of a read-
     write dependence we check that the load is before the store to ensure that
     vectorization will not change the order of the accesses.  */

  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
/* Function vect_slp_analyze_data_ref_dependences.

   Examine all the data references in the basic-block, and make sure there
   do not exist any data dependences between them.  */

bool
vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
{
  struct data_dependence_relation *ddr;
  unsigned int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_slp_analyze_data_ref_dependences ===");

  if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
                                &BB_VINFO_DDRS (bb_vinfo),
                                vNULL, true))
    return false;

  FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
    if (vect_slp_analyze_data_ref_dependence (ddr))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
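/* For example, for a 16-byte vector type and an access whose DR_INIT
   (constant offset from an aligned base address) is 12 bytes, the
   misalignment recorded below is 12 (12 modulo 16).  */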
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  /* Strided loads perform only component accesses, misalignment information
     is irrelevant for them.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    return true;

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  /* Similarly, if we're doing basic-block vectorization, we can only use
     base and misalignment information relative to an innermost loop if the
     misalignment stays the same throughout the execution of the loop.
     As above, this is the case if the stride of the dataref evenly divides
     by the vector size.  */
  if (!loop)
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "SLP: step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown alignment for access: ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0)
      || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables here if
         flag_section_anchors is enabled as we already generated
         RTL for other functions.  Most global variables should
         have been aligned during the IPA increase_alignment pass.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "can't force alignment of ref: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
    }

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  vec<data_reference_p> datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
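/* For instance, peeling NPEEL == 2 iterations of a 4-byte access with a
   known misalignment of 8 bytes against a 16-byte vector alignment gives
   (8 + 2 * 4) & 15 == 0, i.e. the access is aligned in the vector loop.  */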
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  vec<dr_p> same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
  if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
    dr_peel_size *= GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<data_reference_p> datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (dump_enabled_p ())
            {
              if (DR_IS_READ (dr))
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: unsupported unaligned load.");
              else
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: unsupported unaligned "
                                 "store.");

              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dr));
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Vectorizing an unaligned access.");
    }

  return true;
}
/* Given a memory reference EXP return whether its alignment is less
   than its size.  */

static bool
not_size_aligned (tree exp)
{
  if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
    return true;

  return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
          > get_object_alignment (exp));
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */
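/* For example, a V4SI access misaligned by 8 bytes needs (16 - 8) / 4 == 2
   peeled scalar iterations to become aligned; if it belongs to an
   interleaved group of size 3, those 2 iterations are not a multiple of
   the group size, so alignment is not reachable by peeling.  */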
static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          dump_printf (MSG_NOTE,
                       ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = TREE_TYPE (DR_REF (dr));
      bool is_packed = not_size_aligned (DR_REF (dr));
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unknown misalignment, is_packed = %d", is_packed);
      if ((TYPE_USER_ALIGN (type) && !is_packed)
          || targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost,
                           stmt_vector_for_cost *body_cost_vec)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;

  if (DR_IS_READ (dr))
    vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
                        NULL, body_cost_vec, false);
  else
    vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_data_access_cost: inside_cost = %d, "
                     "outside_cost = %d.", *inside_cost, *outside_cost);
}
/* Insert DR into peeling hash table with NPEEL as key.  */
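/* Each entry of the hash table records one candidate peeling amount
   (NPEEL) together with a count of the data-refs that peeling by that
   amount would align; the counts and costs are compared later when the
   best peeling is chosen.  */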
static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  _vect_peel_info **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find (&elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find_slot (slot, INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && !flag_vect_cost_model)
    slot->count += VECT_MAX_COST;
}
/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

int
vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
                                     _vect_peel_extended_info *max)
{
  vect_peel_info elem = *slot;

  if (elem->count > max->peel_info.count
      || (elem->count == max->peel_info.count
          && max->peel_info.npeel > elem->npeel))
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}
/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

int
vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
                                   _vect_peel_extended_info *min)
{
  vect_peel_info elem = *slot;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
  int single_iter_cost;

  prologue_cost_vec.create (2);
  body_cost_vec.create (2);
  epilogue_cost_vec.create (2);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
                                 &body_cost_vec);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
                                               &dummy, single_iter_cost,
                                               &prologue_cost_vec,
                                               &epilogue_cost_vec);

  /* Prologue and epilogue costs are added to the target model later.
     These costs depend only on the scalar iteration cost, the
     number of peeling iterations finally chosen, and the number of
     misaligned statements.  So discard the information found here.  */
  prologue_cost_vec.release ();
  epilogue_cost_vec.release ();

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->body_cost_vec.release ();
      min->body_cost_vec = body_cost_vec;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }
  else
    body_cost_vec.release ();

  return 1;
}
/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel,
                                       stmt_vector_for_cost *body_cost_vec)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr = NULL;
  res.body_cost_vec = stmt_vector_for_cost();

  if (flag_vect_cost_model)
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      LOOP_VINFO_PEELING_HTAB (loop_vinfo)
          .traverse <_vect_peel_extended_info *,
                     vect_peeling_hash_get_lowest_cost> (&res);
    }
  else
    {
      res.peel_info.count = 0;
      LOOP_VINFO_PEELING_HTAB (loop_vinfo)
          .traverse <_vect_peel_extended_info *,
                     vect_peeling_hash_get_most_frequent> (&res);
    }

  *npeel = res.peel_info.npeel;
  *body_cost_vec = res.body_cost_vec;
  return res.peel_info.dr;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;
  stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost();

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      /* For invariant accesses there is nothing to enhance.  */
      if (integer_zerop (DR_STEP (dr)))
        continue;

      /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
                LOOP_VINFO_PEELING_HTAB (loop_vinfo).create (1);

              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (negative
                           ? (mis - nelements) : (nelements - mis))
                  & (nelements - 1);

              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  Vectorization factor will be 8.  If both
                 accesses are misaligned by 3, the first one needs one scalar
                 iteration to be aligned, and the second one needs 5.  But the
                 first one will be aligned also by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add full vector size, while we don't exceed
                 vectorization factor.
                 We do this automatically for cost model, since we calculate cost
                 for every peeling option.  */
              if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some other
                 access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (!flag_vect_cost_model)
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }

              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since we
                 have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know any misalignment values, we prefer
                 peeling for data-ref that has the maximum number of data-refs
                 with the same alignment, unless the target prefers to align
                 stores over load.  */
              if (all_misalignments_unknown)
                {
                  unsigned same_align_drs
                    = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
                  if (!dr0
                      || same_align_drs_max < same_align_drs)
                    {
                      same_align_drs_max = same_align_drs;
                      dr0 = dr;
                    }
                  /* For data-refs with the same number of related
                     accesses prefer the one where the misalign
                     computation will be invariant in the outermost loop.  */
                  else if (same_align_drs_max == same_align_drs)
                    {
                      struct loop *ivloop0, *ivloop;
                      ivloop0 = outermost_invariant_loop_for_expr
                          (loop, DR_BASE_ADDRESS (dr0));
                      ivloop = outermost_invariant_loop_for_expr
                          (loop, DR_BASE_ADDRESS (dr));
                      if ((ivloop && !ivloop0)
                          || (ivloop && ivloop0
                              && flow_loop_nested_p (ivloop, ivloop0)))
                        dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in the
                 loop, we choose peeling amount according to the known
                 accesses.  */
              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          if (!aligned_access_p (dr))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "vector alignment may not be reachable");
              break;
            }
        }
    }

  /* Check if we can possibly peel the loop.  */
  if (!vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling && all_misalignments_unknown
      && vect_supportable_dr_alignment (dr0, false))
    {
      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      if (first_store && DR_IS_READ (dr0))
        {
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
          stmt_vector_for_cost dummy;
          dummy.create (2);

          vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
                                     &dummy);
          vect_get_data_access_cost (first_store, &store_inside_cost,
                                     &store_outside_cost, &dummy);

          dummy.release ();

          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
             aligning the load DR0).  */
          load_inside_penalty = store_inside_cost;
          load_outside_penalty = store_outside_cost;
          for (i = 0;
               STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
                          DR_STMT (first_store))).iterate (i, &dr);
               i++)
            if (DR_IS_READ (dr))
              {
                load_inside_penalty += load_inside_cost;
                load_outside_penalty += load_outside_cost;
              }
            else
              {
                load_inside_penalty += store_inside_cost;
                load_outside_penalty += store_outside_cost;
              }

          /* Calculate the penalty for leaving DR0 unaligned (by
             aligning the FIRST_STORE).  */
          store_inside_penalty = load_inside_cost;
          store_outside_penalty = load_outside_cost;
          for (i = 0;
               STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
                      DR_STMT (dr0))).iterate (i, &dr);
               i++)
            if (DR_IS_READ (dr))
              {
                store_inside_penalty += load_inside_cost;
                store_outside_penalty += load_outside_cost;
              }
            else
              {
                store_inside_penalty += store_inside_cost;
                store_outside_penalty += store_outside_cost;
              }

          if (load_inside_penalty > store_inside_penalty
              || (load_inside_penalty == store_inside_penalty
                  && load_outside_penalty > store_outside_penalty))
            dr0 = first_store;
        }

      /* In case there are only loads with different unknown misalignments, use
         peeling only if it may help to align other accesses in the loop.  */
      if (!first_store
          && !STMT_VINFO_SAME_ALIGN_REFS (
                  vinfo_for_stmt (DR_STMT (dr0))).length ()
          && vect_supportable_dr_alignment (dr0, false)
             != dr_unaligned_supported)
        do_peeling = false;
    }

  if (do_peeling && !dr0)
    {
      /* Peeling is possible, but there is no data access that is not supported
         unless aligned.  So we try to choose the best possible peeling.  */

      /* We should get here only if there are drs with known misalignment.  */
      gcc_assert (!all_misalignments_unknown);

      /* Choose the best peeling from the hash table.  */
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
                                                   &body_cost_vec);
      if (!dr0 || !npeel)
        do_peeling = false;
    }

  if (do_peeling)
    {
      stmt = DR_STMT (dr0);
      stmt_info = vinfo_for_stmt (stmt);
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          bool negative = tree_int_cst_compare (DR_STEP (dr0),
                                                size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 vectorization factor minus the misalignment as an element
                 count.  */
              mis = DR_MISALIGNMENT (dr0);
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
              npeel = ((negative ? mis - nelements : nelements - mis)
                       & (nelements - 1));
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
            npeel /= GROUP_SIZE (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      FOR_EACH_VEC_ELT (datarefs, i, dr)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
              && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
            continue;

          /* Strided loads perform only component accesses, alignment is
             irrelevant for them.  */
          if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
        {
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          if (!stat)
            do_peeling = false;
          else
            {
              body_cost_vec.release ();
              return stat;
            }
        }

      if (do_peeling)
        {
          stmt_info_for_cost *si;
          void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);

          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (datarefs, i, dr)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          if (npeel)
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Alignment of access forced using peeling.");
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Peeling for alignment will be applied.");
            }
          /* We've delayed passing the inside-loop peeling costs to the
             target cost model until we were sure peeling would happen.
             Do so now.  */
          if (body_cost_vec.exists ())
            {
              FOR_EACH_VEC_ELT (body_cost_vec, i, si)
                {
                  struct _stmt_vec_info *stmt_info
                    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
                  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                                        si->misalign, vect_body);
                }
              body_cost_vec.release ();
            }

          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
          return stat;
        }
    }

  body_cost_vec.release ();

  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
        flag_tree_vect_loop_version
        && optimize_loop_nest_for_speed_p (loop)
        && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (datarefs, i, dr)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
                  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
            continue;

          /* Strided loads perform only component accesses, alignment is
             irrelevant for them.  */
          if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);

          if (!supportable_dr_alignment)
            {
              gimple stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
                                                        DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
        LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
    }

  if (do_versioning)
    {
      vec<gimple> may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      gimple stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Alignment of access forced using versioning.");
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
  return stat;
}
/* Function vect_find_same_alignment_drs.

   Update group and alignment relations according to the chosen
   vectorization factor.  */
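/* For example, with a vectorization factor of 4 and unit-stride accesses
   to 4-byte elements, a dependence distance of 8 iterations puts the two
   references 32 bytes apart, a multiple of the 16-byte vector size, so
   they necessarily have the same misalignment.  */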
1868 vect_find_same_alignment_drs (struct data_dependence_relation
*ddr
,
1869 loop_vec_info loop_vinfo
)
1872 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1873 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1874 struct data_reference
*dra
= DDR_A (ddr
);
1875 struct data_reference
*drb
= DDR_B (ddr
);
1876 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
1877 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
1878 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
1879 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
1880 lambda_vector dist_v
;
1881 unsigned int loop_depth
;
1883 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1889 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1892 /* Loop-based vectorization and known data dependence. */
1893 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1896 /* Data-dependence analysis reports a distance vector of zero
1897 for data-references that overlap only in the first iteration
1898 but have different sign step (see PR45764).
1899 So as a sanity check require equal DR_STEP. */
1900 if (!operand_equal_p (DR_STEP (dra
), DR_STEP (drb
), 0))
  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "dependence distance = %d.", dist);
      /* Same loop iteration.  */
      if (dist == 0
          || (dist % vectorization_factor == 0 && dra_size == drb_size))
        {
          /* Two references with distance zero have the same alignment.  */
          STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
          STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "accesses have the same alignment.");
              dump_printf (MSG_NOTE,
                           "dependence distance modulo vf == 0 between ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
              dump_printf (MSG_NOTE, " and ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
            }
        }
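/* Illustrative sketch (not vectorizer code): two accesses whose dependence
   distance is a multiple of the vectorization factor land at the same offset
   within a vector-sized block, so they share one misalignment value.  A
   vectorization factor of 4 and 4-byte elements are assumed here; the array
   is hypothetical.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  const int vf = 4;              /* assumed vectorization factor */
  const int elem_size = 4;       /* sizeof (int) */
  const int vecsize = vf * elem_size;
  int a[64] __attribute__ ((aligned (16)));

  for (int dist = 0; dist <= 5; dist++)
    {
      uintptr_t mis_a = (uintptr_t) &a[1] % vecsize;
      uintptr_t mis_b = (uintptr_t) &a[1 + dist] % vecsize;
      /* Prints 1 exactly when dist is a multiple of vf.  */
      printf ("dist %d: same alignment = %d\n", dist,
              dist % vf == 0 && mis_a == mis_b);
    }
  return 0;
}
#endif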
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_refs_alignment ===");

  /* Mark groups of data references with same alignment using
     data dependence information.  */
  if (loop_vinfo)
    {
      vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
      struct data_dependence_relation *ddr;
      unsigned int i;

      FOR_EACH_VEC_ELT (ddrs, i, ddr)
        vect_find_same_alignment_drs (ddr, loop_vinfo);
    }

  if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: can't calculate alignment "
/* Analyze groups of accesses: check that DR belongs to a group of
   accesses of legal size, step, etc.  Detect gaps, single element
   interleaving, and other special cases.  Set grouped access info.
   Collect groups of strided stores for further use in SLP analysis.  */

vect_analyze_group_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  HOST_WIDE_INT groupsize, last_accessed_element = 1;
  bool slp_impossible = false;
  struct loop *loop = NULL;
  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
     size of the interleaving group (including gaps).  */
  groupsize = absu_hwi (dr_step) / type_size;
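/* Illustrative sketch (not vectorizer code): GROUPSIZE is the scalar step
   divided by the element size.  A hypothetical access with a 12-byte stride
   over 4-byte ints (say, one field of a 3-int struct) gives a group of 3
   elements, of which the loop may touch fewer - the difference is a gap.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  const long dr_step = 12;      /* bytes advanced per scalar iteration */
  const long type_size = 4;     /* sizeof (int) */
  long groupsize = labs (dr_step) / type_size;

  /* Two members accessed out of three -> gap of one element per group.  */
  long count = 2;
  printf ("groupsize = %ld, gap = %ld\n", groupsize, groupsize - count);
  return 0;
}
#endif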
  /* A non-consecutive access is possible only if it is part of an
     interleaving group.  */
  if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Check whether this DR is part of an interleaving group and is a
         single element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads.  STEP must be a multiple of the
         type size.  The size of the group must be a power of 2.  */
      if (DR_IS_READ (dr)
          && (dr_step % type_size) == 0
          && groupsize > 0
          && exact_log2 (groupsize) != -1)
        {
          GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
          GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
2014 if (dump_enabled_p ())
2016 dump_printf_loc (MSG_NOTE
, vect_location
,
2017 "Detected single element interleaving ");
2018 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr
));
2019 dump_printf (MSG_NOTE
, " step ");
2020 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, step
);
2025 if (dump_enabled_p ())
2026 dump_printf_loc (MSG_NOTE
, vect_location
,
2027 "Data access with gaps requires scalar "
2031 if (dump_enabled_p ())
2032 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2033 "Peeling for outer loop is not"
2038 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2044 if (dump_enabled_p ())
2046 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2047 "not consecutive access ");
2048 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2053 /* Mark the statement as unvectorizable. */
2054 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
2061 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
)
2063 /* First stmt in the interleaving chain. Check the chain. */
2064 gimple next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
2065 struct data_reference
*data_ref
= dr
;
2066 unsigned int count
= 1;
2067 tree prev_init
= DR_INIT (data_ref
);
2069 HOST_WIDE_INT diff
, gaps
= 0;
2070 unsigned HOST_WIDE_INT count_in_bytes
;
          /* Skip same data-refs.  In case that two or more stmts share
             data-ref (supported only for loads), we vectorize only the first
             stmt, and the rest get their vectorized loads from the first
             one.  */
          if (!tree_int_cst_compare (DR_INIT (data_ref),
                                     DR_INIT (STMT_VINFO_DATA_REF (
                                                vinfo_for_stmt (next)))))
            {
2082 if (DR_IS_WRITE (data_ref
))
2084 if (dump_enabled_p ())
2085 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2086 "Two store stmts share the same dr.");
2090 /* For load use the same data-ref load. */
2091 GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2094 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2099 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2101 /* All group members have the same STEP by construction. */
2102 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref
), step
, 0));
2104 /* Check that the distance between two accesses is equal to the type
2105 size. Otherwise, we have gaps. */
2106 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2107 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2110 /* FORNOW: SLP of accesses with gaps is not supported. */
2111 slp_impossible
= true;
2112 if (DR_IS_WRITE (data_ref
))
2114 if (dump_enabled_p ())
2115 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2116 "interleaved store with gaps");
2123 last_accessed_element
+= diff
;
2125 /* Store the gap from the previous member of the group. If there is no
2126 gap in the access, GROUP_GAP is always 1. */
2127 GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2129 prev_init
= DR_INIT (data_ref
);
2130 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2131 /* Count the number of data-refs in the chain. */
      /* COUNT is the number of accesses found; we multiply it by the size of
         the type to get COUNT_IN_BYTES.  */
      count_in_bytes = type_size * count;
2139 /* Check that the size of the interleaving (including gaps) is not
2140 greater than STEP. */
2142 && absu_hwi (dr_step
) < count_in_bytes
+ gaps
* type_size
)
2144 if (dump_enabled_p ())
2146 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2147 "interleaving size is greater than step for ");
2148 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (dr
));
2153 /* Check that the size of the interleaving is equal to STEP for stores,
2154 i.e., that there are no gaps. */
2156 && absu_hwi (dr_step
) != count_in_bytes
)
2158 if (DR_IS_READ (dr
))
2160 slp_impossible
= true;
2161 /* There is a gap after the last load in the group. This gap is a
2162 difference between the groupsize and the number of elements.
2163 When there is no gap, this difference should be 0. */
2164 GROUP_GAP (vinfo_for_stmt (stmt
)) = groupsize
- count
;
2168 if (dump_enabled_p ())
2169 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2170 "interleaved store with gaps");
2175 /* Check that STEP is a multiple of type size. */
2177 && (dr_step
% type_size
) != 0)
2179 if (dump_enabled_p ())
2181 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2182 "step is not a multiple of type size: step ");
2183 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, step
);
2184 dump_printf (MSG_MISSED_OPTIMIZATION
, " size ");
2185 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2186 TYPE_SIZE_UNIT (scalar_type
));
2194 GROUP_SIZE (vinfo_for_stmt (stmt
)) = groupsize
;
2195 if (dump_enabled_p ())
2196 dump_printf_loc (MSG_NOTE
, vect_location
,
2197 "Detected interleaving of size %d", (int)groupsize
);
2199 /* SLP: create an SLP data structure for every interleaving group of
2200 stores for further analysis in vect_analyse_slp. */
2201 if (DR_IS_WRITE (dr
) && !slp_impossible
)
2204 LOOP_VINFO_GROUPED_STORES (loop_vinfo
).safe_push (stmt
);
2206 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt
);
2209 /* There is a gap in the end of the group. */
2210 if (groupsize
- last_accessed_element
> 0 && loop_vinfo
)
2212 if (dump_enabled_p ())
2213 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2214 "Data access with gaps requires scalar "
2218 if (dump_enabled_p ())
2219 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2220 "Peeling for outer loop is not supported");
2224 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2232 /* Analyze the access pattern of the data-reference DR.
2233 In case of non-consecutive accesses call vect_analyze_group_access() to
2234 analyze groups of accesses. */
2237 vect_analyze_data_ref_access (struct data_reference
*dr
)
2239 tree step
= DR_STEP (dr
);
2240 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2241 gimple stmt
= DR_STMT (dr
);
2242 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2243 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2244 struct loop
*loop
= NULL
;
2247 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2249 if (loop_vinfo
&& !step
)
2251 if (dump_enabled_p ())
2252 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2253 "bad data-ref access in loop");
2257 /* Allow invariant loads in loops. */
2258 if (loop_vinfo
&& integer_zerop (step
))
2260 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2261 return DR_IS_READ (dr
);
2264 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2266 /* Interleaved accesses are not yet supported within outer-loop
2267 vectorization for references in the inner-loop. */
2268 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2270 /* For the rest of the analysis we use the outer-loop step. */
2271 step
= STMT_VINFO_DR_STEP (stmt_info
);
2272 if (integer_zerop (step
))
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_NOTE
, vect_location
,
2276 "zero step in outer loop.");
2277 if (DR_IS_READ (dr
))
2285 if (TREE_CODE (step
) == INTEGER_CST
)
2287 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2288 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
))
2290 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type
), -dr_step
)))
2292 /* Mark that it is not interleaving. */
2293 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2298 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2300 if (dump_enabled_p ())
2301 dump_printf_loc (MSG_NOTE
, vect_location
,
2302 "grouped access in outer loop.");
2306 /* Assume this is a DR handled by non-constant strided load case. */
2307 if (TREE_CODE (step
) != INTEGER_CST
)
2308 return STMT_VINFO_STRIDE_LOAD_P (stmt_info
);
2310 /* Not consecutive access - check if it's a part of interleaving group. */
2311 return vect_analyze_group_access (dr
);
/* Compare two data-references DRA and DRB to group them into chunks
   suitable for grouping.  */

dr_group_sort_cmp (const void *dra_, const void *drb_)
{
  data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
  data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
  hashval_t h1, h2;
  int cmp;

  /* Stabilize sort.  */
  if (dra == drb)
    return 0;

  /* Ordering of DRs according to base.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
    {
      h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0);
      h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0);
      if (h1 != h2)
        return h1 < h2 ? -1 : 1;
    }

  /* And according to DR_OFFSET.  */
  if (!dr_equal_offsets_p (dra, drb))
    {
      h1 = iterative_hash_expr (DR_OFFSET (dra), 0);
      h2 = iterative_hash_expr (DR_OFFSET (drb), 0);
      if (h1 != h2)
        return h1 < h2 ? -1 : 1;
    }

  /* Put reads before writes.  */
  if (DR_IS_READ (dra) != DR_IS_READ (drb))
    return DR_IS_READ (dra) ? -1 : 1;

  /* Then sort after access size.  */
  if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
                        TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
    {
      h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), 0);
      h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0);
      if (h1 != h2)
        return h1 < h2 ? -1 : 1;
    }

  /* And after step.  */
  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
    {
      h1 = iterative_hash_expr (DR_STEP (dra), 0);
      h2 = iterative_hash_expr (DR_STEP (drb), 0);
      if (h1 != h2)
        return h1 < h2 ? -1 : 1;
    }

  /* Then sort after DR_INIT.  In case of identical DRs sort after stmt UID.  */
  cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
  if (cmp == 0)
    return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
  return cmp;
}
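/* Illustrative sketch (not vectorizer code): a much simplified version of the
   comparator above written over plain structs, so that qsort places candidate
   group members (same base, offset, read/write kind, size and step) next to
   each other, ordered by their constant INIT.  The struct, its field names
   and the sample values are hypothetical.  */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct dref
{
  const char *base;   /* stands in for DR_BASE_ADDRESS */
  long offset;        /* stands in for DR_OFFSET */
  int is_read;        /* reads sort before writes */
  long size;          /* access size in bytes */
  long step;          /* stride in bytes */
  long init;          /* constant offset from the base */
};

static int
dref_cmp (const void *a_, const void *b_)
{
  const struct dref *a = (const struct dref *) a_;
  const struct dref *b = (const struct dref *) b_;
  int c;

  if ((c = strcmp (a->base, b->base)) != 0)
    return c;
  if (a->offset != b->offset)
    return a->offset < b->offset ? -1 : 1;
  if (a->is_read != b->is_read)
    return a->is_read ? -1 : 1;
  if (a->size != b->size)
    return a->size < b->size ? -1 : 1;
  if (a->step != b->step)
    return a->step < b->step ? -1 : 1;
  if (a->init != b->init)
    return a->init < b->init ? -1 : 1;
  return 0;       /* the real comparator falls back to the stmt UID here */
}

int
main (void)
{
  struct dref v[] = {
    { "a", 0, 1, 4, 16, 8 },
    { "b", 0, 1, 4, 16, 0 },
    { "a", 0, 1, 4, 16, 0 },
    { "a", 0, 1, 4, 16, 4 },
  };
  qsort (v, 4, sizeof (v[0]), dref_cmp);
  for (int i = 0; i < 4; i++)
    printf ("%s init %ld\n", v[i].base, v[i].init);
  return 0;
}
#endif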
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
   simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<data_reference_p> datarefs;
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_ref_accesses ===");

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  if (datarefs.is_empty ())
    return true;

  /* Sort the array of datarefs to make building the interleaving chains
     linear.  */
  qsort (datarefs.address (), datarefs.length (),
         sizeof (data_reference_p), dr_group_sort_cmp);
  /* Build the interleaving chains.  */
  for (i = 0; i < datarefs.length () - 1;)
    {
      data_reference_p dra = datarefs[i];
      stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
      stmt_vec_info lastinfo = NULL;
      for (i = i + 1; i < datarefs.length (); ++i)
        {
          data_reference_p drb = datarefs[i];
          stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
          /* ??? Imperfect sorting (non-compatible types, non-modulo
             accesses, same accesses) can lead to a group being artificially
             split here as we don't just skip over those.  If it really
             matters we can push those to a worklist and re-iterate
             over them.  Then we can just skip ahead to the next DR here.  */
          /* Check that the data-refs have same first location (except init)
             and they are both either store or load (not load and store).  */
          if (DR_IS_READ (dra) != DR_IS_READ (drb)
              || !operand_equal_p (DR_BASE_ADDRESS (dra),
                                   DR_BASE_ADDRESS (drb), 0)
              || !dr_equal_offsets_p (dra, drb))
            break;
          /* Check that the data-refs have the same constant size and step.  */
          tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
          tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
          if (!host_integerp (sza, 1)
              || !host_integerp (szb, 1)
              || !tree_int_cst_equal (sza, szb)
              || !host_integerp (DR_STEP (dra), 0)
              || !host_integerp (DR_STEP (drb), 0)
              || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
            break;
          /* Do not place the same access in the interleaving chain twice.  */
          if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
            break;

          /* Check the types are compatible.
             ??? We don't distinguish this during sorting.  */
          if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
                                   TREE_TYPE (DR_REF (drb))))
            break;
          /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
          HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
          HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
          gcc_assert (init_a < init_b);
          /* If init_b == init_a + the size of the type * k, we have an
             interleaving, and DRA is accessed before DRB.  */
          HOST_WIDE_INT type_size_a = TREE_INT_CST_LOW (sza);
          if ((init_b - init_a) % type_size_a != 0)
            break;

          /* If the step (if not zero) is greater than the difference between
             data-refs' inits this splits groups into suitable sizes.  */
          HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (dra));
          if (step != 0 && step <= (init_b - init_a))
            break;
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Detected interleaving ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
              dump_printf (MSG_NOTE, " and ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
            }
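/* Illustrative sketch (not vectorizer code): after sorting, accesses to the
   same base are grouped by walking their constant offsets (DR_INIT).  A new
   group starts when the gap to the group leader is not a multiple of the
   element size, or when it is at least as large as the step (the next scalar
   iteration would be reached first).  The element size, step and offsets
   below are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const long type_size = 4;     /* element size in bytes */
  const long step = 16;         /* stride of one scalar iteration */
  const long init[] = { 0, 4, 8, 12, 16, 20 };  /* sorted DR_INITs */
  const int n = 6;

  long leader = init[0];
  printf ("group leader at %ld\n", leader);
  for (int i = 1; i < n; i++)
    {
      long diff = init[i] - leader;
      if (diff % type_size != 0 || (step != 0 && step <= diff))
        {
          /* Does not belong to the leader's group; start a new one.  */
          leader = init[i];
          printf ("group leader at %ld\n", leader);
        }
      else
        printf ("  member at %ld (offset %ld)\n", init[i], diff);
    }
  return 0;
}
#endif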
          /* Link the found element into the group list.  */
          if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
            {
              GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
              lastinfo = stmtinfo_a;
            }
          GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
          GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
          lastinfo = stmtinfo_b;
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_analyze_data_ref_access (dr))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: complicated access pattern.");

        /* Mark the statement as not vectorizable.  */
        STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
/* Function vect_prune_runtime_alias_test_list.

   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Return FALSE if resulting list of ddrs is longer than allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */

vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  vec<ddr_p> ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_NOTE
, vect_location
,
2530 "=== vect_prune_runtime_alias_test_list ===");
2532 for (i
= 0; i
< ddrs
.length (); )
2540 for (j
= 0; j
< i
; j
++)
2542 ddr_p ddr_j
= ddrs
[j
];
2544 if (vect_vfa_range_equal (ddr_i
, ddr_j
))
2546 if (dump_enabled_p ())
2548 dump_printf_loc (MSG_NOTE
, vect_location
,
2549 "found equal ranges ");
2550 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_A (ddr_i
)));
2551 dump_printf (MSG_NOTE
, ", ");
2552 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_B (ddr_i
)));
2553 dump_printf (MSG_NOTE
, " and ");
2554 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_A (ddr_j
)));
2555 dump_printf (MSG_NOTE
, ", ");
2556 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_B (ddr_j
)));
2565 ddrs
.ordered_remove (i
);
  if (ddrs.length () >
        (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "disable versioning for alias - max number of "
                         "generated checks exceeded.");

      LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).truncate (0);
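/* Illustrative sketch (not vectorizer code): the pruning above removes DDR
   pairs whose runtime segment checks would be identical, then gives up on
   versioning if more checks remain than the parameter allows.  The limit and
   the pair values below are made up for the example.  */
#if 0
#include <stdio.h>

struct pair { int a, b; };              /* stands in for a may-alias DDR */

int
main (void)
{
  struct pair checks[] = { { 1, 2 }, { 3, 4 }, { 1, 2 }, { 3, 4 }, { 5, 6 } };
  int n = 5;
  const int max_checks = 10;    /* PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS */

  /* Drop a pair if an earlier pair would generate the same check.  */
  for (int i = 0; i < n; )
    {
      int dup = 0;
      for (int j = 0; j < i; j++)
        if (checks[j].a == checks[i].a && checks[j].b == checks[i].b)
          dup = 1;
      if (dup)
        checks[i] = checks[--n];        /* order is irrelevant in this sketch */
      else
        i++;
    }

  printf ("%d checks remain, versioning %s\n", n,
          n > max_checks ? "disabled" : "possible");
  return 0;
}
#endif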
/* Check whether a non-affine read in stmt is suitable for gather load
   and if so, return a builtin decl for that operation.  */

vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
                   tree *offp, int *scalep)
{
  HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree offtype = NULL_TREE;
  tree decl, base, off;
  enum machine_mode pmode;
  int punsignedp, pvolatilep;
  /* The gather builtins need address of the form
     loop_invariant + vector * {1, 2, 4, 8}
     or
     loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
     Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
     of loop invariants/SSA_NAMEs defined in the loop, with casts,
     multiplications and additions in it.  To get a vector, we need
     a single SSA_NAME that will be defined in the loop and will
     contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
     SSA_NAME OFF and put the loop invariants into a tree BASE
     that can be gimplified before the loop.  */
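/* Illustrative sketch (not vectorizer code): taking the address of
   a[c + 2*i] (4-byte elements, c loop-invariant, i the loop counter) apart
   into a loop-invariant BASE = a + 4*c, a per-iteration offset OFF = i and a
   constant SCALE = 8 - the shape the gather builtins expect.  The array and
   values are hypothetical.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int a[64];
  const long c = 3;                             /* loop invariant */
  uintptr_t base = (uintptr_t) a + 4 * c;       /* BASE, invariant part */
  const long scale = 8;                         /* 2 elements * 4 bytes */

  for (long i = 0; i < 4; i++)
    {
      uintptr_t direct = (uintptr_t) &a[c + 2 * i];
      uintptr_t decomposed = base + (uintptr_t) i * scale;  /* BASE + OFF*SCALE */
      printf ("i=%ld: %s\n", i, direct == decomposed ? "match" : "MISMATCH");
    }
  return 0;
}
#endif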
  base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
                              &pmode, &punsignedp, &pvolatilep, false);
  gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2621 if (TREE_CODE (base
) == MEM_REF
)
2623 if (!integer_zerop (TREE_OPERAND (base
, 1)))
2625 if (off
== NULL_TREE
)
2627 double_int moff
= mem_ref_offset (base
);
2628 off
= double_int_to_tree (sizetype
, moff
);
2631 off
= size_binop (PLUS_EXPR
, off
,
2632 fold_convert (sizetype
, TREE_OPERAND (base
, 1)));
2634 base
= TREE_OPERAND (base
, 0);
2637 base
= build_fold_addr_expr (base
);
2639 if (off
== NULL_TREE
)
2640 off
= size_zero_node
;
  /* If base is not loop invariant, then either off is 0, in which case we
     start with just the constant offset in the loop invariant BASE and
     continue with base as OFF, or we give up.
     We could handle that case by gimplifying the addition of base + off
     into some SSA_NAME and using that as off, but for now punt.  */
2647 if (!expr_invariant_in_loop_p (loop
, base
))
2649 if (!integer_zerop (off
))
2652 base
= size_int (pbitpos
/ BITS_PER_UNIT
);
2654 /* Otherwise put base + constant offset into the loop invariant BASE
2655 and continue with OFF. */
2658 base
= fold_convert (sizetype
, base
);
2659 base
= size_binop (PLUS_EXPR
, base
, size_int (pbitpos
/ BITS_PER_UNIT
));
2662 /* OFF at this point may be either a SSA_NAME or some tree expression
2663 from get_inner_reference. Try to peel off loop invariants from it
2664 into BASE as long as possible. */
2666 while (offtype
== NULL_TREE
)
2668 enum tree_code code
;
2669 tree op0
, op1
, add
= NULL_TREE
;
2671 if (TREE_CODE (off
) == SSA_NAME
)
2673 gimple def_stmt
= SSA_NAME_DEF_STMT (off
);
2675 if (expr_invariant_in_loop_p (loop
, off
))
2678 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
2681 op0
= gimple_assign_rhs1 (def_stmt
);
2682 code
= gimple_assign_rhs_code (def_stmt
);
2683 op1
= gimple_assign_rhs2 (def_stmt
);
2687 if (get_gimple_rhs_class (TREE_CODE (off
)) == GIMPLE_TERNARY_RHS
)
2689 code
= TREE_CODE (off
);
2690 extract_ops_from_tree (off
, &code
, &op0
, &op1
);
2694 case POINTER_PLUS_EXPR
:
2696 if (expr_invariant_in_loop_p (loop
, op0
))
2701 add
= fold_convert (sizetype
, add
);
2703 add
= size_binop (MULT_EXPR
, add
, size_int (scale
));
2704 base
= size_binop (PLUS_EXPR
, base
, add
);
2707 if (expr_invariant_in_loop_p (loop
, op1
))
2715 if (expr_invariant_in_loop_p (loop
, op1
))
2717 add
= fold_convert (sizetype
, op1
);
2718 add
= size_binop (MINUS_EXPR
, size_zero_node
, add
);
2724 if (scale
== 1 && host_integerp (op1
, 0))
2726 scale
= tree_low_cst (op1
, 0);
2735 if (!POINTER_TYPE_P (TREE_TYPE (op0
))
2736 && !INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
2738 if (TYPE_PRECISION (TREE_TYPE (op0
))
2739 == TYPE_PRECISION (TREE_TYPE (off
)))
2744 if (TYPE_PRECISION (TREE_TYPE (op0
))
2745 < TYPE_PRECISION (TREE_TYPE (off
)))
2748 offtype
= TREE_TYPE (off
);
2759 /* If at the end OFF still isn't a SSA_NAME or isn't
2760 defined in the loop, punt. */
2761 if (TREE_CODE (off
) != SSA_NAME
2762 || expr_invariant_in_loop_p (loop
, off
))
2765 if (offtype
== NULL_TREE
)
2766 offtype
= TREE_TYPE (off
);
2768 decl
= targetm
.vectorize
.builtin_gather (STMT_VINFO_VECTYPE (stmt_info
),
2770 if (decl
== NULL_TREE
)
/* Function vect_analyze_data_refs.

   Find all the data references in the loop or basic block.

   The general structure of the analysis of data refs in the vectorizer is as
   follows:
   1- vect_analyze_data_refs(loop/bb): call
      compute_data_dependences_for_loop/bb to find and analyze all data-refs
      in the loop/bb and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
*/
2798 vect_analyze_data_refs (loop_vec_info loop_vinfo
,
2799 bb_vec_info bb_vinfo
,
2802 struct loop
*loop
= NULL
;
2803 basic_block bb
= NULL
;
2805 vec
<data_reference_p
> datarefs
;
2806 struct data_reference
*dr
;
2809 if (dump_enabled_p ())
2810 dump_printf_loc (MSG_NOTE
, vect_location
,
2811 "=== vect_analyze_data_refs ===\n");
2815 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2816 if (!find_loop_nest (loop
, &LOOP_VINFO_LOOP_NEST (loop_vinfo
))
2817 || find_data_references_in_loop
2818 (loop
, &LOOP_VINFO_DATAREFS (loop_vinfo
)))
2820 if (dump_enabled_p ())
2821 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2822 "not vectorized: loop contains function calls"
2823 " or data references that cannot be analyzed");
2827 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2831 gimple_stmt_iterator gsi
;
2833 bb
= BB_VINFO_BB (bb_vinfo
);
2834 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2836 gimple stmt
= gsi_stmt (gsi
);
2837 if (!find_data_references_in_stmt (NULL
, stmt
,
2838 &BB_VINFO_DATAREFS (bb_vinfo
)))
2840 /* Mark the rest of the basic-block as unvectorizable. */
2841 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
2843 stmt
= gsi_stmt (gsi
);
2844 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)) = false;
2850 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
2853 /* Go through the data-refs, check that the analysis succeeded. Update
2854 pointer from stmt_vec_info struct to DR and vectype. */
2856 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
2859 stmt_vec_info stmt_info
;
2860 tree base
, offset
, init
;
2861 bool gather
= false;
2865 if (!dr
|| !DR_REF (dr
))
2867 if (dump_enabled_p ())
2868 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2869 "not vectorized: unhandled data-ref ");
2873 stmt
= DR_STMT (dr
);
2874 stmt_info
= vinfo_for_stmt (stmt
);
2876 /* Discard clobbers from the dataref vector. We will remove
2877 clobber stmts during vectorization. */
2878 if (gimple_clobber_p (stmt
))
2880 if (i
== datarefs
.length () - 1)
2885 datarefs
[i
] = datarefs
.pop ();
2889 /* Check that analysis of the data-ref succeeded. */
2890 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
2893 /* If target supports vector gather loads, see if they can't
2897 && !TREE_THIS_VOLATILE (DR_REF (dr
))
2898 && targetm
.vectorize
.builtin_gather
!= NULL
2899 && !nested_in_vect_loop_p (loop
, stmt
))
2901 struct data_reference
*newdr
2902 = create_data_ref (NULL
, loop_containing_stmt (stmt
),
2903 DR_REF (dr
), stmt
, true);
2904 gcc_assert (newdr
!= NULL
&& DR_REF (newdr
));
2905 if (DR_BASE_ADDRESS (newdr
)
2906 && DR_OFFSET (newdr
)
2909 && integer_zerop (DR_STEP (newdr
)))
2915 free_data_ref (newdr
);
2920 if (dump_enabled_p ())
2922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2923 "not vectorized: data ref analysis "
2925 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2935 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
2937 if (dump_enabled_p ())
2938 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2939 "not vectorized: base addr of dr is a "
2950 if (TREE_THIS_VOLATILE (DR_REF (dr
)))
2952 if (dump_enabled_p ())
2954 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2955 "not vectorized: volatile type ");
2956 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2965 if (stmt_can_throw_internal (stmt
))
2967 if (dump_enabled_p ())
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2970 "not vectorized: statement can throw an "
2972 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2983 if (TREE_CODE (DR_REF (dr
)) == COMPONENT_REF
2984 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr
), 1)))
2986 if (dump_enabled_p ())
2988 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2989 "not vectorized: statement is bitfield "
2991 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3002 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3003 offset
= unshare_expr (DR_OFFSET (dr
));
3004 init
= unshare_expr (DR_INIT (dr
));
3006 if (is_gimple_call (stmt
))
3008 if (dump_enabled_p ())
3010 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3011 "not vectorized: dr in a call ");
3012 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3023 /* Update DR field in stmt_vec_info struct. */
  /* If the dataref is in an inner-loop of the loop that is considered for
     vectorization, we also want to analyze the access relative to
     the outer-loop (DR contains information only relative to the
     inner-most enclosing loop).  We do that by building a reference to the
     first location accessed by the inner-loop, and analyze it relative to
     the outer-loop.  */
3031 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
3033 tree outer_step
, outer_base
, outer_init
;
3034 HOST_WIDE_INT pbitsize
, pbitpos
;
3036 enum machine_mode pmode
;
3037 int punsignedp
, pvolatilep
;
3038 affine_iv base_iv
, offset_iv
;
3041 /* Build a reference to the first location accessed by the
3042 inner-loop: *(BASE+INIT). (The first location is actually
3043 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3044 tree inner_base
= build_fold_indirect_ref
3045 (fold_build_pointer_plus (base
, init
));
3047 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE
, vect_location
,
3050 "analyze in outer-loop: ");
3051 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, inner_base
);
3054 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
3055 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
3056 gcc_assert (outer_base
!= NULL_TREE
);
3058 if (pbitpos
% BITS_PER_UNIT
!= 0)
3060 if (dump_enabled_p ())
3061 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3062 "failed: bit offset alignment.\n");
3066 outer_base
= build_fold_addr_expr (outer_base
);
3067 if (!simple_iv (loop
, loop_containing_stmt (stmt
), outer_base
,
3070 if (dump_enabled_p ())
3071 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3072 "failed: evolution of base is not affine.\n");
3079 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
,
3087 offset_iv
.base
= ssize_int (0);
3088 offset_iv
.step
= ssize_int (0);
3090 else if (!simple_iv (loop
, loop_containing_stmt (stmt
), poffset
,
3093 if (dump_enabled_p ())
3094 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3095 "evolution of offset is not affine.\n");
3099 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
3100 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
3101 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3102 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
3103 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3105 outer_step
= size_binop (PLUS_EXPR
,
3106 fold_convert (ssizetype
, base_iv
.step
),
3107 fold_convert (ssizetype
, offset_iv
.step
));
3109 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
3110 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3111 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
3112 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
3113 STMT_VINFO_DR_OFFSET (stmt_info
) =
3114 fold_convert (ssizetype
, offset_iv
.base
);
3115 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
3116 size_int (highest_pow2_factor (offset_iv
.base
));
3118 if (dump_enabled_p ())
3120 dump_printf_loc (MSG_NOTE
, vect_location
,
3121 "\touter base_address: ");
3122 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3123 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
3124 dump_printf (MSG_NOTE
, "\n\touter offset from base address: ");
3125 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3126 STMT_VINFO_DR_OFFSET (stmt_info
));
3127 dump_printf (MSG_NOTE
,
3128 "\n\touter constant offset from base address: ");
3129 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3130 STMT_VINFO_DR_INIT (stmt_info
));
3131 dump_printf (MSG_NOTE
, "\n\touter step: ");
3132 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3133 STMT_VINFO_DR_STEP (stmt_info
));
3134 dump_printf (MSG_NOTE
, "\n\touter aligned to: ");
3135 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3136 STMT_VINFO_DR_ALIGNED_TO (stmt_info
));
3140 if (STMT_VINFO_DATA_REF (stmt_info
))
3142 if (dump_enabled_p ())
3144 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3145 "not vectorized: more than one data ref "
3147 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3158 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
3160 /* Set vectype for STMT. */
3161 scalar_type
= TREE_TYPE (DR_REF (dr
));
3162 STMT_VINFO_VECTYPE (stmt_info
) =
3163 get_vectype_for_scalar_type (scalar_type
);
3164 if (!STMT_VINFO_VECTYPE (stmt_info
))
3166 if (dump_enabled_p ())
3168 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3169 "not vectorized: no vectype for stmt: ");
3170 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3171 dump_printf (MSG_MISSED_OPTIMIZATION
, " scalar_type: ");
3172 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_DETAILS
,
3181 STMT_VINFO_DATA_REF (stmt_info
) = NULL
;
3188 if (dump_enabled_p ())
3190 dump_printf_loc (MSG_NOTE
, vect_location
,
3191 "got vectype for stmt: ");
3192 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3193 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3194 STMT_VINFO_VECTYPE (stmt_info
));
3198 /* Adjust the minimal vectorization factor according to the
3200 vf
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
3208 gather
= 0 != vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
3210 && get_vectype_for_scalar_type (TREE_TYPE (off
)) == NULL_TREE
)
3214 STMT_VINFO_DATA_REF (stmt_info
) = NULL
;
3216 if (dump_enabled_p ())
3218 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3219 "not vectorized: not suitable for gather "
3221 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3227 STMT_VINFO_GATHER_P (stmt_info
) = true;
3230 && TREE_CODE (DR_STEP (dr
)) != INTEGER_CST
)
3232 if (nested_in_vect_loop_p (loop
, stmt
)
3233 || !DR_IS_READ (dr
))
3235 if (dump_enabled_p ())
3237 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3238 "not vectorized: not suitable for strided "
3240 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3244 STMT_VINFO_STRIDE_LOAD_P (stmt_info
) = true;
3248 /* If we stopped analysis at the first dataref we could not analyze
3249 when trying to vectorize a basic-block mark the rest of the datarefs
3250 as not vectorizable and truncate the vector of datarefs. That
3251 avoids spending useless time in analyzing their dependence. */
3252 if (i
!= datarefs
.length ())
3254 gcc_assert (bb_vinfo
!= NULL
);
3255 for (unsigned j
= i
; j
< datarefs
.length (); ++j
)
3257 data_reference_p dr
= datarefs
[j
];
3258 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
3261 datarefs
.truncate (i
);
/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme appends the
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
   the name of vectorizer generated variables, and appends that to NAME if
   provided.  */
3276 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
3283 case vect_simple_var
:
3286 case vect_scalar_var
:
3289 case vect_pointer_var
:
3298 char* tmp
= concat (prefix
, "_", name
, NULL
);
3299 new_vect_var
= create_tmp_reg (type
, tmp
);
3303 new_vect_var
= create_tmp_reg (type
, prefix
);
3305 return new_vect_var
;
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP:   Specify relative to which loop-nest should the address be computed.
           For example, when the dataref is in an inner-loop nested in an
           outer-loop that is now being vectorized, LOOP can be either the
           outer-loop, or the inner-loop.  The first memory location accessed
           by the following dataref ('in' points to short):

               for (i=0; i<N; i++)
                 for (j=0; j<M; j++)
                   s += in[i+j]

           is as follows:
           if LOOP=i_loop: &in             (relative to i_loop)
           if LOOP=j_loop: &in+i*2B        (relative to j_loop)

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */
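/* Illustrative sketch (not vectorizer code): the address built by this
   function is base_address + (offset + init), and when the optional OFFSET
   argument is given it is first multiplied by the element size (step) and
   added in.  The short array and the numbers below are hypothetical.  */
#if 0
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  short in[100];
  uintptr_t base_address = (uintptr_t) in;  /* DR_BASE_ADDRESS */
  long offset = 0;                          /* DR_OFFSET (no variable part) */
  long init = 6;                            /* DR_INIT in bytes: &in[3] */
  long step = sizeof (short);               /* TYPE_SIZE_UNIT of the element */
  long extra = 4;                           /* optional OFFSET, in elements */

  uintptr_t addr_base = base_address + offset + init + extra * step;
  printf ("computed %p, expected %p\n",
          (void *) addr_base, (void *) &in[3 + 4]);
  return 0;
}
#endif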
3341 vect_create_addr_base_for_vector_ref (gimple stmt
,
3342 gimple_seq
*new_stmt_list
,
3346 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3347 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3349 const char *base_name
;
3352 gimple_seq seq
= NULL
;
3356 tree step
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
3357 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3359 if (loop_vinfo
&& loop
&& loop
!= (gimple_bb (stmt
))->loop_father
)
3361 struct loop
*outer_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3363 gcc_assert (nested_in_vect_loop_p (outer_loop
, stmt
));
3365 data_ref_base
= unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
3366 base_offset
= unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info
));
3367 init
= unshare_expr (STMT_VINFO_DR_INIT (stmt_info
));
3371 data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3372 base_offset
= unshare_expr (DR_OFFSET (dr
));
3373 init
= unshare_expr (DR_INIT (dr
));
3377 base_name
= get_name (data_ref_base
);
3380 base_offset
= ssize_int (0);
3381 init
= ssize_int (0);
3382 base_name
= get_name (DR_REF (dr
));
3385 /* Create base_offset */
3386 base_offset
= size_binop (PLUS_EXPR
,
3387 fold_convert (sizetype
, base_offset
),
3388 fold_convert (sizetype
, init
));
3392 offset
= fold_build2 (MULT_EXPR
, sizetype
,
3393 fold_convert (sizetype
, offset
), step
);
3394 base_offset
= fold_build2 (PLUS_EXPR
, sizetype
,
3395 base_offset
, offset
);
3398 /* base + base_offset */
3400 addr_base
= fold_build_pointer_plus (data_ref_base
, base_offset
);
3403 addr_base
= build1 (ADDR_EXPR
,
3404 build_pointer_type (TREE_TYPE (DR_REF (dr
))),
3405 unshare_expr (DR_REF (dr
)));
3408 vect_ptr_type
= build_pointer_type (STMT_VINFO_VECTYPE (stmt_info
));
3409 addr_base
= fold_convert (vect_ptr_type
, addr_base
);
3410 dest
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
, base_name
);
3411 addr_base
= force_gimple_operand (addr_base
, &seq
, false, dest
);
3412 gimple_seq_add_seq (new_stmt_list
, seq
);
3414 if (DR_PTR_INFO (dr
)
3415 && TREE_CODE (addr_base
) == SSA_NAME
)
3417 duplicate_ssa_name_ptr_info (addr_base
, DR_PTR_INFO (dr
));
3419 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base
));
3422 if (dump_enabled_p ())
3424 dump_printf_loc (MSG_NOTE
, vect_location
, "created ");
3425 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, addr_base
);
3432 /* Function vect_create_data_ref_ptr.
3434 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3435 location accessed in the loop by STMT, along with the def-use update
3436 chain to appropriately advance the pointer through the loop iterations.
3437 Also set aliasing information for the pointer. This pointer is used by
3438 the callers to this function to create a memory reference expression for
3439 vector load/store access.
3442 1. STMT: a stmt that references memory. Expected to be of the form
3443 GIMPLE_ASSIGN <name, data-ref> or
3444 GIMPLE_ASSIGN <data-ref, name>.
3445 2. AGGR_TYPE: the type of the reference, which should be either a vector
3447 3. AT_LOOP: the loop where the vector memref is to be created.
3448 4. OFFSET (optional): an offset to be added to the initial address accessed
3449 by the data-ref in STMT.
3450 5. BSI: location where the new stmts are to be placed if there is no loop
3451 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3452 pointing to the initial address.
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
3457 For example, for vector of type V8HI, the following code is generated:
3460 ap = (v8hi *)initial_address;
3462 if OFFSET is not supplied:
3463 initial_address = &a[init];
3464 if OFFSET is supplied:
3465 initial_address = &a[init + OFFSET];
3467 Return the initial_address in INITIAL_ADDRESS.
3469 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3470 update the pointer in each iteration of the loop.
3472 Return the increment stmt that updates the pointer in PTR_INCR.
3474 3. Set INV_P to true if the access pattern of the data reference in the
3475 vectorized loop is invariant. Set it to false otherwise.
3477 4. Return the pointer. */
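/* Illustrative sketch (not vectorizer code): the pointer created here behaves
   like the hand-written induction variable below - initialized to the first
   accessed location before the loop and bumped by the aggregate size (here
   one 4-element int vector, 16 bytes) in every vector iteration.  The array,
   loop count and vector width are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int a[16];
  for (int i = 0; i < 16; i++)
    a[i] = i;

  int *vp = &a[0];              /* (2) initial address, set in the preheader */
  for (int i = 0; i < 16; i += 4)
    {
      /* The vectorized body would load one vector from VP here.  */
      printf ("block at %d starts with %d\n", i, vp[0]);
      vp += 4;                  /* (3) update: step is the vector size */
    }
  return 0;
}
#endif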
3480 vect_create_data_ref_ptr (gimple stmt
, tree aggr_type
, struct loop
*at_loop
,
3481 tree offset
, tree
*initial_address
,
3482 gimple_stmt_iterator
*gsi
, gimple
*ptr_incr
,
3483 bool only_init
, bool *inv_p
)
3485 const char *base_name
;
3486 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3487 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3488 struct loop
*loop
= NULL
;
3489 bool nested_in_vect_loop
= false;
3490 struct loop
*containing_loop
= NULL
;
3495 gimple_seq new_stmt_list
= NULL
;
3499 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3501 gimple_stmt_iterator incr_gsi
;
3503 tree indx_before_incr
, indx_after_incr
;
3506 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3508 gcc_assert (TREE_CODE (aggr_type
) == ARRAY_TYPE
3509 || TREE_CODE (aggr_type
) == VECTOR_TYPE
);
3513 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3514 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
3515 containing_loop
= (gimple_bb (stmt
))->loop_father
;
3516 pe
= loop_preheader_edge (loop
);
3520 gcc_assert (bb_vinfo
);
3525 /* Check the step (evolution) of the load in LOOP, and record
3526 whether it's invariant. */
3527 if (nested_in_vect_loop
)
3528 step
= STMT_VINFO_DR_STEP (stmt_info
);
3530 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
3532 if (integer_zerop (step
))
3537 /* Create an expression for the first address accessed by this load
3539 base_name
= get_name (DR_BASE_ADDRESS (dr
));
3541 if (dump_enabled_p ())
3543 tree dr_base_type
= TREE_TYPE (DR_BASE_OBJECT (dr
));
3544 dump_printf_loc (MSG_NOTE
, vect_location
,
3545 "create %s-pointer variable to type: ",
3546 tree_code_name
[(int) TREE_CODE (aggr_type
)]);
3547 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, aggr_type
);
3548 if (TREE_CODE (dr_base_type
) == ARRAY_TYPE
)
3549 dump_printf (MSG_NOTE
, " vectorizing an array ref: ");
3550 else if (TREE_CODE (dr_base_type
) == VECTOR_TYPE
)
3551 dump_printf (MSG_NOTE
, " vectorizing a vector ref: ");
3552 else if (TREE_CODE (dr_base_type
) == RECORD_TYPE
)
3553 dump_printf (MSG_NOTE
, " vectorizing a record based array ref: ");
3555 dump_printf (MSG_NOTE
, " vectorizing a pointer ref: ");
3556 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_BASE_OBJECT (dr
));
3559 /* (1) Create the new aggregate-pointer variable.
3560 Vector and array types inherit the alias set of their component
3561 type by default so we need to use a ref-all pointer if the data
3562 reference does not conflict with the created aggregated data
3563 reference because it is not addressable. */
3564 bool need_ref_all
= false;
3565 if (!alias_sets_conflict_p (get_alias_set (aggr_type
),
3566 get_alias_set (DR_REF (dr
))))
3567 need_ref_all
= true;
3568 /* Likewise for any of the data references in the stmt group. */
3569 else if (STMT_VINFO_GROUP_SIZE (stmt_info
) > 1)
3571 gimple orig_stmt
= STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
);
3574 stmt_vec_info sinfo
= vinfo_for_stmt (orig_stmt
);
3575 struct data_reference
*sdr
= STMT_VINFO_DATA_REF (sinfo
);
3576 if (!alias_sets_conflict_p (get_alias_set (aggr_type
),
3577 get_alias_set (DR_REF (sdr
))))
3579 need_ref_all
= true;
3582 orig_stmt
= STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo
);
3586 aggr_ptr_type
= build_pointer_type_for_mode (aggr_type
, ptr_mode
,
3588 aggr_ptr
= vect_get_new_vect_var (aggr_ptr_type
, vect_pointer_var
, base_name
);
3591 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3592 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3593 def-use update cycles for the pointer: one relative to the outer-loop
3594 (LOOP), which is what steps (3) and (4) below do. The other is relative
3595 to the inner-loop (which is the inner-most loop containing the dataref),
   and this is done by step (5) below.
3598 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3599 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3600 redundant. Steps (3),(4) create the following:
3603 LOOP: vp1 = phi(vp0,vp2)
3609 If there is an inner-loop nested in loop, then step (5) will also be
3610 applied, and an additional update in the inner-loop will be created:
3613 LOOP: vp1 = phi(vp0,vp2)
3615 inner: vp3 = phi(vp1,vp4)
3616 vp4 = vp3 + inner_step
3622 /* (2) Calculate the initial address of the aggregate-pointer, and set
3623 the aggregate-pointer to point to it before the loop. */
3625 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3627 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
3633 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmt_list
);
3634 gcc_assert (!new_bb
);
3637 gsi_insert_seq_before (gsi
, new_stmt_list
, GSI_SAME_STMT
);
3640 *initial_address
= new_temp
;
3642 /* Create: p = (aggr_type *) initial_base */
3643 if (TREE_CODE (new_temp
) != SSA_NAME
3644 || !useless_type_conversion_p (aggr_ptr_type
, TREE_TYPE (new_temp
)))
3646 vec_stmt
= gimple_build_assign (aggr_ptr
,
3647 fold_convert (aggr_ptr_type
, new_temp
));
3648 aggr_ptr_init
= make_ssa_name (aggr_ptr
, vec_stmt
);
3649 /* Copy the points-to information if it exists. */
3650 if (DR_PTR_INFO (dr
))
3651 duplicate_ssa_name_ptr_info (aggr_ptr_init
, DR_PTR_INFO (dr
));
3652 gimple_assign_set_lhs (vec_stmt
, aggr_ptr_init
);
3655 new_bb
= gsi_insert_on_edge_immediate (pe
, vec_stmt
);
3656 gcc_assert (!new_bb
);
3659 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
3662 aggr_ptr_init
= new_temp
;
3664 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3665 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3666 inner-loop nested in LOOP (during outer-loop vectorization). */
3668 /* No update in loop is required. */
3669 if (only_init
&& (!loop_vinfo
|| at_loop
== loop
))
3670 aptr
= aggr_ptr_init
;
3673 /* The step of the aggregate pointer is the type size. */
3674 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
3675 /* One exception to the above is when the scalar step of the load in
3676 LOOP is zero. In this case the step here is also zero. */
3678 iv_step
= size_zero_node
;
3679 else if (tree_int_cst_sgn (step
) == -1)
3680 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
3682 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
3684 create_iv (aggr_ptr_init
,
3685 fold_convert (aggr_ptr_type
, iv_step
),
3686 aggr_ptr
, loop
, &incr_gsi
, insert_after
,
3687 &indx_before_incr
, &indx_after_incr
);
3688 incr
= gsi_stmt (incr_gsi
);
3689 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3691 /* Copy the points-to information if it exists. */
3692 if (DR_PTR_INFO (dr
))
3694 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3695 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3700 aptr
= indx_before_incr
;
3703 if (!nested_in_vect_loop
|| only_init
)
3707 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3708 nested in LOOP, if exists. */
3710 gcc_assert (nested_in_vect_loop
);
3713 standard_iv_increment_position (containing_loop
, &incr_gsi
,
3715 create_iv (aptr
, fold_convert (aggr_ptr_type
, DR_STEP (dr
)), aggr_ptr
,
3716 containing_loop
, &incr_gsi
, insert_after
, &indx_before_incr
,
3718 incr
= gsi_stmt (incr_gsi
);
3719 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3721 /* Copy the points-to information if it exists. */
3722 if (DR_PTR_INFO (dr
))
3724 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3725 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3730 return indx_before_incr
;
3737 /* Function bump_vector_ptr
3739 Increment a pointer (to a vector type) by vector-size. If requested,
3740 i.e. if PTR-INCR is given, then also connect the new increment stmt
3741 to the existing def-use update-chain of the pointer, by modifying
3742 the PTR_INCR as illustrated below:
3744 The pointer def-use update-chain before this function:
3745 DATAREF_PTR = phi (p_0, p_2)
3747 PTR_INCR: p_2 = DATAREF_PTR + step
3749 The pointer def-use update-chain after this function:
3750 DATAREF_PTR = phi (p_0, p_2)
3752 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3754 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3757 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3759 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3760 the loop. The increment amount across iterations is expected
3762 BSI - location where the new update stmt is to be placed.
3763 STMT - the original scalar memory-access stmt that is being vectorized.
3764 BUMP - optional. The offset by which to bump the pointer. If not given,
3765 the offset is assumed to be vector_size.
3767 Output: Return NEW_DATAREF_PTR as illustrated above.
3772 bump_vector_ptr (tree dataref_ptr
, gimple ptr_incr
, gimple_stmt_iterator
*gsi
,
3773 gimple stmt
, tree bump
)
3775 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3776 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3777 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3778 tree update
= TYPE_SIZE_UNIT (vectype
);
3781 use_operand_p use_p
;
3782 tree new_dataref_ptr
;
3787 new_dataref_ptr
= copy_ssa_name (dataref_ptr
, NULL
);
3788 incr_stmt
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, new_dataref_ptr
,
3789 dataref_ptr
, update
);
3790 vect_finish_stmt_generation (stmt
, incr_stmt
, gsi
);
3792 /* Copy the points-to information if it exists. */
3793 if (DR_PTR_INFO (dr
))
3795 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
3796 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr
));
3800 return new_dataref_ptr
;
3802 /* Update the vector-pointer's cross-iteration increment. */
3803 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
3805 tree use
= USE_FROM_PTR (use_p
);
3807 if (use
== dataref_ptr
)
3808 SET_USE (use_p
, new_dataref_ptr
);
3810 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
3813 return new_dataref_ptr
;
3817 /* Function vect_create_destination_var.
3819 Create a new temporary of type VECTYPE. */
3822 vect_create_destination_var (tree scalar_dest
, tree vectype
)
3828 enum vect_var_kind kind
;
3830 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
3831 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
3833 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
3835 name
= get_name (scalar_dest
);
3837 asprintf (&new_name
, "%s_%u", name
, SSA_NAME_VERSION (scalar_dest
));
3839 asprintf (&new_name
, "_%u", SSA_NAME_VERSION (scalar_dest
));
3840 vec_dest
= vect_get_new_vect_var (type
, kind
, new_name
);
3846 /* Function vect_grouped_store_supported.
3848 Returns TRUE if interleave high and interleave low permutations
3849 are supported, and FALSE otherwise. */
3852 vect_grouped_store_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
3854 enum machine_mode mode
= TYPE_MODE (vectype
);
3856 /* vect_permute_store_chain requires the group size to be a power of two. */
3857 if (exact_log2 (count
) == -1)
3859 if (dump_enabled_p ())
3860 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3861 "the size of the group of accesses"
3862 " is not a power of 2");
3866 /* Check that the permutation is supported. */
3867 if (VECTOR_MODE_P (mode
))
3869 unsigned int i
, nelt
= GET_MODE_NUNITS (mode
);
3870 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
3871 for (i
= 0; i
< nelt
/ 2; i
++)
3874 sel
[i
* 2 + 1] = i
+ nelt
;
3876 if (can_vec_perm_p (mode
, false, sel
))
3878 for (i
= 0; i
< nelt
; i
++)
3880 if (can_vec_perm_p (mode
, false, sel
))
3885 if (dump_enabled_p ())
3886 dump_printf (MSG_MISSED_OPTIMIZATION
,
3887 "interleave op not supported by target.");
3892 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3896 vect_store_lanes_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
3898 return vect_lanes_optab_supported_p ("vec_store_lanes",
3899 vec_store_lanes_optab
,
3904 /* Function vect_permute_store_chain.
3906 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3907 a power of 2, generate interleave_high/low stmts to reorder the data
3908 correctly for the stores. Return the final references for stores in
3911 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3912 The input is 4 vectors each containing 8 elements. We assign a number to
3913 each element, the input sequence is:
3915 1st vec: 0 1 2 3 4 5 6 7
3916 2nd vec: 8 9 10 11 12 13 14 15
3917 3rd vec: 16 17 18 19 20 21 22 23
3918 4th vec: 24 25 26 27 28 29 30 31
3920 The output sequence should be:
3922 1st vec: 0 8 16 24 1 9 17 25
3923 2nd vec: 2 10 18 26 3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 29
3925 4th vec: 6 14 22 30 7 15 23 31
3927 i.e., we interleave the contents of the four vectors in their order.
3929 We use interleave_high/low instructions to create such output. The input of
3930 each interleave_high/low operation is two vectors:
3933 the even elements of the result vector are obtained left-to-right from the
3934 high/low elements of the first vector. The odd elements of the result are
3935 obtained left-to-right from the high/low elements of the second vector.
3936 The output of interleave_high will be: 0 4 1 5
3937 and of interleave_low: 2 6 3 7
   The permutation is done in log LENGTH stages.  In each stage interleave_high
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.
3946 I1: interleave_high (1st vec, 3rd vec)
3947 I2: interleave_low (1st vec, 3rd vec)
3948 I3: interleave_high (2nd vec, 4th vec)
3949 I4: interleave_low (2nd vec, 4th vec)
3951 The output for the first stage is:
3953 I1: 0 16 1 17 2 18 3 19
3954 I2: 4 20 5 21 6 22 7 23
3955 I3: 8 24 9 25 10 26 11 27
3956 I4: 12 28 13 29 14 30 15 31
3958 The output of the second stage, i.e. the final result is:
3960 I1: 0 8 16 24 1 9 17 25
3961 I2: 2 10 18 26 3 11 19 27
   I3:  4 12 20 28  5 13 21 29
3963 I4: 6 14 22 30 7 15 23 31. */
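/* Illustrative sketch (not vectorizer code): the two-stage interleave
   described above, written out for 4 "vectors" of 8 ints.  interleave_high
   pairs up the first halves of its two operands, interleave_low the second
   halves; after log2(4) = 2 stages the elements are in store order.  */
#if 0
#include <stdio.h>
#include <string.h>

#define NELT 8

static void
interleave_high (const int *a, const int *b, int *out)
{
  for (int i = 0; i < NELT / 2; i++)
    {
      out[2 * i] = a[i];
      out[2 * i + 1] = b[i];
    }
}

static void
interleave_low (const int *a, const int *b, int *out)
{
  for (int i = 0; i < NELT / 2; i++)
    {
      out[2 * i] = a[i + NELT / 2];
      out[2 * i + 1] = b[i + NELT / 2];
    }
}

int
main (void)
{
  int chain[4][NELT], result[4][NELT];

  /* 1st vec: 0..7, 2nd vec: 8..15, 3rd vec: 16..23, 4th vec: 24..31.  */
  for (int v = 0; v < 4; v++)
    for (int i = 0; i < NELT; i++)
      chain[v][i] = v * NELT + i;

  /* log2(length) = 2 stages, pairing chain[j] with chain[j + length/2].  */
  for (int stage = 0; stage < 2; stage++)
    {
      for (int j = 0; j < 2; j++)
        {
          interleave_high (chain[j], chain[j + 2], result[2 * j]);
          interleave_low (chain[j], chain[j + 2], result[2 * j + 1]);
        }
      memcpy (chain, result, sizeof (chain));
    }

  /* Prints the output sequence documented in the comment above.  */
  for (int v = 0; v < 4; v++)
    {
      for (int i = 0; i < NELT; i++)
        printf ("%3d", chain[v][i]);
      printf ("\n");
    }
  return 0;
}
#endif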
3966 vect_permute_store_chain (vec
<tree
> dr_chain
,
3967 unsigned int length
,
3969 gimple_stmt_iterator
*gsi
,
3970 vec
<tree
> *result_chain
)
3972 tree vect1
, vect2
, high
, low
;
3974 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
3975 tree perm_mask_low
, perm_mask_high
;
3977 unsigned int j
, nelt
= TYPE_VECTOR_SUBPARTS (vectype
);
3978 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
3980 result_chain
->quick_grow (length
);
3981 memcpy (result_chain
->address (), dr_chain
.address (),
3982 length
* sizeof (tree
));
3984 for (i
= 0, n
= nelt
/ 2; i
< n
; i
++)
3987 sel
[i
* 2 + 1] = i
+ nelt
;
3989 perm_mask_high
= vect_gen_perm_mask (vectype
, sel
);
3990 gcc_assert (perm_mask_high
!= NULL
);
3992 for (i
= 0; i
< nelt
; i
++)
3994 perm_mask_low
= vect_gen_perm_mask (vectype
, sel
);
3995 gcc_assert (perm_mask_low
!= NULL
);
3997 for (i
= 0, n
= exact_log2 (length
); i
< n
; i
++)
3999 for (j
= 0; j
< length
/2; j
++)
4001 vect1
= dr_chain
[j
];
4002 vect2
= dr_chain
[j
+length
/2];
4004 /* Create interleaving stmt:
4005 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4006 high
= make_temp_ssa_name (vectype
, NULL
, "vect_inter_high");
4008 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, high
,
4009 vect1
, vect2
, perm_mask_high
);
4010 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4011 (*result_chain
)[2*j
] = high
;
4013 /* Create interleaving stmt:
4014 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4015 nelt*3/2+1, ...}> */
4016 low
= make_temp_ssa_name (vectype
, NULL
, "vect_inter_low");
4018 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, low
,
4019 vect1
, vect2
, perm_mask_low
);
4020 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4021 (*result_chain
)[2*j
+1] = low
;
4023 memcpy (dr_chain
.address (), result_chain
->address (),
4024 length
* sizeof (tree
));
/* Function vect_setup_realignment

   This function is called when vectorizing an unaligned load using
   the dr_explicit_realign[_optimized] scheme.
   This function generates the following code at the loop prolog:

      p = initial_addr;
   x  msq_init = *(floor(p));   # prolog load
      realignment_token = call target_builtin;
    loop:
   x  msq = phi (msq_init, ---)

   The stmts marked with x are generated only for the case of
   dr_explicit_realign_optimized.

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT, and a "floor-aligned" load using that pointer.
   It also generates code to compute the "realignment-token" (if the relevant
   target hook was defined), and creates a phi-node at the loop-header bb
   whose arguments are the result of the prolog-load (created by this
   function) and the result of a load that takes place in the loop (to be
   created by the caller to this function).

   For the case of dr_explicit_realign_optimized:
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:
    loop:
      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   For the case of dr_explicit_realign:
    loop:
      msq = *(floor(p));        # load in loop
      p' = p + (VS-1);
      lsq = *(floor(p'));       # load in loop
      result = realign_load (msq, lsq, realignment_token);

   Input:
   STMT - (scalar) load stmt to be vectorized.  This load accesses
          a memory location that may be unaligned.
   GSI - place where new code is to be inserted.
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
                              is used.

   Output:
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
                       target hook, if defined.
   Return value - the result of the loop-header phi node.  */

tree
vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
                        tree *realignment_token,
                        enum dr_alignment_support alignment_support_scheme,
                        tree init_addr,
                        struct loop **at_loop)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = NULL;
  edge pe = NULL;
  tree scalar_dest = gimple_assign_lhs (stmt);
  tree vec_dest;
  gimple inc;
  tree ptr;
  tree data_ref;
  gimple new_stmt;
  basic_block new_bb;
  tree msq_init = NULL_TREE;
  tree new_temp;
  gimple phi_stmt;
  tree msq = NULL_TREE;
  gimple_seq stmts = NULL;
  bool inv_p;
  bool compute_in_loop = false;
  bool nested_in_vect_loop = false;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  struct loop *loop_for_initial_load = NULL;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
    }

  gcc_assert (alignment_support_scheme == dr_explicit_realign
              || alignment_support_scheme == dr_explicit_realign_optimized);

  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
        done (for the optimized realignment scheme).  */

  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP.  This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment).  In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) have to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP.  */

  if (init_addr != NULL_TREE || !loop_vinfo)
    {
      compute_in_loop = true;
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
    }

  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration.  In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP.  If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP.  Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop).  */

  if (nested_in_vect_loop)
    {
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      bool invariant_in_outerloop =
        (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    }
  else
    loop_for_initial_load = loop;
  if (at_loop)
    *at_loop = loop_for_initial_load;

  if (loop_for_initial_load)
    pe = loop_preheader_edge (loop_for_initial_load);

  /* 3. For the case of the optimized realignment, create the first vector
        load at the loop preheader.  */

  if (alignment_support_scheme == dr_explicit_realign_optimized)
    {
      /* Create msq_init = *(floor(p1)) in the loop preheader.  */

      gcc_assert (!compute_in_loop);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
                                      NULL_TREE, &init_addr, NULL, &inc,
                                      true, &inv_p);
      new_temp = copy_ssa_name (ptr, NULL);
      new_stmt = gimple_build_assign_with_ops
                   (BIT_AND_EXPR, new_temp, ptr,
                    build_int_cst (TREE_TYPE (ptr),
                                   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      data_ref
        = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
                  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
      new_stmt = gimple_build_assign (vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      if (pe)
        {
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);

      msq_init = gimple_assign_lhs (new_stmt);
    }

  /* 4. Create realignment token using a target builtin, if available.
        It is done either inside the containing loop, or before LOOP (as
        determined above).  */

  if (targetm.vectorize.builtin_mask_for_load)
    {
      tree builtin_decl;

      /* Compute INIT_ADDR - the initial addressed accessed by this memref.  */
      if (!init_addr)
        {
          /* Generate the INIT_ADDR computation outside LOOP.  */
          init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
                                                            NULL_TREE, loop);
          if (loop)
            {
              pe = loop_preheader_edge (loop);
              new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
              gcc_assert (!new_bb);
            }
          else
            gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
        }

      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
      vec_dest =
        vect_create_destination_var (scalar_dest,
                                     gimple_call_return_type (new_stmt));
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);

      if (compute_in_loop)
        gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      else
        {
          /* Generate the misalignment computation outside LOOP.  */
          pe = loop_preheader_edge (loop);
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }

      *realignment_token = gimple_call_lhs (new_stmt);

      /* The result of the CALL_EXPR to this builtin is determined from
         the value of the parameter and no global variables are touched
         which makes the builtin a "const" function.  Requiring the
         builtin to have the "const" attribute makes it unnecessary
         to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  if (alignment_support_scheme == dr_explicit_realign)
    return msq;

  gcc_assert (!compute_in_loop);
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);

  /* 5. Create msq = phi <msq_init, lsq> in loop.  */

  pe = loop_preheader_edge (containing_loop);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest, NULL);
  phi_stmt = create_phi_node (msq, containing_loop->header);
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);

  return msq;
}

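/* A minimal stand-alone model of the explicit realignment scheme above,
   assuming a vector of MODEL_VS bytes and plain byte arrays instead of
   vector loads:  "floor (p)" is P rounded down to a MODEL_VS-byte
   boundary, MSQ and LSQ stand for the two aligned chunks, and the
   misalignment OFF plays the role of the realignment token.  MODEL_VS and
   the model_* name are assumptions for illustration only; this helper is
   not used by the vectorizer.  */

#define MODEL_VS 16

static void
model_realign_load (const unsigned char *p, unsigned char out[MODEL_VS])
{
  unsigned long addr = (unsigned long) p;
  unsigned long off = addr % MODEL_VS;          /* realignment token  */
  const unsigned char *msq = p - off;           /* *(floor (p))  */
  const unsigned char *lsq = msq + MODEL_VS;    /* next aligned chunk  */
  unsigned int i;

  /* REALIGN_LOAD <msq, lsq, off>: take the tail of MSQ starting at the
     misalignment and complete it with the head of LSQ.  */
  for (i = 0; i < MODEL_VS; i++)
    out[i] = (off + i < MODEL_VS) ? msq[off + i] : lsq[off + i - MODEL_VS];
}
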
/* Function vect_grouped_load_supported.

   Returns TRUE if even and odd permutations are supported,
   and FALSE otherwise.  */

bool
vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode = TYPE_MODE (vectype);

  /* vect_permute_load_chain requires the group size to be a power of two.  */
  if (exact_log2 (count) == -1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "the size of the group of accesses"
                         " is not a power of 2");
      return false;
    }

  /* Check that the permutation is supported.  */
  if (VECTOR_MODE_P (mode))
    {
      unsigned int i, nelt = GET_MODE_NUNITS (mode);
      unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

      for (i = 0; i < nelt; i++)
        sel[i] = i * 2;
      if (can_vec_perm_p (mode, false, sel))
        {
          for (i = 0; i < nelt; i++)
            sel[i] = i * 2 + 1;
          if (can_vec_perm_p (mode, false, sel))
            return true;
        }
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                     "extract even/odd not supported by target");
  return false;
}

/* Return TRUE if vec_load_lanes is available for COUNT vectors of
   type VECTYPE.  */

bool
vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  return vect_lanes_optab_supported_p ("vec_load_lanes",
                                       vec_load_lanes_optab,
                                       vectype, count);
}

/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly.  Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number to
   each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0 4  8 12 16 20 24 28
   2nd vec:  1 5  9 13 17 21 25 29
   3rd vec:  2 6 10 14 18 22 26 30
   4th vec:  3 7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output.  The input of
   each extract_even/odd operation is two vectors:
   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   and the output is the vector of extracted even/odd elements.  The output of
   extract_even will be:   0 2 4 6
   and of extract_odd:     1 3 5 7

   The permutation is done in log LENGTH stages.  In each stage extract_even
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   their order.  In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:  0  2  4  6  8 10 12 14
   E2:  1  3  5  7  9 11 13 15
   E3: 16 18 20 22 24 26 28 30
   E4: 17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of extract_even operation and then the
   output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
   The input for the second stage is:

   1st vec (E1):  0  2  4  6  8 10 12 14
   2nd vec (E3): 16 18 20 22 24 26 28 30
   3rd vec (E2):  1  3  5  7  9 11 13 15
   4th vec (E4): 17 19 21 23 25 27 29 31

   The output of the second stage:

   E1: 0 4  8 12 16 20 24 28
   E2: 2 6 10 14 18 22 26 30
   E3: 1 5  9 13 17 21 25 29
   E4: 3 7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1):  0 4  8 12 16 20 24 28
   2nd vec (E3):  1 5  9 13 17 21 25 29
   3rd vec (E2):  2 6 10 14 18 22 26 30
   4th vec (E4):  3 7 11 15 19 23 27 31.  */

static void
vect_permute_load_chain (vec<tree> dr_chain,
                         unsigned int length,
                         gimple stmt,
                         gimple_stmt_iterator *gsi,
                         vec<tree> *result_chain)
{
  tree data_ref, first_vect, second_vect;
  tree perm_mask_even, perm_mask_odd;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  unsigned int i, j, log_length = exact_log2 (length);
  unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
          length * sizeof (tree));

  for (i = 0; i < nelt; ++i)
    sel[i] = i * 2;
  perm_mask_even = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_even != NULL);

  for (i = 0; i < nelt; ++i)
    sel[i] = i * 2 + 1;
  perm_mask_odd = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_odd != NULL);

  for (i = 0; i < log_length; i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = dr_chain[j];
          second_vect = dr_chain[j+1];

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
          perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                                    first_vect, second_vect,
                                                    perm_mask_even);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          (*result_chain)[j/2] = data_ref;

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
          perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                                    first_vect, second_vect,
                                                    perm_mask_odd);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          (*result_chain)[j/2+length/2] = data_ref;
        }
      memcpy (dr_chain.address (), result_chain->address (),
              length * sizeof (tree));
    }
}

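/* A minimal stand-alone model of the load permutation above, assuming
   plain int "vectors":  CHAIN holds LENGTH vectors of NELT elements laid
   out back to back, LENGTH is a power of two, and LENGTH * NELT is at
   most 128.  Each stage extracts the even and the odd elements of the
   concatenation of every consecutive pair of vectors, mirroring the
   VEC_PERM_EXPR masks built in vect_permute_load_chain.  With LENGTH == 4,
   NELT == 8 and CHAIN initialized to 0..31 this reproduces the tables in
   the comment above.  The model_* name is illustrative only and is not
   used by the vectorizer.  */

static void
model_permute_load_chain (int *chain, unsigned int length, unsigned int nelt)
{
  int tmp[128];
  unsigned int stage, i, j, stages = 0;

  for (i = length; i > 1; i >>= 1)
    stages++;                   /* log2 (LENGTH) stages.  */

  for (stage = 0; stage < stages; stage++)
    {
      for (j = 0; j < length; j += 2)
        {
          const int *first_vect = chain + j * nelt;
          const int *second_vect = chain + (j + 1) * nelt;
          int *even = tmp + (j / 2) * nelt;
          int *odd = tmp + (j / 2 + length / 2) * nelt;

          /* even = {0, 2, 4, ...} and odd = {1, 3, 5, ...} of the
             2*NELT-element concatenation first_vect ++ second_vect.  */
          for (i = 0; i < nelt / 2; i++)
            {
              even[i] = first_vect[2 * i];
              even[nelt / 2 + i] = second_vect[2 * i];
              odd[i] = first_vect[2 * i + 1];
              odd[nelt / 2 + i] = second_vect[2 * i + 1];
            }
        }
      for (i = 0; i < length * nelt; i++)
        chain[i] = tmp[i];
    }
}
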
/* Function vect_transform_grouped_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the result vectorized statements to
   the scalar statements.  */

void
vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
                             gimple_stmt_iterator *gsi)
{
  vec<tree> result_chain = vNULL;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains
     permuted vectors that are ready for vector computation.  */
  result_chain.create (size);
  vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
  vect_record_grouped_load_vectors (stmt, result_chain);
  result_chain.release ();
}

/* RESULT_CHAIN contains the output of a group of grouped loads that were
   generated as part of the vectorization of STMT.  Assign the statement
   for each vector to the associated scalar statement.  */

void
vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  gimple next_stmt, new_stmt;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps.  Loads created for the gaps will be removed by dead
         code elimination pass later.  No need to check for the first stmt in
         the group, since it always exists.
         GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
      if (next_stmt != first_stmt
          && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of multiple
             copies, and we put the new vector statement in the first available
             RELATED_STMT field.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else
            {
              if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
                {
                  gimple prev_stmt =
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
                  gimple rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
                  while (rel_stmt)
                    {
                      prev_stmt = rel_stmt;
                      rel_stmt =
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                    }

                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
                    new_stmt;
                }
            }

          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }
}

/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
   on ALIGNMENT bit boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  /* We cannot change alignment of common or external symbols as another
     translation unit may contain a definition with lower alignment.
     The rules of common symbol linking mean that the definition
     will override the common symbol.  The same is true for constant
     pool entries which may be shared and are not properly merged
     by LTO.  */
  if (DECL_EXTERNAL (decl)
      || DECL_COMMON (decl)
      || DECL_IN_CONSTANT_POOL (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  /* Do not override the alignment as specified by the ABI when the used
     attribute is set.  */
  if (DECL_PRESERVE_P (decl))
    return false;

  /* Do not override explicit alignment set by the user when an explicit
     section name is also used.  This is a common idiom used by many
     software projects.  */
  if (DECL_SECTION_NAME (decl) != NULL_TREE
      && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}

/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
                               bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      }  */

  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          if ((nested_in_vect_loop
               && (TREE_INT_CST_LOW (DR_STEP (dr))
                   != GET_MODE_SIZE (TYPE_MODE (vectype))))
              || !loop_vinfo)
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }
      if (!known_alignment_for_access_p (dr))
        is_packed = not_size_aligned (DR_REF (dr));

      if ((TYPE_USER_ALIGN (type) && !is_packed)
          || targetm.vectorize.
               support_vector_misalignment (mode, type,
                                            DR_MISALIGNMENT (dr), is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr))
        is_packed = not_size_aligned (DR_REF (dr));

      if ((TYPE_USER_ALIGN (type) && !is_packed)
          || targetm.vectorize.
               support_vector_misalignment (mode, type,
                                            DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}

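/* A minimal stand-alone model of the read-side choice between the two
   explicit realignment variants above, assuming plain integers:
   STEP_BYTES is the byte step of the data reference in the loop being
   vectorized and VS_BYTES the vector size in bytes.  The optimized scheme
   is only usable when the access is not an inner-loop access whose step
   differs from the vector size, since only then is the misalignment (and
   hence the realignment token) invariant and computable once in the
   preheader; basic-block vectorization (no loop) likewise falls back to
   the unoptimized scheme.  The model_* name is illustrative only and is
   not used by the vectorizer.  */

static int
model_can_use_optimized_realignment (int nested_in_vect_loop, int in_loop,
                                     long step_bytes, long vs_bytes)
{
  if (!in_loop || (nested_in_vect_loop && step_bytes != vs_bytes))
    return 0;   /* dr_explicit_realign: recompute the token in the loop.  */
  return 1;     /* dr_explicit_realign_optimized: token hoisted to preheader.  */
}
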