/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
                              tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  limit_p = !targetm.array_mode_supported_p (mode, count);
  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
                              MODE_INT, limit_p);

  if (array_mode == BLKmode)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
                         GET_MODE_NAME (mode), count);
      return false;
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot use %s<%s><%s>", name,
                         GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "can use %s<%s><%s>", name, GET_MODE_NAME (array_mode),
                     GET_MODE_NAME (mode));

  return true;
}
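
/* Illustrative example (editor's note, not from the original sources):
   for VECTYPE == V4SI (mode size 16 bytes == 128 bits) and COUNT == 2,
   the code above asks mode_for_size for an integer mode of
   2 * 128 == 256 bits.  If the target provides no such mode
   (array_mode == BLKmode) or registers no handler in OPTAB for that
   mode pair, the check fails and the vectorizer must fall back to
   other load/store strategies.  */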
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we
   have to visit the smallest datatype in this function, because that
   determines the VF.  If the smallest datatype in the loop is present only
   as the rhs of a promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away
   by invariant motion.  However, we cannot rely on invariant motion to
   always take invariants out of the loop, and so in the case of promotion
   we also have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
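
/* Worked example (editor's note, not from the original sources): for the
   promotion 'int_x = (int) short_y', gimple_expr_type gives the lhs type
   "int", so lhs == 4, while rhs1 has type "short", so rhs == 2 and the
   returned scalar type is "short".  The smaller type yields the larger VF:
   with 16-byte vectors, VF is 8 (shorts per vector) rather than 4.  */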
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
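
/* Editor's note: e.g., for a chain first_stmt -> s1 -> s2 built from
   GROUP_NEXT_ELEMENT links, the function returns 0 for first_stmt, 1 for
   s1, 2 for s2, and -1 for any stmt whose GROUP_FIRST_ELEMENT differs
   from FIRST_STMT.  */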
/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
          && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
          && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
              == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}
/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark for run-time aliasing test between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not supported when optimizing"
                         " for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not yet supported for outer-loops.");
      return false;
    }

  /* FORNOW: We don't support creating runtime alias tests for non-constant
     step.  */
  if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
      || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not yet supported for non-constant "
                         "step");
      return false;
    }

  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
  return true;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* In loop analysis all data references should be vectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    gcc_unreachable ();

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb
      || (DR_IS_READ (dra) && DR_IS_READ (drb)))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (STMT_VINFO_GATHER_P (stmtinfo_a)
          || STMT_VINFO_GATHER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "versioning for alias not supported for: "
                               "can't determine dependence between ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dra));
              dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (drb));
            }
          return true;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "versioning for alias required: "
                           "can't determine dependence between ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             DR_REF (drb));
        }

      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  /* Known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (STMT_VINFO_GATHER_P (stmtinfo_a)
          || STMT_VINFO_GATHER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "versioning for alias not supported for: "
                               "bad dist vector for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dra));
              dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (drb));
            }
          return true;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "versioning for alias required: "
                           "bad dist vector for ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
        }

      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "dependence distance == 0 between ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
              dump_printf (MSG_NOTE, " and ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is store.  */
          if (DR_IS_READ (dra))
            GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "adjusting maximal vectorization factor to %i",
                             *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "dependence distance >= VF.");
          continue;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized, possible dependence "
                           "between data-refs ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_NOTE, " and ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
        }

      return true;
    }

  return false;
}
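
/* Worked example (editor's note, not from the original sources): for
   'a[i+3] = a[i] + 1' the dependence distance is 3.  With *max_vf == 8
   on entry, 'abs (dist) >= 2 && abs (dist) < *max_vf' fires and *max_vf
   is reduced to 3; the following 'abs (dist) >= *max_vf' test (3 >= 3)
   then treats the dependence as harmless, provided the vectorization
   factor finally chosen does not exceed the reduced *max_vf.  */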
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_ref_dependences ===");

  if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
                                &LOOP_VINFO_DDRS (loop_vinfo),
                                LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
    return false;

  FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
      return false;

  return true;
}
/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  */

static bool
vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  /* We need to check dependences of statements marked as unvectorizable
     as well, they still can prohibit vectorization.  */

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb)
    return false;

  /* Read-read is OK.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    return false;

  /* If dra and drb are part of the same interleaving chain consider
     them independent.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
      && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
          == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      gimple earlier_stmt;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "can't determine dependence between ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check that it's not a load-after-store dependence.  */
      earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
        return true;

      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "determined dependence between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
    }

  /* Do not vectorize basic blocks with write-write dependences.  */
  if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
    return true;

  /* Check dependence between DRA and DRB for basic block vectorization.
     If the accesses share same bases and offsets, we can compare their initial
     constant offsets to decide whether they differ or not.  In case of a read-
     write dependence we check that the load is before the store to ensure that
     vectorization will not change the order of the accesses.  */

  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the
     store.  When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved
     in case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
/* Function vect_slp_analyze_data_ref_dependences.

   Examine all the data references in the basic-block, and make sure there
   do not exist any data dependences between them.  */

bool
vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
{
  struct data_dependence_relation *ddr;
  unsigned int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_slp_analyze_data_ref_dependences ===");

  if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
                                &BB_VINFO_DDRS (bb_vinfo),
                                vNULL, true))
    return false;

  FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
    if (vect_slp_analyze_data_ref_dependence (ddr))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  /* Strided loads perform only component accesses, misalignment information
     is irrelevant for them.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    return true;

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  /* Similarly, if we're doing basic-block vectorization, we can only use
     base and misalignment information relative to an innermost loop if the
     misalignment stays the same throughout the execution of the loop.
     As above, this is the case if the stride of the dataref evenly divides
     by the vector size.  */
  if (!loop)
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "SLP: step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown alignment for access: ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0)
      || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables here if
         flag_section_anchors is enabled as we already generated
         RTL for other functions.  Most global variables should
         have been aligned during the IPA increase_alignment pass.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "can't force alignment of ref: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
    }

  return true;
}
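
/* Worked example (editor's note, not from the original sources): for a
   16-byte vector type (alignment == 16) and DR_INIT == 20 relative to an
   aligned base, the FLOOR_MOD_EXPR above yields 20 mod 16 == 4, so
   DR_MISALIGNMENT (dr) == 4 bytes.  For a backward DR over V4SI with
   DR_STEP == -4, the adjustment first adds (4 - 1) * -4 == -12 to the
   misalignment, since the vector access touches the three elements
   before the recorded address.  */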
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  vec<data_reference_p> datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  vec<dr_p> same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
  if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
    dr_peel_size *= GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
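
/* Worked example (editor's note, not from the original sources): with
   npeel == 3, dr_size == 4 and a forward step, a DR with misalignment 4
   against a 16-byte vector type becomes (4 + 3*4) & 15 == 0, i.e.
   aligned, while a DR with misalignment 8 becomes (8 + 3*4) & 15 == 4.  */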
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<data_reference_p> datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (dump_enabled_p ())
            {
              if (DR_IS_READ (dr))
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: unsupported unaligned load.");
              else
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: unsupported unaligned "
                                 "store.");

              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dr));
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Vectorizing an unaligned access.");
    }
  return true;
}
/* Given a memory reference EXP return whether its alignment is less
   than its size.  */

static bool
not_size_aligned (tree exp)
{
  if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
    return true;

  return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
          > get_object_alignment (exp));
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          dump_printf (MSG_NOTE,
                       ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = TREE_TYPE (DR_REF (dr));
      bool is_packed = not_size_aligned (DR_REF (dr));
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
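
/* Worked example (editor's note, not from the original sources): for a
   group of GROUP_SIZE == 2 accesses of V8HI (nelements == 8) with
   mis_in_elements == 3, the prolog would need 8 - 3 == 5 iterations,
   which is not a multiple of 2, so peeling cannot align the group and
   the function returns false.  */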
/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost,
                           stmt_vector_for_cost *body_cost_vec)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;

  if (DR_IS_READ (dr))
    vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
                        NULL, body_cost_vec, false);
  else
    vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_data_access_cost: inside_cost = %d, "
                     "outside_cost = %d.", *inside_cost, *outside_cost);
}
static hashval_t
vect_peeling_hash (const void *elem)
{
  const struct _vect_peel_info *peel_info;

  peel_info = (const struct _vect_peel_info *) elem;
  return (hashval_t) peel_info->npeel;
}


static int
vect_peeling_hash_eq (const void *elem1, const void *elem2)
{
  const struct _vect_peel_info *a, *b;

  a = (const struct _vect_peel_info *) elem1;
  b = (const struct _vect_peel_info *) elem2;
  return (a->npeel == b->npeel);
}
/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  void **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                                     &elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
                                 INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && !flag_vect_cost_model)
    slot->count += VECT_MAX_COST;
}
/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

static int
vect_peeling_hash_get_most_frequent (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info max = (vect_peel_extended_info) data;

  if (elem->count > max->peel_info.count
      || (elem->count == max->peel_info.count
          && max->peel_info.npeel > elem->npeel))
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}
/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

static int
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info min = (vect_peel_extended_info) data;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
  int single_iter_cost;

  prologue_cost_vec.create (2);
  body_cost_vec.create (2);
  epilogue_cost_vec.create (2);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
                                 &body_cost_vec);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
                                               &dummy, single_iter_cost,
                                               &prologue_cost_vec,
                                               &epilogue_cost_vec);

  /* Prologue and epilogue costs are added to the target model later.
     These costs depend only on the scalar iteration cost, the
     number of peeling iterations finally chosen, and the number of
     misaligned statements.  So discard the information found here.  */
  prologue_cost_vec.release ();
  epilogue_cost_vec.release ();

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->body_cost_vec.release ();
      min->body_cost_vec = body_cost_vec;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }
  else
    body_cost_vec.release ();

  return 1;
}
/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel,
                                       stmt_vector_for_cost *body_cost_vec)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr = NULL;
  res.body_cost_vec = stmt_vector_for_cost();

  if (flag_vect_cost_model)
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_lowest_cost, &res);
    }
  else
    {
      res.peel_info.count = 0;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_most_frequent, &res);
    }

  *npeel = res.peel_info.npeel;
  *body_cost_vec = res.body_cost_vec;
  return res.peel_info.dr;
}
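
/* Editor's note: with the cost model enabled, the traversal above
   minimizes (inside_cost, outside_cost) lexicographically; without it,
   the table entry with the highest count wins, i.e. the npeel value
   that aligns the largest number of data references, with ties broken
   toward the smaller peel amount.  */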
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') which alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;
  stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost();

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      /* For invariant accesses there is nothing to enhance.  */
      if (integer_zerop (DR_STEP (dr)))
        continue;

      /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
                LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
                           htab_create (1, vect_peeling_hash,
                                        vect_peeling_hash_eq, free);

              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (negative
                           ? (mis - nelements) : (nelements - mis))
                  & (nelements - 1);

              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  Vectorization factor will be 8.  If
                 both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But the first one will be aligned also by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add full vector size, while we don't exceed
                 vectorization factor.
                 We do this automatically for cost model, since we calculate
                 cost for every peeling option.  */
              if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some other
                 access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (!flag_vect_cost_model)
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }

              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since we
                 have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know all the misalignment values, we prefer
                 peeling for data-ref that has maximum number of data-refs
                 with the same alignment, unless the target prefers to align
                 stores over load.  */
              if (all_misalignments_unknown)
                {
                  if (same_align_drs_max
                        < STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ()
                      || !dr0)
                    {
                      same_align_drs_max
                          = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
                      dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in the
                 loop, we choose peeling amount according to the known
                 accesses.  */
              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          if (!aligned_access_p (dr))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "vector alignment may not be reachable");
              break;
            }
        }
    }

  vect_versioning_for_alias_required
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling && all_misalignments_unknown
      && vect_supportable_dr_alignment (dr0, false))
    {
      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      if (first_store && DR_IS_READ (dr0))
        {
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
          stmt_vector_for_cost dummy;
          dummy.create (2);

          vect_get_data_access_cost (dr0, &load_inside_cost,
                                     &load_outside_cost,
                                     &dummy);
          vect_get_data_access_cost (first_store, &store_inside_cost,
                                     &store_outside_cost, &dummy);

          dummy.release ();

          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
             aligning the load DR0).  */
          load_inside_penalty = store_inside_cost;
          load_outside_penalty = store_outside_cost;
          for (i = 0;
               STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
                          DR_STMT (first_store))).iterate (i, &dr);
               i++)
            if (DR_IS_READ (dr))
              {
                load_inside_penalty += load_inside_cost;
                load_outside_penalty += load_outside_cost;
              }
            else
              {
                load_inside_penalty += store_inside_cost;
                load_outside_penalty += store_outside_cost;
              }

          /* Calculate the penalty for leaving DR0 unaligned (by
             aligning the FIRST_STORE).  */
          store_inside_penalty = load_inside_cost;
          store_outside_penalty = load_outside_cost;
          for (i = 0;
               STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
                      DR_STMT (dr0))).iterate (i, &dr);
               i++)
            if (DR_IS_READ (dr))
              {
                store_inside_penalty += load_inside_cost;
                store_outside_penalty += load_outside_cost;
              }
            else
              {
                store_inside_penalty += store_inside_cost;
                store_outside_penalty += store_outside_cost;
              }

          if (load_inside_penalty > store_inside_penalty
              || (load_inside_penalty == store_inside_penalty
                  && load_outside_penalty > store_outside_penalty))
            dr0 = first_store;
        }

      /* In case there are only loads with different unknown misalignments, use
         peeling only if it may help to align other accesses in the loop.  */
      if (!first_store
          && !STMT_VINFO_SAME_ALIGN_REFS (
                  vinfo_for_stmt (DR_STMT (dr0))).length ()
          && vect_supportable_dr_alignment (dr0, false)
              != dr_unaligned_supported)
        do_peeling = false;
    }

  if (do_peeling && !dr0)
    {
      /* Peeling is possible, but there is no data access that is not supported
         unless aligned.  So we try to choose the best possible peeling.  */

      /* We should get here only if there are drs with known misalignment.  */
      gcc_assert (!all_misalignments_unknown);

      /* Choose the best peeling from the hash table.  */
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
                                                   &body_cost_vec);
      if (!dr0 || !npeel)
        do_peeling = false;
    }

  if (do_peeling)
    {
      stmt = DR_STMT (dr0);
      stmt_info = vinfo_for_stmt (stmt);
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          bool negative = tree_int_cst_compare (DR_STEP (dr0),
                                                size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 vectorization factor minus the misalignment as an element
                 count.  */
              mis = DR_MISALIGNMENT (dr0);
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
              npeel = ((negative ? mis - nelements : nelements - mis)
                       & (nelements - 1));
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
            npeel /= GROUP_SIZE (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Try peeling by %d", npeel);
        }
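
      /* Worked example (editor's note, not from the original sources):
         for V4SI (nelements == 4), element size 4 and
         DR_MISALIGNMENT (dr0) == 8, mis == 2 elements and
         npeel == (4 - 2) & 3 == 2 scalar iterations; with a negative
         step the formula instead peels (mis - nelements) & 3 iterations,
         counting from the other end of the access.  */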
      /* Ensure that all data refs can be vectorized after the peel.  */
      FOR_EACH_VEC_ELT (datarefs, i, dr)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
              && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
            continue;

          /* Strided loads perform only component accesses, alignment is
             irrelevant for them.  */
          if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
        {
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          if (!stat)
            do_peeling = false;
          else
            {
              body_cost_vec.release ();
              return stat;
            }
        }

      if (do_peeling)
        {
          stmt_info_for_cost *si;
          void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);

          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (datarefs, i, dr)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          if (npeel)
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Alignment of access forced using peeling.");
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Peeling for alignment will be applied.");
            }
          /* We've delayed passing the inside-loop peeling costs to the
             target cost model until we were sure peeling would happen.
             Do so now.  */
          if (body_cost_vec.exists ())
            {
              FOR_EACH_VEC_ELT (body_cost_vec, i, si)
                {
                  struct _stmt_vec_info *stmt_info
                    = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
                  (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                                        si->misalign, vect_body);
                }
              body_cost_vec.release ();
            }

          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
          return stat;
        }
    }

  body_cost_vec.release ();

  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
        flag_tree_vect_loop_version
        && optimize_loop_nest_for_speed_p (loop)
        && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (datarefs, i, dr)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
                  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
            continue;

          /* Strided loads perform only component accesses, alignment is
             irrelevant for them.  */
          if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);

          if (!supportable_dr_alignment)
            {
              gimple stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
                                                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
        LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
    }

  if (do_versioning)
    {
      vec<gimple> may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      gimple stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Alignment of access forced using versioning.");
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
  return stat;
}
/* Function vect_find_same_alignment_drs.

   Update group and alignment relations according to the chosen
   vectorization factor.  */

static void
vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
                              loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return;

  if (dra == drb)
    return;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return;

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    return;

  /* Data-dependence analysis reports a distance vector of zero
     for data-references that overlap only in the first iteration
     but have different sign step (see PR45764).
     So as a sanity check require equal DR_STEP.  */
  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
    return;

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "dependence distance  = %d.", dist);

      /* Same loop iteration.  */
      if (dist == 0
          || (dist % vectorization_factor == 0 && dra_size == drb_size))
        {
          /* Two references with distance zero have the same alignment.  */
          STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
          STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "accesses have the same alignment.");
              dump_printf (MSG_NOTE,
                           "dependence distance modulo vf == 0 between ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
              dump_printf (MSG_NOTE, " and ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
            }
        }
    }
}
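/* Illustrative example (not from the original sources): with a
   vectorization factor of 4 and int accesses a[i] and a[i+4], the
   dependence distance is 4; since 4 % 4 == 0 and the access sizes match,
   both refs are pushed onto each other's SAME_ALIGN_REFS.  For a[i] and
   a[i+1] the distance is 1, so no same-alignment relation is recorded.  */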
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_refs_alignment ===");

  /* Mark groups of data references with same alignment using
     data dependence information.  */
  if (loop_vinfo)
    {
      vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
      struct data_dependence_relation *ddr;
      unsigned int i;

      FOR_EACH_VEC_ELT (ddrs, i, ddr)
        vect_find_same_alignment_drs (ddr, loop_vinfo);
    }

  if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: can't calculate alignment "
                         "for data ref.");
      return false;
    }

  return true;
}
/* Analyze groups of accesses: check that DR belongs to a group of
   accesses of legal size, step, etc.  Detect gaps, single element
   interleaving, and other special cases.  Set grouped access info.
   Collect groups of strided stores for further use in SLP analysis.  */

static bool
vect_analyze_group_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  HOST_WIDE_INT groupsize, last_accessed_element = 1;
  bool slp_impossible = false;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
     size of the interleaving group (including gaps).  */
  groupsize = dr_step / type_size;
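  /* For example (illustrative only): for the access a[3*i] with 4-byte
     elements, DR_STEP is 12 and GROUPSIZE is 12 / 4 = 3: the group spans
     three elements per iteration, whether or not all three are actually
     accessed.  */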
  /* A non-consecutive access is possible only if it is part of an
     interleaving group.  */
  if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Check whether this DR is part of an interleaving group and is a
         single element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads.  STEP must be a multiple of the
         type size.  The size of the group must be a power of 2.  */
      if (DR_IS_READ (dr)
          && (dr_step % type_size) == 0
          && groupsize > 0
          && exact_log2 (groupsize) != -1)
        {
          GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
          GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Detected single element interleaving ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
              dump_printf (MSG_NOTE, " step ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
            }

          if (loop_vinfo)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "Data access with gaps requires scalar "
                                 "epilogue loop");
              if (loop->inner)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Peeling for outer loop is not"
                                     " supported");
                  return false;
                }

              LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
            }

          return true;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not consecutive access ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
        }

      if (bb_vinfo)
        {
          /* Mark the statement as unvectorizable.  */
          STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
          return true;
        }

      return false;
    }

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
    {
      /* First stmt in the interleaving chain.  Check the chain.  */
      gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree next_step;
      tree prev_init = DR_INIT (data_ref);
      gimple prev = stmt;
      HOST_WIDE_INT diff, count_in_bytes, gaps = 0;

      while (next)
        {
          /* Skip same data-refs.  In case two or more stmts share a
             data-ref (supported only for loads), we vectorize only the first
             stmt, and the rest get their vectorized loads from the first
             one.  */
          if (!tree_int_cst_compare (DR_INIT (data_ref),
                                     DR_INIT (STMT_VINFO_DATA_REF (
                                                   vinfo_for_stmt (next)))))
            {
              if (DR_IS_WRITE (data_ref))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Two store stmts share the same dr.");
                  return false;
                }

              /* Check that there are no load-store dependencies for these
                 loads, to prevent a load-store-load sequence to the same
                 location.  */
              if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
                  || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "READ_WRITE dependence in interleaving.");
                  return false;
                }

              /* For load use the same data-ref load.  */
              GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;

              prev = next;
              next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
              continue;
            }

          prev = next;

          /* Check that all the accesses have the same STEP.  */
          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (step, next_step))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not consecutive access in interleaving");
              return false;
            }

          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
          /* Check that the distance between two accesses is equal to the
             type size.  Otherwise, we have gaps.  */
          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
                  - TREE_INT_CST_LOW (prev_init)) / type_size;
          if (diff != 1)
            {
              /* FORNOW: SLP of accesses with gaps is not supported.  */
              slp_impossible = true;
              if (DR_IS_WRITE (data_ref))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "interleaved store with gaps");
                  return false;
                }

              gaps += diff - 1;
            }

          last_accessed_element += diff;

          /* Store the gap from the previous member of the group.  If there
             is no gap in the access, GROUP_GAP is always 1.  */
          GROUP_GAP (vinfo_for_stmt (next)) = diff;

          prev_init = DR_INIT (data_ref);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
          /* Count the number of data-refs in the chain.  */
          count++;
        }

      /* COUNT is the number of accesses found, we multiply it by the size of
         the type to get COUNT_IN_BYTES.  */
      count_in_bytes = type_size * count;

      /* Check that the size of the interleaving (including gaps) is not
         greater than STEP.  */
      if (dr_step && dr_step < count_in_bytes + gaps * type_size)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "interleaving size is greater than step for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dr));
            }
          return false;
        }

      /* Check that the size of the interleaving is equal to STEP for stores,
         i.e., that there are no gaps.  */
      if (dr_step && dr_step != count_in_bytes)
        {
          if (DR_IS_READ (dr))
            {
              slp_impossible = true;
              /* There is a gap after the last load in the group.  This gap
                 is the difference between the groupsize and the number of
                 elements.  When there is no gap, this difference should
                 be 0.  */
              GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
            }
          else
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "interleaved store with gaps");
              return false;
            }
        }

      /* Check that STEP is a multiple of type size.  */
      if (dr_step && (dr_step % type_size) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "step is not a multiple of type size: step ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
              dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 TYPE_SIZE_UNIT (scalar_type));
            }
          return false;
        }

      if (groupsize == 0)
        groupsize = count;

      GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Detected interleaving of size %d", (int)groupsize);

      /* SLP: create an SLP data structure for every interleaving group of
         stores for further analysis in vect_analyze_slp.  */
      if (DR_IS_WRITE (dr) && !slp_impossible)
        {
          if (loop_vinfo)
            LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
          if (bb_vinfo)
            BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
        }

      /* There is a gap at the end of the group.  */
      if (groupsize - last_accessed_element > 0 && loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Data access with gaps requires scalar "
                             "epilogue loop");
          if (loop->inner)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Peeling for outer loop is not supported");
              return false;
            }

          LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
        }
    }

  return true;
}
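/* Worked example (illustrative only) of the bookkeeping above: for loads
   a[4*i], a[4*i+1] and a[4*i+3] with 4-byte elements, DR_STEP is 16 and
   GROUPSIZE is 4.  Walking the chain gives DIFF 1 between the first two
   members and DIFF 2 between the last two, so SLP_IMPOSSIBLE is set and
   GAPS becomes 1; COUNT ends up as 3 and LAST_ACCESSED_ELEMENT as 4.
   COUNT_IN_BYTES is 12, which differs from DR_STEP, so for this load
   group GROUP_GAP of the first stmt becomes GROUPSIZE - COUNT = 1, and
   since GROUPSIZE - LAST_ACCESSED_ELEMENT is 0 no epilogue peeling for
   gaps is required.  */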
/* Analyze the access pattern of the data-reference DR.
   In case of non-consecutive accesses call vect_analyze_group_access() to
   analyze groups of accesses.  */

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (loop_vinfo && !step)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad data-ref access in loop");
      return false;
    }

  /* Allow invariant loads in loops.  */
  if (loop_vinfo && integer_zerop (step))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
      return DR_IS_READ (dr);
    }

  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      /* Interleaved accesses are not yet supported within outer-loop
         vectorization for references in the inner-loop.  */
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      if (integer_zerop (step))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "zero step in outer loop.");
          if (DR_IS_READ (dr))
            return true;
          else
            return false;
        }
    }

  /* Consecutive?  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
      if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
          || (dr_step < 0
              && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
        {
          /* Mark that it is not interleaving.  */
          GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
          return true;
        }
    }

  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "grouped access in outer loop.");
      return false;
    }

  /* Assume this is a DR handled by the non-constant strided load case.  */
  if (TREE_CODE (step) != INTEGER_CST)
    return STMT_VINFO_STRIDE_LOAD_P (stmt_info);

  /* Not a consecutive access - check if it is part of an interleaving
     group.  */
  return vect_analyze_group_access (dr);
}
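/* Illustrative summary (not from the original sources) of how the checks
   above classify a data-ref with a 4-byte scalar type:

     loop-invariant  step 0                    -> allowed for loads only
     a[i]            step 4 == type size       -> consecutive access
     a[2*i]          step 8, constant          -> vect_analyze_group_access
     a[i*n]          step not an INTEGER_CST   -> strided load, if marked  */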
/* Compare two data-references DRA and DRB to group them into chunks
   suitable for grouping.  */

static int
dr_group_sort_cmp (const void *dra_, const void *drb_)
{
  data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
  data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
  hashval_t h1, h2;
  int cmp;

  /* Stabilize sort.  */
  if (dra == drb)
    return 0;

  /* Ordering of DRs according to base.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
    {
      h1 = iterative_hash_expr (DR_BASE_ADDRESS (dra), 0);
      h2 = iterative_hash_expr (DR_BASE_ADDRESS (drb), 0);
      if (h1 != h2)
        return h1 < h2 ? -1 : 1;
    }

  /* And according to DR_OFFSET.  */
  if (!dr_equal_offsets_p (dra, drb))
    {
      h1 = iterative_hash_expr (DR_OFFSET (dra), 0);
      h2 = iterative_hash_expr (DR_OFFSET (drb), 0);
      if (h1 != h2)
        return h1 < h2 ? -1 : 1;
    }

  /* Put reads before writes.  */
  if (DR_IS_READ (dra) != DR_IS_READ (drb))
    return DR_IS_READ (dra) ? -1 : 1;

  /* Then sort after access size.  */
  if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
                        TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
    {
      h1 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))), 0);
      h2 = iterative_hash_expr (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0);
      if (h1 != h2)
        return h1 < h2 ? -1 : 1;
    }

  /* And after step.  */
  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
    {
      h1 = iterative_hash_expr (DR_STEP (dra), 0);
      h2 = iterative_hash_expr (DR_STEP (drb), 0);
      if (h1 != h2)
        return h1 < h2 ? -1 : 1;
    }

  /* Then sort after DR_INIT.  In case of identical DRs sort after stmt
     UID.  */
  cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
  if (cmp == 0)
    return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
  return cmp;
}
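/* dr_group_sort_cmp only has to yield a consistent total order, not a
   semantically meaningful one, which is why hash values of the base,
   offset, size and step trees are compared; ties between otherwise equal
   DRs are broken by statement UID so the sort is stable.  It is used as
   (see vect_analyze_data_ref_accesses below):

     qsort (datarefs.address (), datarefs.length (),
            sizeof (data_reference_p), dr_group_sort_cmp);  */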
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  vec<data_reference_p> datarefs;
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_ref_accesses ===");

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  if (datarefs.is_empty ())
    return true;

  /* Sort the array of datarefs to make building the interleaving chains
     linear.  */
  qsort (datarefs.address (), datarefs.length (),
         sizeof (data_reference_p), dr_group_sort_cmp);

  /* Build the interleaving chains.  */
  for (i = 0; i < datarefs.length () - 1;)
    {
      data_reference_p dra = datarefs[i];
      stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
      stmt_vec_info lastinfo = NULL;
      for (i = i + 1; i < datarefs.length (); ++i)
        {
          data_reference_p drb = datarefs[i];
          stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

          /* ???  Imperfect sorting (non-compatible types, non-modulo
             accesses, same accesses) can lead to a group being artificially
             split here as we don't just skip over those.  If it really
             matters we can push those to a worklist and re-iterate
             over them.  Then we can just skip ahead to the next DR here.  */

          /* Check that the data-refs have the same first location (except
             init) and that both are either stores or loads (not one load
             and one store).  */
          if (DR_IS_READ (dra) != DR_IS_READ (drb)
              || !operand_equal_p (DR_BASE_ADDRESS (dra),
                                   DR_BASE_ADDRESS (drb), 0)
              || !dr_equal_offsets_p (dra, drb))
            break;

          /* Check that the data-refs have the same constant size and step.  */
          tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
          tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
          if (!host_integerp (sza, 1)
              || !host_integerp (szb, 1)
              || !tree_int_cst_equal (sza, szb)
              || !host_integerp (DR_STEP (dra), 0)
              || !host_integerp (DR_STEP (drb), 0)
              || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
            break;

          /* Do not place the same access in the interleaving chain twice.  */
          if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
            break;

          /* Check the types are compatible.
             ???  We don't distinguish this during sorting.  */
          if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
                                   TREE_TYPE (DR_REF (drb))))
            break;

          /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
          HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
          HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
          gcc_assert (init_a < init_b);

          /* If init_b == init_a + the size of the type * k, we have an
             interleaving, and DRA is accessed before DRB.  */
          HOST_WIDE_INT type_size_a = TREE_INT_CST_LOW (sza);
          if ((init_b - init_a) % type_size_a != 0)
            break;

          /* The step (if not zero) is greater than the difference between
             data-refs' inits.  This splits groups into suitable sizes.  */
          HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (dra));
          if (step != 0 && step <= (init_b - init_a))
            break;

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Detected interleaving ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
              dump_printf (MSG_NOTE, " and ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
            }

          /* Link the found element into the group list.  */
          if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
            {
              GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
              lastinfo = stmtinfo_a;
            }
          GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
          GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
          lastinfo = stmtinfo_b;
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_analyze_data_ref_access (dr))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: complicated access pattern.");

        if (bb_vinfo)
          {
            /* Mark the statement as not vectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}
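/* Illustrative example (not from the original sources): for the 4-byte
   accesses a[2*i] and a[2*i+1], both refs share base and step 8, and
   their inits are 0 and 4; init_b - init_a == 4 is a multiple of the type
   size and smaller than the step, so the refs are chained into a single
   interleaving group.  For a[i] and a[i+1] the step is 4 and
   step <= init_b - init_a, so the chain is split: the second ref is just
   the first one shifted by one iteration.  */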
/* Function vect_prune_runtime_alias_test_list.

   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Return FALSE if the resulting list of ddrs is longer than allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */

bool
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  vec<ddr_p> ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  unsigned i, j;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_prune_runtime_alias_test_list ===");

  for (i = 0; i < ddrs.length (); )
    {
      bool found;
      ddr_p ddr_i;

      ddr_i = ddrs[i];
      found = false;

      for (j = 0; j < i; j++)
        {
          ddr_p ddr_j = ddrs[j];

          if (vect_vfa_range_equal (ddr_i, ddr_j))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "found equal ranges ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                     DR_REF (DDR_A (ddr_i)));
                  dump_printf (MSG_NOTE, ", ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                     DR_REF (DDR_B (ddr_i)));
                  dump_printf (MSG_NOTE, " and ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                     DR_REF (DDR_A (ddr_j)));
                  dump_printf (MSG_NOTE, ", ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                     DR_REF (DDR_B (ddr_j)));
                }
              found = true;
              break;
            }
        }

      if (found)
        {
          ddrs.ordered_remove (i);
          continue;
        }
      i++;
    }

  if (ddrs.length () >
        (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "disable versioning for alias - max number of "
                           "generated checks exceeded.");
        }

      LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).truncate (0);

      return false;
    }

  return true;
}
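/* Each ddr kept by the pruning above later costs one runtime guard in the
   versioned loop.  Conceptually (illustrative only; SEGLEN_A and SEGLEN_B
   stand for the byte ranges the two refs touch across the vectorized
   iterations):

     if (addr_a + SEGLEN_A <= addr_b || addr_b + SEGLEN_B <= addr_a)
       ... vectorized loop ...
     else
       ... scalar loop ...

   which is why the list is capped by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS.  */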
/* Check whether a non-affine read in stmt is suitable for gather load
   and if so, return a builtin decl for that operation.  */

tree
vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
                   tree *offp, int *scalep)
{
  HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree offtype = NULL_TREE;
  tree decl, base, off;
  enum machine_mode pmode;
  int punsignedp, pvolatilep;

  /* The gather builtins need an address of the form
     loop_invariant + vector * {1, 2, 4, 8}
     or
     loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
     Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
     of loop invariants/SSA_NAMEs defined in the loop, with casts,
     multiplications and additions in it.  To get a vector, we need
     a single SSA_NAME that will be defined in the loop and will
     contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
     SSA_NAME OFF and put the loop invariants into a tree BASE
     that can be gimplified before the loop.  */
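  /* Illustrative example (not from the original sources): for a read
     a[b[i]] with 4-byte elements, this decomposition yields
     BASE = &a (loop invariant), OFF = the SSA_NAME holding b[i]
     (defined in the loop), and SCALE = 4, matching the
     loop_invariant + vector * {1, 2, 4, 8} shape required above.  */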
  base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
                              &pmode, &punsignedp, &pvolatilep, false);
  gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);

  if (TREE_CODE (base) == MEM_REF)
    {
      if (!integer_zerop (TREE_OPERAND (base, 1)))
        {
          if (off == NULL_TREE)
            {
              double_int moff = mem_ref_offset (base);
              off = double_int_to_tree (sizetype, moff);
            }
          else
            off = size_binop (PLUS_EXPR, off,
                              fold_convert (sizetype, TREE_OPERAND (base, 1)));
        }
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (off == NULL_TREE)
    off = size_zero_node;

  /* If base is not loop invariant, then, if off is 0, we start with just
     the constant offset in the loop invariant BASE and continue with base
     as OFF; otherwise we give up.
     We could handle that case by gimplifying the addition of base + off
     into some SSA_NAME and using that as off, but for now punt.  */
  if (!expr_invariant_in_loop_p (loop, base))
    {
      if (!integer_zerop (off))
        return NULL_TREE;
      off = base;
      base = size_int (pbitpos / BITS_PER_UNIT);
    }
  /* Otherwise put base + constant offset into the loop invariant BASE
     and continue with OFF.  */
  else
    {
      base = fold_convert (sizetype, base);
      base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
    }

  /* OFF at this point may be either a SSA_NAME or some tree expression
     from get_inner_reference.  Try to peel off loop invariants from it
     into BASE as long as possible.  */
  STRIP_NOPS (off);
  while (offtype == NULL_TREE)
    {
      enum tree_code code;
      tree op0, op1, add = NULL_TREE;

      if (TREE_CODE (off) == SSA_NAME)
        {
          gimple def_stmt = SSA_NAME_DEF_STMT (off);

          if (expr_invariant_in_loop_p (loop, off))
            return NULL_TREE;

          if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
            break;

          op0 = gimple_assign_rhs1 (def_stmt);
          code = gimple_assign_rhs_code (def_stmt);
          op1 = gimple_assign_rhs2 (def_stmt);
        }
      else
        {
          if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
            return NULL_TREE;
          code = TREE_CODE (off);
          extract_ops_from_tree (off, &code, &op0, &op1);
        }
      switch (code)
        {
        case POINTER_PLUS_EXPR:
        case PLUS_EXPR:
          if (expr_invariant_in_loop_p (loop, op0))
            {
              add = op0;
              off = op1;
            do_add:
              add = fold_convert (sizetype, add);
              if (scale != 1)
                add = size_binop (MULT_EXPR, add, size_int (scale));
              base = size_binop (PLUS_EXPR, base, add);
              continue;
            }
          if (expr_invariant_in_loop_p (loop, op1))
            {
              add = op1;
              off = op0;
              goto do_add;
            }
          break;
        case MINUS_EXPR:
          if (expr_invariant_in_loop_p (loop, op1))
            {
              add = fold_convert (sizetype, op1);
              add = size_binop (MINUS_EXPR, size_zero_node, add);
              off = op0;
              goto do_add;
            }
          break;
        case MULT_EXPR:
          if (scale == 1 && host_integerp (op1, 0))
            {
              scale = tree_low_cst (op1, 0);
              off = op0;
              continue;
            }
          break;
        case SSA_NAME:
          off = op0;
          continue;
        CASE_CONVERT:
          if (!POINTER_TYPE_P (TREE_TYPE (op0))
              && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
            break;
          if (TYPE_PRECISION (TREE_TYPE (op0))
              == TYPE_PRECISION (TREE_TYPE (off)))
            {
              off = op0;
              continue;
            }
          if (TYPE_PRECISION (TREE_TYPE (op0))
              < TYPE_PRECISION (TREE_TYPE (off)))
            {
              off = op0;
              offtype = TREE_TYPE (off);
              STRIP_NOPS (off);
              continue;
            }
          break;
        default:
          break;
        }
      break;
    }

  /* If at the end OFF still isn't a SSA_NAME or isn't
     defined in the loop, punt.  */
  if (TREE_CODE (off) != SSA_NAME
      || expr_invariant_in_loop_p (loop, off))
    return NULL_TREE;

  if (offtype == NULL_TREE)
    offtype = TREE_TYPE (off);

  decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
                                           offtype, scale);
  if (decl == NULL_TREE)
    return NULL_TREE;

  if (basep)
    *basep = base;
  if (offp)
    *offp = off;
  if (scalep)
    *scalep = scale;
  return decl;
}
/* Function vect_analyze_data_refs.

   Find all the data references in the loop or basic block.

   The general structure of the analysis of data refs in the vectorizer is as
   follows:
   1- vect_analyze_data_refs(loop/bb): call
      compute_data_dependences_for_loop/bb to find and analyze all data-refs
      in the loop/bb and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.  */

bool
vect_analyze_data_refs (loop_vec_info loop_vinfo,
                        bb_vec_info bb_vinfo,
                        int *min_vf)
{
  struct loop *loop = NULL;
  basic_block bb = NULL;
  unsigned int i;
  vec<data_reference_p> datarefs;
  struct data_reference *dr;
  tree scalar_type;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_refs ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))
          || find_data_references_in_loop
               (loop, &LOOP_VINFO_DATAREFS (loop_vinfo)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: loop contains function calls"
                             " or data references that cannot be analyzed");
          return false;
        }

      datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    }
  else
    {
      gimple_stmt_iterator gsi;

      bb = BB_VINFO_BB (bb_vinfo);
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple stmt = gsi_stmt (gsi);
          if (!find_data_references_in_stmt (NULL, stmt,
                                             &BB_VINFO_DATAREFS (bb_vinfo)))
            {
              /* Mark the rest of the basic-block as unvectorizable.  */
              for (; !gsi_end_p (gsi); gsi_next (&gsi))
                {
                  stmt = gsi_stmt (gsi);
                  STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
                }
              break;
            }
        }

      datarefs = BB_VINFO_DATAREFS (bb_vinfo);
    }
  /* Go through the data-refs, check that the analysis succeeded.  Update
     the pointer from the stmt_vec_info struct to DR and vectype.  */

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      gimple stmt;
      stmt_vec_info stmt_info;
      tree base, offset, init;
      bool gather = false;
      int vf;

      if (!dr || !DR_REF (dr))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: unhandled data-ref ");
          return false;
        }

      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
          || !DR_STEP (dr))
        {
          /* If the target supports vector gather loads, see if they can
             be used.  */
          if (loop_vinfo
              && DR_IS_READ (dr)
              && !TREE_THIS_VOLATILE (DR_REF (dr))
              && targetm.vectorize.builtin_gather != NULL
              && !nested_in_vect_loop_p (loop, stmt))
            {
              struct data_reference *newdr
                = create_data_ref (NULL, loop_containing_stmt (stmt),
                                   DR_REF (dr), stmt, true);
              gcc_assert (newdr != NULL && DR_REF (newdr));
              if (DR_BASE_ADDRESS (newdr)
                  && DR_OFFSET (newdr)
                  && DR_INIT (newdr)
                  && DR_STEP (newdr)
                  && integer_zerop (DR_STEP (newdr)))
                {
                  dr = newdr;
                  gather = true;
                }
              else
                free_data_ref (newdr);
            }

          if (!gather)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: data ref analysis "
                                   "failed ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }

              if (bb_vinfo)
                break;

              return false;
            }
        }

      if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: base addr of dr is a "
                             "constant");

          if (bb_vinfo)
            break;

          if (gather)
            free_data_ref (dr);
          return false;
        }

      if (TREE_THIS_VOLATILE (DR_REF (dr)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not vectorized: volatile type ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          if (bb_vinfo)
            break;

          return false;
        }

      if (stmt_can_throw_internal (stmt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not vectorized: statement can throw an "
                               "exception ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          if (bb_vinfo)
            break;

          if (gather)
            free_data_ref (dr);
          return false;
        }

      if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
          && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not vectorized: statement is bitfield "
                               "access ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          if (bb_vinfo)
            break;

          if (gather)
            free_data_ref (dr);
          return false;
        }

      base = unshare_expr (DR_BASE_ADDRESS (dr));
      offset = unshare_expr (DR_OFFSET (dr));
      init = unshare_expr (DR_INIT (dr));

      if (is_gimple_call (stmt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not vectorized: dr in a call ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          if (bb_vinfo)
            break;

          if (gather)
            free_data_ref (dr);
          return false;
        }

      /* Update DR field in stmt_vec_info struct.  */

      /* If the dataref is in an inner-loop of the loop that is considered
         for vectorization, we also want to analyze the access relative to
         the outer-loop (DR contains information only relative to the
         inner-most enclosing loop).  We do that by building a reference to
         the first location accessed by the inner-loop, and analyze it
         relative to the outer-loop.  */
      if (loop && nested_in_vect_loop_p (loop, stmt))
        {
          tree outer_step, outer_base, outer_init;
          HOST_WIDE_INT pbitsize, pbitpos;
          tree poffset;
          enum machine_mode pmode;
          int punsignedp, pvolatilep;
          affine_iv base_iv, offset_iv;
          tree dinit;

          /* Build a reference to the first location accessed by the
             inner-loop: *(BASE+INIT).  (The first location is actually
             BASE+INIT+OFFSET, but we add OFFSET separately later).  */
          tree inner_base = build_fold_indirect_ref
                                (fold_build_pointer_plus (base, init));

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "analyze in outer-loop: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
            }

          outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
                          &poffset, &pmode, &punsignedp, &pvolatilep, false);
          gcc_assert (outer_base != NULL_TREE);

          if (pbitpos % BITS_PER_UNIT != 0)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "failed: bit offset alignment.\n");
              return false;
            }

          outer_base = build_fold_addr_expr (outer_base);
          if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
                          &base_iv, false))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "failed: evolution of base is not affine.\n");
              return false;
            }

          if (offset)
            {
              if (poffset)
                poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
                                       poffset);
              else
                poffset = offset;
            }

          if (!poffset)
            {
              offset_iv.base = ssize_int (0);
              offset_iv.step = ssize_int (0);
            }
          else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
                               &offset_iv, false))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "evolution of offset is not affine.\n");
              return false;
            }

          outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
          split_constant_offset (base_iv.base, &base_iv.base, &dinit);
          outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
          split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
          outer_init = size_binop (PLUS_EXPR, outer_init, dinit);

          outer_step = size_binop (PLUS_EXPR,
                                   fold_convert (ssizetype, base_iv.step),
                                   fold_convert (ssizetype, offset_iv.step));

          STMT_VINFO_DR_STEP (stmt_info) = outer_step;
          /* FIXME: Use canonicalize_base_object_address (base_iv.base);  */
          STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
          STMT_VINFO_DR_INIT (stmt_info) = outer_init;
          STMT_VINFO_DR_OFFSET (stmt_info) =
                                fold_convert (ssizetype, offset_iv.base);
          STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
                                size_int (highest_pow2_factor (offset_iv.base));

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "\touter base_address: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
              dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                 STMT_VINFO_DR_OFFSET (stmt_info));
              dump_printf (MSG_NOTE,
                           "\n\touter constant offset from base address: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                 STMT_VINFO_DR_INIT (stmt_info));
              dump_printf (MSG_NOTE, "\n\touter step: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                 STMT_VINFO_DR_STEP (stmt_info));
              dump_printf (MSG_NOTE, "\n\touter aligned to: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                 STMT_VINFO_DR_ALIGNED_TO (stmt_info));
            }
        }
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not vectorized: more than one data ref "
                               "in stmt: ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }

          if (bb_vinfo)
            break;

          if (gather)
            free_data_ref (dr);
          return false;
        }

      STMT_VINFO_DATA_REF (stmt_info) = dr;

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info) =
                get_vectype_for_scalar_type (scalar_type);
      if (!STMT_VINFO_VECTYPE (stmt_info))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not vectorized: no vectype for stmt: ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
                                 scalar_type);
            }

          if (bb_vinfo)
            break;

          if (gather)
            {
              STMT_VINFO_DATA_REF (stmt_info) = NULL;
              free_data_ref (dr);
            }
          return false;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "got vectype for stmt: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_generic_expr (MSG_NOTE, TDF_SLIM,
                                 STMT_VINFO_VECTYPE (stmt_info));
            }
        }

      /* Adjust the minimal vectorization factor according to the
         vector type.  */
      vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
      if (vf > *min_vf)
        *min_vf = vf;

      if (gather)
        {
          tree off;

          gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
          if (gather
              && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
            gather = false;
          if (!gather)
            {
              STMT_VINFO_DATA_REF (stmt_info) = NULL;
              free_data_ref (dr);
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: not suitable for gather "
                                   "load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              return false;
            }

          datarefs[i] = dr;
          STMT_VINFO_GATHER_P (stmt_info) = true;
        }
      else if (loop_vinfo
               && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
        {
          if (nested_in_vect_loop_p (loop, stmt)
              || !DR_IS_READ (dr))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: not suitable for strided "
                                   "load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              return false;
            }
          STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
        }
    }

  /* If we stopped analysis at the first dataref we could not analyze
     when trying to vectorize a basic-block, mark the rest of the datarefs
     as not vectorizable and truncate the vector of datarefs.  That
     avoids spending useless time in analyzing their dependence.  */
  if (i != datarefs.length ())
    {
      gcc_assert (bb_vinfo != NULL);
      for (unsigned j = i; j < datarefs.length (); ++j)
        {
          data_reference_p dr = datarefs[j];
          STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
          free_data_ref (dr);
        }
      datarefs.truncate (i);
    }

  return true;
}
/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme appends the
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
   the name of vectorizer generated variables, and appends that to NAME if
   provided.  */

tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
  {
  case vect_simple_var:
    prefix = "vect";
    break;
  case vect_scalar_var:
    prefix = "stmp";
    break;
  case vect_pointer_var:
    prefix = "vectp";
    break;
  default:
    gcc_unreachable ();
  }

  if (name)
    {
      char* tmp = concat (prefix, "_", name, NULL);
      new_vect_var = create_tmp_reg (type, tmp);
      free (tmp);
    }
  else
    new_vect_var = create_tmp_reg (type, prefix);

  return new_vect_var;
}
/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP:    Specify relative to which loop-nest should the address be computed.
            For example, when the dataref is in an inner-loop nested in an
            outer-loop that is now being vectorized, LOOP can be either the
            outer-loop, or the inner-loop.  The first memory location accessed
            by the following dataref ('in' points to short):

                for (i=0; i<N; i++)
                   for (j=0; j<M; j++)
                     s += in[i+j]

            is as follows:
            if LOOP=i_loop:     &in             (relative to i_loop)
            if LOOP=j_loop:     &in+i*2B        (relative to j_loop)

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

tree
vect_create_addr_base_for_vector_ref (gimple stmt,
                                      gimple_seq *new_stmt_list,
                                      tree offset,
                                      struct loop *loop)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base;
  const char *base_name;
  tree addr_base;
  tree dest;
  gimple_seq seq = NULL;
  tree base_offset;
  tree init;
  tree vect_ptr_type;
  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
    {
      struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);

      gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));

      data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
      base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
      init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
    }
  else
    {
      data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
      base_offset = unshare_expr (DR_OFFSET (dr));
      init = unshare_expr (DR_INIT (dr));
    }

  if (loop_vinfo)
    base_name = get_name (data_ref_base);
  else
    {
      base_offset = ssize_int (0);
      init = ssize_int (0);
      base_name = get_name (DR_REF (dr));
    }

  /* Create base_offset.  */
  base_offset = size_binop (PLUS_EXPR,
                            fold_convert (sizetype, base_offset),
                            fold_convert (sizetype, init));

  if (offset)
    {
      offset = fold_build2 (MULT_EXPR, sizetype,
                            fold_convert (sizetype, offset), step);
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
                                 base_offset, offset);
    }

  /* base + base_offset.  */
  if (loop_vinfo)
    addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
  else
    {
      addr_base = build1 (ADDR_EXPR,
                          build_pointer_type (TREE_TYPE (DR_REF (dr))),
                          unshare_expr (DR_REF (dr)));
    }

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
  addr_base = fold_convert (vect_ptr_type, addr_base);
  dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
  addr_base = force_gimple_operand (addr_base, &seq, false, dest);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (DR_PTR_INFO (dr)
      && TREE_CODE (addr_base) == SSA_NAME)
    {
      duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
      if (offset)
        mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "created ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
    }

  return addr_base;
}
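/* Illustrative example (not from the original sources): for a dataref
   A[init] with OFFSET supplied, the preheader code built above is
   conceptually

     base_offset = (sizetype) DR_OFFSET + (sizetype) init
                   + (sizetype) OFFSET * step;
     vectp_A = (vectype *) (DR_BASE_ADDRESS + base_offset);

   with the resulting SSA_NAME returned to the caller and its defining
   stmts collected in NEW_STMT_LIST.  */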
/* Function vect_create_data_ref_ptr.

   Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
   location accessed in the loop by STMT, along with the def-use update
   chain to appropriately advance the pointer through the loop iterations.
   Also set aliasing information for the pointer.  This pointer is used by
   the callers to this function to create a memory reference expression for
   vector load/store access.

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
         GIMPLE_ASSIGN <name, data-ref> or
         GIMPLE_ASSIGN <data-ref, name>.
   2. AGGR_TYPE: the type of the reference, which should be either a vector
        or an array.
   3. AT_LOOP: the loop where the vector memref is to be created.
   4. OFFSET (optional): an offset to be added to the initial address accessed
        by the data-ref in STMT.
   5. BSI: location where the new stmts are to be placed if there is no loop
   6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
        pointing to the initial address.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *ap;
      ap = (v8hi *)initial_address;

      if OFFSET is not supplied:
         initial_address = &a[init];
      if OFFSET is supplied:
         initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Set INV_P to true if the access pattern of the data reference in the
      vectorized loop is invariant.  Set it to false otherwise.

   4. Return the pointer.  */

tree
vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
                          tree offset, tree *initial_address,
                          gimple_stmt_iterator *gsi, gimple *ptr_incr,
                          bool only_init, bool *inv_p)
{
  const char *base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  bool nested_in_vect_loop = false;
  struct loop *containing_loop = NULL;
  tree aggr_ptr_type;
  tree aggr_ptr;
  tree new_temp;
  gimple vec_stmt;
  gimple_seq new_stmt_list = NULL;
  edge pe = NULL;
  basic_block new_bb;
  tree aggr_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree aptr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree indx_before_incr, indx_after_incr;
  gimple incr;
  tree step;
  bool negative;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
              || TREE_CODE (aggr_type) == VECTOR_TYPE);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      containing_loop = (gimple_bb (stmt))->loop_father;
      pe = loop_preheader_edge (loop);
    }
  else
    {
      gcc_assert (bb_vinfo);
      only_init = true;
      *ptr_incr = NULL;
    }

  /* Check the step (evolution) of the load in LOOP, and record
     whether it's invariant.  */
  if (nested_in_vect_loop)
    step = STMT_VINFO_DR_STEP (stmt_info);
  else
    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));

  if (tree_int_cst_compare (step, size_zero_node) == 0)
    *inv_p = true;
  else
    *inv_p = false;
  negative = tree_int_cst_compare (step, size_zero_node) < 0;

  /* Create an expression for the first address accessed by this load
     in LOOP.  */
  base_name = get_name (DR_BASE_ADDRESS (dr));

  if (dump_enabled_p ())
    {
      tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
      dump_printf_loc (MSG_NOTE, vect_location,
                       "create %s-pointer variable to type: ",
                       tree_code_name[(int) TREE_CODE (aggr_type)]);
      dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
      if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
        dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
        dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
      else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
        dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
      else
        dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
    }

  /* (1) Create the new aggregate-pointer variable.
     Vector and array types inherit the alias set of their component
     type by default so we need to use a ref-all pointer if the data
     reference does not conflict with the created aggregated data
     reference because it is not addressable.  */
  bool need_ref_all = false;
  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
                              get_alias_set (DR_REF (dr))))
    need_ref_all = true;
  /* Likewise for any of the data references in the stmt group.  */
  else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
    {
      gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
      do
        {
          stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
          struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
          if (!alias_sets_conflict_p (get_alias_set (aggr_type),
                                      get_alias_set (DR_REF (sdr))))
            {
              need_ref_all = true;
              break;
            }
          orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
        }
      while (orig_stmt);
    }
  aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
                                               need_ref_all);
  aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);


  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
     def-use update cycles for the pointer: one relative to the outer-loop
     (LOOP), which is what steps (3) and (4) below do.  The other is relative
     to the inner-loop (which is the inner-most loop containing the dataref),
     and this is done by step (5) below.

     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
     inner-most loop, and so steps (3),(4) work the same, and step (5) is
     redundant.  Steps (3),(4) create the following:

        vp0 = &base_addr;
        LOOP:   vp1 = phi(vp0,vp2)
                ...
                ...
                vp2 = vp1 + step
                goto LOOP

     If there is an inner-loop nested in loop, then step (5) will also be
     applied, and an additional update in the inner-loop will be created:

        vp0 = &base_addr;
        LOOP:   vp1 = phi(vp0,vp2)
                ...
        inner:     vp3 = phi(vp1,vp4)
                   vp4 = vp3 + inner_step
                   if () goto inner
                ...
                vp2 = vp1 + step
                if () goto LOOP   */

  /* (2) Calculate the initial address of the aggregate-pointer, and set
     the aggregate-pointer to point to it before the loop.  */

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */

  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
                                                   offset, loop);
  if (new_stmt_list)
    {
      if (pe)
        {
          new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
          gcc_assert (!new_bb);
        }
      else
        gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
    }

  *initial_address = new_temp;

  /* Create: p = (aggr_type *) initial_base  */
  if (TREE_CODE (new_temp) != SSA_NAME
      || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
    {
      vec_stmt = gimple_build_assign (aggr_ptr,
                                      fold_convert (aggr_ptr_type, new_temp));
      aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
      gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
      if (pe)
        {
          new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
          gcc_assert (!new_bb);
        }
      else
        gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
    }
  else
    aggr_ptr_init = new_temp;

  /* (3) Handle the updating of the aggregate-pointer inside the loop.
     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
     inner-loop nested in LOOP (during outer-loop vectorization).  */

  /* No update in loop is required.  */
  if (only_init && (!loop_vinfo || at_loop == loop))
    aptr = aggr_ptr_init;
  else
    {
      /* The step of the aggregate pointer is the type size.  */
      tree step = TYPE_SIZE_UNIT (aggr_type);
      /* One exception to the above is when the scalar step of the load in
         LOOP is zero.  In this case the step here is also zero.  */
      if (*inv_p)
        step = size_zero_node;
      else if (negative)
        step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (aggr_ptr_init,
                 fold_convert (aggr_ptr_type, step),
                 aggr_ptr, loop, &incr_gsi, insert_after,
                 &indx_before_incr, &indx_after_incr);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      if (ptr_incr)
        *ptr_incr = incr;

      aptr = indx_before_incr;
    }

  if (!nested_in_vect_loop || only_init)
    return aptr;


  /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
     nested in LOOP, if it exists.  */

  gcc_assert (nested_in_vect_loop);
  if (!only_init)
    {
      standard_iv_increment_position (containing_loop, &incr_gsi,
                                      &insert_after);
      create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
                 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
                 &indx_after_incr);
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
        {
          duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
          duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
        }
      if (ptr_incr)
        *ptr_incr = incr;

      return indx_before_incr;
    }
  else
    gcc_unreachable ();
}
/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size.  If requested,
   i.e. if PTR-INCR is given, then also connect the new increment stmt
   to the existing def-use update-chain of the pointer, by modifying
   the PTR_INCR as illustrated below:

   The pointer def-use update-chain before this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
        PTR_INCR:       p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
                        DATAREF_PTR = phi (p_0, p_2)
                        ....
                        NEW_DATAREF_PTR = DATAREF_PTR + BUMP
                        ....
        PTR_INCR:       p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
                 in the loop.
   PTR_INCR - optional.  The stmt that updates the pointer in each iteration
              of the loop.  The increment amount across iterations is expected
              to be vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.
   BUMP - optional.  The offset by which to bump the pointer.  If not given,
          the offset is assumed to be vector_size.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */

tree
bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
                 gimple stmt, tree bump)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree update = TYPE_SIZE_UNIT (vectype);
  gimple incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  if (bump)
    update = bump;

  new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
  incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
                                            dataref_ptr, update);
  vect_finish_stmt_generation (stmt, incr_stmt, gsi);

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    {
      duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
      mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
    }

  if (!ptr_incr)
    return new_dataref_ptr;

  /* Update the vector-pointer's cross-iteration increment.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
        SET_USE (use_p, new_dataref_ptr);
      else
        gcc_assert (tree_int_cst_compare (use, update) == 0);
    }

  return new_dataref_ptr;
}
/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE.  */

tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *name;
  char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  name = get_name (scalar_dest);
  if (name)
    asprintf (&new_name, "%s_%u", name, SSA_NAME_VERSION (scalar_dest));
  else
    asprintf (&new_name, "_%u", SSA_NAME_VERSION (scalar_dest));
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
  free (new_name);

  return vec_dest;
}
/* Function vect_grouped_store_supported.

   Returns TRUE if interleave high and interleave low permutations
   are supported, and FALSE otherwise.  */

bool
vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode = TYPE_MODE (vectype);

  /* vect_permute_store_chain requires the group size to be a power of two.  */
  if (exact_log2 (count) == -1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "the size of the group of accesses"
                         " is not a power of 2");
      return false;
    }

  /* Check that the permutation is supported.  */
  if (VECTOR_MODE_P (mode))
    {
      unsigned int i, nelt = GET_MODE_NUNITS (mode);
      unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
      for (i = 0; i < nelt / 2; i++)
        {
          sel[i * 2] = i;
          sel[i * 2 + 1] = i + nelt;
        }
      if (can_vec_perm_p (mode, false, sel))
        {
          for (i = 0; i < nelt; i++)
            sel[i] += nelt / 2;
          if (can_vec_perm_p (mode, false, sel))
            return true;
        }
    }

  if (dump_enabled_p ())
    dump_printf (MSG_MISSED_OPTIMIZATION,
                 "interleave op not supported by target.");
  return false;
}
/* Return TRUE if vec_store_lanes is available for COUNT vectors of
   type VECTYPE.  */

bool
vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  return vect_lanes_optab_supported_p ("vec_store_lanes",
                                       vec_store_lanes_optab,
                                       vectype, count);
}
3929 /* Function vect_permute_store_chain.
3931 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3932 a power of 2, generate interleave_high/low stmts to reorder the data
3933 correctly for the stores. Return the final references for stores in
3936 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3937 The input is 4 vectors each containing 8 elements. We assign a number to
3938 each element, the input sequence is:
3940 1st vec: 0 1 2 3 4 5 6 7
3941 2nd vec: 8 9 10 11 12 13 14 15
3942 3rd vec: 16 17 18 19 20 21 22 23
3943 4th vec: 24 25 26 27 28 29 30 31
3945 The output sequence should be:
3947 1st vec: 0 8 16 24 1 9 17 25
3948 2nd vec: 2 10 18 26 3 11 19 27
3949 3rd vec: 4 12 20 28 5 13 21 30
3950 4th vec: 6 14 22 30 7 15 23 31
3952 i.e., we interleave the contents of the four vectors in their order.
3954 We use interleave_high/low instructions to create such output. The input of
3955 each interleave_high/low operation is two vectors:
3958 the even elements of the result vector are obtained left-to-right from the
3959 high/low elements of the first vector. The odd elements of the result are
3960 obtained left-to-right from the high/low elements of the second vector.
3961 The output of interleave_high will be: 0 4 1 5
3962 and of interleave_low: 2 6 3 7
3965 The permutation is done in log LENGTH stages. In each stage interleave_high
3966 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3967 where the first argument is taken from the first half of DR_CHAIN and the
3968 second argument from it's second half.
3971 I1: interleave_high (1st vec, 3rd vec)
3972 I2: interleave_low (1st vec, 3rd vec)
3973 I3: interleave_high (2nd vec, 4th vec)
3974 I4: interleave_low (2nd vec, 4th vec)
3976 The output for the first stage is:
3978 I1: 0 16 1 17 2 18 3 19
3979 I2: 4 20 5 21 6 22 7 23
3980 I3: 8 24 9 25 10 26 11 27
3981 I4: 12 28 13 29 14 30 15 31
3983 The output of the second stage, i.e. the final result is:
3985 I1: 0 8 16 24 1 9 17 25
3986 I2: 2 10 18 26 3 11 19 27
3987 I3: 4 12 20 28 5 13 21 30
3988 I4: 6 14 22 30 7 15 23 31. */
3991 vect_permute_store_chain (vec
<tree
> dr_chain
,
3992 unsigned int length
,
3994 gimple_stmt_iterator
*gsi
,
3995 vec
<tree
> *result_chain
)
  tree vect1, vect2, high, low;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  tree perm_mask_low, perm_mask_high;
  unsigned int i, n;
  unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
	  length * sizeof (tree));

  /* Build the {0, nelt, 1, nelt+1, ...} mask for interleave_high.  */
  for (i = 0, n = nelt / 2; i < n; i++)
    {
      sel[i * 2] = i;
      sel[i * 2 + 1] = i + nelt;
    }
  perm_mask_high = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_high != NULL);

  /* Shift every index by nelt/2 to obtain the interleave_low mask.  */
  for (i = 0; i < nelt; i++)
    sel[i] += nelt / 2;
  perm_mask_low = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_low != NULL);

  for (i = 0, n = exact_log2 (length); i < n; i++)
    {
      for (j = 0; j < length / 2; j++)
	{
	  vect1 = dr_chain[j];
	  vect2 = dr_chain[j + length / 2];

	  /* Create interleaving stmt:
	     high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}>  */
	  high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
	  perm_stmt
	    = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
					    vect1, vect2, perm_mask_high);
	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	  (*result_chain)[2 * j] = high;

	  /* Create interleaving stmt:
	     low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
						 nelt*3/2+1, ...}>  */
	  low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
	  perm_stmt
	    = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
					    vect1, vect2, perm_mask_low);
	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	  (*result_chain)[2 * j + 1] = low;
	}
      memcpy (dr_chain.address (), result_chain->address (),
	      length * sizeof (tree));
    }
}
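
/* For example (illustrative), with nelt == 8 the two masks built by the
   loops above are:
     perm_mask_high: {0, 8, 1, 9, 2, 10, 3, 11}
     perm_mask_low:  {4, 12, 5, 13, 6, 14, 7, 15}
   VEC_PERM_EXPR indexes the 2*nelt-element concatenation of its first two
   operands, so indices 0..7 select from VECT1 and 8..15 from VECT2.  */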

/* Function vect_setup_realignment.

   This function is called when vectorizing an unaligned load using
   the dr_explicit_realign[_optimized] scheme.
   This function generates the following code at the loop prolog:

      p = initial_addr;
   x  msq_init = *(floor(p));   # prolog load
      realignment_token = call target_builtin;
    loop:
   x  msq = phi (msq_init, ---)

   The stmts marked with x are generated only for the case of
   dr_explicit_realign_optimized.

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT, and a "floor-aligned" load using that pointer.
   It also generates code to compute the "realignment-token" (if the relevant
   target hook was defined), and creates a phi-node at the loop-header bb
   whose arguments are the result of the prolog-load (created by this
   function) and the result of a load that takes place in the loop (to be
   created by the caller to this function).

   For the case of dr_explicit_realign_optimized:
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:
    loop:
      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   For the case of dr_explicit_realign:
    loop:
      msq = *(floor(p));         # load in loop
      p' = p + (VS-1);
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   Input:
   STMT - (scalar) load stmt to be vectorized.  This load accesses
          a memory location that may be unaligned.
   BSI - place where new code is to be inserted.
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
			      is used.

   Output:
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
		       target hook, if defined.
   Return value - the result of the loop-header phi node.  */
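
/* A scalar model of what realign_load computes (illustrative only): given
   the two aligned NELT-element loads MSQ and LSQ that straddle the desired
   unaligned address, and an element shift K (0 <= K < NELT) derived from
   the realignment token:

     for (i = 0; i < NELT; i++)
       result[i] = (i + K < NELT) ? msq[i + K] : lsq[i + K - NELT];

   i.e. the first NELT-K elements come from MSQ and the remaining K
   elements from LSQ.  */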
tree
vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
			tree *realignment_token,
			enum dr_alignment_support alignment_support_scheme,
			tree init_addr,
			struct loop **at_loop)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = NULL;
  edge pe = NULL;
  tree scalar_dest = gimple_assign_lhs (stmt);
  tree vec_dest;
  gimple inc;
  tree ptr;
  tree data_ref;
  gimple new_stmt;
  basic_block new_bb;
  tree msq_init = NULL_TREE;
  tree new_temp;
  gimple phi_stmt;
  tree msq = NULL_TREE;
  gimple_seq stmts = NULL;
  bool inv_p;
  bool compute_in_loop = false;
  bool nested_in_vect_loop = false;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  struct loop *loop_for_initial_load = NULL;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
    }

  gcc_assert (alignment_support_scheme == dr_explicit_realign
	      || alignment_support_scheme == dr_explicit_realign_optimized);

  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
	done (for the optimized realignment scheme).  */

  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader).  Otherwise, INIT_ADDR has already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP.  This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment).  In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) has to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP.  */
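
  /* Worked example (illustrative): if VS is 16 bytes and the first access
     of the vectorized LOOP is at address A, iteration i accesses A + i*16,
     and (A + i*16) % 16 == A % 16 for every i, so the misalignment is
     invariant across LOOP's iterations.  */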

  if (init_addr != NULL_TREE || !loop_vinfo)
    {
      compute_in_loop = true;
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
    }

  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration.  In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP.  If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP.  Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop).  */

  if (nested_in_vect_loop)
    {
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      bool invariant_in_outerloop =
	(tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    }
  else
    loop_for_initial_load = loop;
  if (at_loop)
    *at_loop = loop_for_initial_load;

  if (loop_for_initial_load)
    pe = loop_preheader_edge (loop_for_initial_load);

  /* 3. For the case of the optimized realignment, create the first vector
	load at the loop preheader.  */

  if (alignment_support_scheme == dr_explicit_realign_optimized)
    {
      /* Create msq_init = *(floor(p1)) in the loop preheader.  */

      gcc_assert (!compute_in_loop);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
				      NULL_TREE, &init_addr, NULL, &inc,
				      &inv_p);
      new_temp = copy_ssa_name (ptr, NULL);
      new_stmt = gimple_build_assign_with_ops
		   (BIT_AND_EXPR, new_temp, ptr,
		    build_int_cst (TREE_TYPE (ptr),
				   -(HOST_WIDE_INT) TYPE_ALIGN_UNIT (vectype)));
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      data_ref
	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
      new_stmt = gimple_build_assign (vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      if (pe)
	{
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);

      msq_init = gimple_assign_lhs (new_stmt);
    }

  /* 4. Create realignment token using a target builtin, if available.
	It is done either inside the containing loop, or before LOOP (as
	determined above).  */

  if (targetm.vectorize.builtin_mask_for_load)
    {
      tree builtin_decl;

      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
      if (!init_addr)
	{
	  /* Generate the INIT_ADDR computation outside LOOP.  */
	  init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
							    NULL_TREE, loop);
	  if (loop)
	    {
	      pe = loop_preheader_edge (loop);
	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
	      gcc_assert (!new_bb);
	    }
	  else
	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	}

      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
      vec_dest =
	vect_create_destination_var (scalar_dest,
				     gimple_call_return_type (new_stmt));
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);

      if (compute_in_loop)
	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      else
	{
	  /* Generate the misalignment computation outside LOOP.  */
	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}

      *realignment_token = gimple_call_lhs (new_stmt);

      /* The result of the CALL_EXPR to this builtin is determined from
	 the value of the parameter and no global variables are touched
	 which makes the builtin a "const" function.  Requiring the
	 builtin to have the "const" attribute makes it unnecessary
	 to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  if (alignment_support_scheme == dr_explicit_realign)
    return msq;

  gcc_assert (!compute_in_loop);
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);

  /* 5. Create msq = phi <msq_init, lsq> in loop.  */

  pe = loop_preheader_edge (containing_loop);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest, NULL);
  phi_stmt = create_phi_node (msq, containing_loop->header);
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);

  return msq;
}

/* Function vect_grouped_load_supported.

   Returns TRUE if even and odd permutations are supported,
   and FALSE otherwise.  */

bool
vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode = TYPE_MODE (vectype);

  /* vect_permute_load_chain requires the group size to be a power of two.  */
  if (exact_log2 (count) == -1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "the size of the group of accesses"
			 " is not a power of 2");
      return false;
    }

  /* Check that the permutation is supported.  */
  if (VECTOR_MODE_P (mode))
    {
      unsigned int i, nelt = GET_MODE_NUNITS (mode);
      unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

      /* Even mask: {0, 2, 4, ...}.  */
      for (i = 0; i < nelt; i++)
	sel[i] = i * 2;
      if (can_vec_perm_p (mode, false, sel))
	{
	  /* Odd mask: {1, 3, 5, ...}.  */
	  for (i = 0; i < nelt; i++)
	    sel[i] = i * 2 + 1;
	  if (can_vec_perm_p (mode, false, sel))
	    return true;
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		     "extract even/odd not supported by target");
  return false;
}

/* Return TRUE if vec_load_lanes is available for COUNT vectors of
   type VECTYPE.  */

bool
vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  return vect_lanes_optab_supported_p ("vec_load_lanes",
				       vec_load_lanes_optab,
				       vectype, count);
}

/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly.  Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number to
   each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0 4  8 12 16 20 24 28
   2nd vec:  1 5  9 13 17 21 25 29
   3rd vec:  2 6 10 14 18 22 26 30
   4th vec:  3 7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output.  The input of
   each extract_even/odd operation is two vectors
   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   and the output is the vector of extracted even/odd elements.  The output of
   extract_even will be:   0 2 4 6
   and of extract_odd:     1 3 5 7

   The permutation is done in log LENGTH stages.  In each stage extract_even
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   their order.  In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:  0  2  4  6  8 10 12 14
   E2:  1  3  5  7  9 11 13 15
   E3: 16 18 20 22 24 26 28 30
   E4: 17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of extract_even operation and then the
   output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
   The input for the second stage is:

   1st vec (E1):  0  2  4  6  8 10 12 14
   2nd vec (E3): 16 18 20 22 24 26 28 30
   3rd vec (E2):  1  3  5  7  9 11 13 15
   4th vec (E4): 17 19 21 23 25 27 29 31

   The output of the second stage:

   E1: 0 4  8 12 16 20 24 28
   E2: 2 6 10 14 18 22 26 30
   E3: 1 5  9 13 17 21 25 29
   E4: 3 7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1):  0 4  8 12 16 20 24 28
   2nd vec (E3):  1 5  9 13 17 21 25 29
   3rd vec (E2):  2 6 10 14 18 22 26 30
   4th vec (E4):  3 7 11 15 19 23 27 31.  */
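
/* A minimal scalar model of one stage of this permutation (illustrative
   only, not part of the vectorizer): the even-extracted vector of each
   adjacent pair goes to the first half of the result chain, the
   odd-extracted vector to the second half:

     void
     extract_stage (int chain[][NELT], int out[][NELT], unsigned len)
     {
       unsigned i, j;
       for (j = 0; j < len; j += 2)
	 for (i = 0; i < NELT / 2; i++)
	   {
	     out[j/2][i] = chain[j][2*i];
	     out[j/2][NELT/2 + i] = chain[j + 1][2*i];
	     out[j/2 + len/2][i] = chain[j][2*i + 1];
	     out[j/2 + len/2][NELT/2 + i] = chain[j + 1][2*i + 1];
	   }
     }

   Applying this stage log2(LEN) times, copying OUT back to CHAIN in between,
   produces the RESULT_CHAIN ordering shown above.  */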
static void
vect_permute_load_chain (vec<tree> dr_chain,
			 unsigned int length,
			 gimple stmt,
			 gimple_stmt_iterator *gsi,
			 vec<tree> *result_chain)
{
  tree data_ref, first_vect, second_vect;
  tree perm_mask_even, perm_mask_odd;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  unsigned int i, j, log_length = exact_log2 (length);
  unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
	  length * sizeof (tree));

  /* Build the {0, 2, 4, ...} mask for extract_even.  */
  for (i = 0; i < nelt; ++i)
    sel[i] = i * 2;
  perm_mask_even = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_even != NULL);

  /* Build the {1, 3, 5, ...} mask for extract_odd.  */
  for (i = 0; i < nelt; ++i)
    sel[i] = i * 2 + 1;
  perm_mask_odd = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_odd != NULL);

  for (i = 0; i < log_length; i++)
    {
      for (j = 0; j < length; j += 2)
	{
	  first_vect = dr_chain[j];
	  second_vect = dr_chain[j+1];

	  /* data_ref = permute_even (first_data_ref, second_data_ref);  */
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
	  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
						    first_vect, second_vect,
						    perm_mask_even);
	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	  (*result_chain)[j/2] = data_ref;

	  /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
	  perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
						    first_vect, second_vect,
						    perm_mask_odd);
	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	  (*result_chain)[j/2+length/2] = data_ref;
	}
      memcpy (dr_chain.address (), result_chain->address (),
	      length * sizeof (tree));
    }
}

/* Function vect_transform_grouped_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the result vectorized statements to
   the scalar statements.  */

void
vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
			     gimple_stmt_iterator *gsi)
{
  vec<tree> result_chain = vNULL;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain, it contains
     permuted vectors, that are ready for vector computation.  */
  result_chain.create (size);
  vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
  vect_record_grouped_load_vectors (stmt, result_chain);
  result_chain.release ();
}

/* RESULT_CHAIN contains the output of a group of grouped loads that were
   generated as part of the vectorization of STMT.  Assign the statement
   for each vector to the associated scalar statement.  */

void
vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  gimple next_stmt, new_stmt;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
	break;

      /* Skip the gaps.  Loads created for the gaps will be removed by dead
	 code elimination pass later.  No need to check for the first stmt in
	 the group, since it always exists.
	 GROUP_GAP is the number of steps in elements from the previous
	 access (if there is no gap GROUP_GAP is 1).  We skip loads that
	 correspond to the gaps.  */
      if (next_stmt != first_stmt
	  && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
	{
	  gap_count++;
	  continue;
	}

      while (next_stmt)
	{
	  new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
	     copies, and we put the new vector statement in the first available
	     RELATED_STMT field.  */
	  if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
	    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
	  else
	    {
	      if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
		{
		  gimple prev_stmt =
		    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
		  gimple rel_stmt =
		    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
		  while (rel_stmt)
		    {
		      prev_stmt = rel_stmt;
		      rel_stmt =
			STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
		    }

		  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
		    new_stmt;
		}
	    }

	  next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
	  gap_count = 1;
	  /* If NEXT_STMT accesses the same DR as the previous statement,
	     put the same TMP_DATA_REF as its vectorized statement; otherwise
	     get the next data-ref from RESULT_CHAIN.  */
	  if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
	    break;
	}
    }
}

/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
   on ALIGNMENT bit boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  /* We cannot change alignment of common or external symbols as another
     translation unit may contain a definition with lower alignment.
     The rules of common symbol linking mean that the definition
     will override the common symbol.  The same is true for constant
     pool entries which may be shared and are not properly merged
     by LTO.  */
  if (DECL_EXTERNAL (decl)
      || DECL_COMMON (decl)
      || DECL_IN_CONSTANT_POOL (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  /* Do not override the alignment as specified by the ABI when the used
     attribute is set.  */
  if (DECL_PRESERVE_P (decl))
    return false;

  /* Do not override explicit alignment set by the user when an explicit
     section name is also used.  This is a common idiom used by many
     software projects.  */
  if (DECL_SECTION_NAME (decl) != NULL_TREE
      && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}
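
/* A typical use (sketch, following vect_compute_data_ref_alignment): check
   before forcing the alignment of the base declaration:

     if (vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
       {
	 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
	 DECL_USER_ALIGN (base) = 1;
       }
*/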

/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
			       bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
        }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      }  */

  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
	  && (!targetm.vectorize.builtin_mask_for_load
	      || targetm.vectorize.builtin_mask_for_load ()))
	{
	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
	  if ((nested_in_vect_loop
	       && (TREE_INT_CST_LOW (DR_STEP (dr))
		   != GET_MODE_SIZE (TYPE_MODE (vectype))))
	      || !loop_vinfo)
	    return dr_explicit_realign;
	  else
	    return dr_explicit_realign_optimized;
	}
      if (!known_alignment_for_access_p (dr))
	is_packed = not_size_aligned (DR_REF (dr));

      if (targetm.vectorize.
	  support_vector_misalignment (mode, type,
				       DR_MISALIGNMENT (dr), is_packed))
	/* Can't software pipeline the loads, but can at least do them.  */
	return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr))
	is_packed = not_size_aligned (DR_REF (dr));

      if (targetm.vectorize.
	  support_vector_misalignment (mode, type,
				       DR_MISALIGNMENT (dr), is_packed))
	return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}