1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
45 /* Return the smallest scalar part of STMT.
46 This is used to determine the vectype of the stmt. We generally set the
47 vectype according to the type of the result (lhs). For stmts whose
48 result-type is different than the type of the arguments (e.g., demotion,
49 promotion), vectype will be reset appropriately (later). Note that we have
50 to visit the smallest datatype in this function, because that determines the
51 VF. If the smallest datatype in the loop is present only as the rhs of a
52 promotion operation - we'd miss it.
53 Such a case, where a variable of this datatype does not appear in the lhs
54 anywhere in the loop, can only occur if it's an invariant: e.g.:
55 'int_x = (int) short_inv', which we'd expect to have been optimized away by
56 invariant motion. However, we cannot rely on invariant motion to always take
57 invariants out of the loop, and so in the case of promotion we also have to
59 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
63 vect_get_smallest_scalar_type (gimple stmt
, HOST_WIDE_INT
*lhs_size_unit
,
64 HOST_WIDE_INT
*rhs_size_unit
)
66 tree scalar_type
= gimple_expr_type (stmt
);
67 HOST_WIDE_INT lhs
, rhs
;
69 lhs
= rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
71 if (is_gimple_assign (stmt
)
72 && (gimple_assign_cast_p (stmt
)
73 || gimple_assign_rhs_code (stmt
) == WIDEN_MULT_EXPR
74 || gimple_assign_rhs_code (stmt
) == FLOAT_EXPR
))
76 tree rhs_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
78 rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type
));
80 scalar_type
= rhs_type
;
89 /* Find the place of the data-ref in STMT in the interleaving chain that starts
90 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
93 vect_get_place_in_interleaving_chain (gimple stmt
, gimple first_stmt
)
95 gimple next_stmt
= first_stmt
;
98 if (first_stmt
!= DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
101 while (next_stmt
&& next_stmt
!= stmt
)
104 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
114 /* Function vect_insert_into_interleaving_chain.
116 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
119 vect_insert_into_interleaving_chain (struct data_reference
*dra
,
120 struct data_reference
*drb
)
124 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
125 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
127 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
128 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
131 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
132 if (tree_int_cst_compare (next_init
, DR_INIT (dra
)) > 0)
135 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
136 DR_GROUP_NEXT_DR (stmtinfo_a
) = next
;
140 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
143 /* We got to the end of the list. Insert here. */
144 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
145 DR_GROUP_NEXT_DR (stmtinfo_a
) = NULL
;
149 /* Function vect_update_interleaving_chain.
151 For two data-refs DRA and DRB that are a part of a chain interleaved data
152 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
154 There are four possible cases:
155 1. New stmts - both DRA and DRB are not a part of any chain:
158 2. DRB is a part of a chain and DRA is not:
159 no need to update FIRST_DR
160 no need to insert DRB
161 insert DRA according to init
162 3. DRA is a part of a chain and DRB is not:
163 if (init of FIRST_DR > init of DRB)
165 NEXT(FIRST_DR) = previous FIRST_DR
167 insert DRB according to its init
168 4. both DRA and DRB are in some interleaving chains:
169 choose the chain with the smallest init of FIRST_DR
170 insert the nodes of the second chain into the first one. */
173 vect_update_interleaving_chain (struct data_reference
*drb
,
174 struct data_reference
*dra
)
176 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
177 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
178 tree next_init
, init_dra_chain
, init_drb_chain
;
179 gimple first_a
, first_b
;
181 gimple node
, prev
, next
, first_stmt
;
183 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
184 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
186 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_STMT (drb
);
187 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
188 DR_GROUP_NEXT_DR (stmtinfo_b
) = DR_STMT (dra
);
192 /* 2. DRB is a part of a chain and DRA is not. */
193 if (!DR_GROUP_FIRST_DR (stmtinfo_a
) && DR_GROUP_FIRST_DR (stmtinfo_b
))
195 DR_GROUP_FIRST_DR (stmtinfo_a
) = DR_GROUP_FIRST_DR (stmtinfo_b
);
196 /* Insert DRA into the chain of DRB. */
197 vect_insert_into_interleaving_chain (dra
, drb
);
201 /* 3. DRA is a part of a chain and DRB is not. */
202 if (DR_GROUP_FIRST_DR (stmtinfo_a
) && !DR_GROUP_FIRST_DR (stmtinfo_b
))
204 gimple old_first_stmt
= DR_GROUP_FIRST_DR (stmtinfo_a
);
205 tree init_old
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
209 if (tree_int_cst_compare (init_old
, DR_INIT (drb
)) > 0)
211 /* DRB's init is smaller than the init of the stmt previously marked
212 as the first stmt of the interleaving chain of DRA. Therefore, we
213 update FIRST_STMT and put DRB in the head of the list. */
214 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_STMT (drb
);
215 DR_GROUP_NEXT_DR (stmtinfo_b
) = old_first_stmt
;
217 /* Update all the stmts in the list to point to the new FIRST_STMT. */
218 tmp
= old_first_stmt
;
221 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp
)) = DR_STMT (drb
);
222 tmp
= DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp
));
227 /* Insert DRB in the list of DRA. */
228 vect_insert_into_interleaving_chain (drb
, dra
);
229 DR_GROUP_FIRST_DR (stmtinfo_b
) = DR_GROUP_FIRST_DR (stmtinfo_a
);
234 /* 4. both DRA and DRB are in some interleaving chains. */
235 first_a
= DR_GROUP_FIRST_DR (stmtinfo_a
);
236 first_b
= DR_GROUP_FIRST_DR (stmtinfo_b
);
237 if (first_a
== first_b
)
239 init_dra_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a
)));
240 init_drb_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b
)));
242 if (tree_int_cst_compare (init_dra_chain
, init_drb_chain
) > 0)
244 /* Insert the nodes of DRA chain into the DRB chain.
245 After inserting a node, continue from this node of the DRB chain (don't
246 start from the beginning. */
247 node
= DR_GROUP_FIRST_DR (stmtinfo_a
);
248 prev
= DR_GROUP_FIRST_DR (stmtinfo_b
);
249 first_stmt
= first_b
;
253 /* Insert the nodes of DRB chain into the DRA chain.
254 After inserting a node, continue from this node of the DRA chain (don't
255 start from the beginning. */
256 node
= DR_GROUP_FIRST_DR (stmtinfo_b
);
257 prev
= DR_GROUP_FIRST_DR (stmtinfo_a
);
258 first_stmt
= first_a
;
263 node_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node
)));
264 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
267 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
268 if (tree_int_cst_compare (next_init
, node_init
) > 0)
271 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
272 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = next
;
277 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
));
281 /* We got to the end of the list. Insert here. */
282 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev
)) = node
;
283 DR_GROUP_NEXT_DR (vinfo_for_stmt (node
)) = NULL
;
286 DR_GROUP_FIRST_DR (vinfo_for_stmt (node
)) = first_stmt
;
287 node
= DR_GROUP_NEXT_DR (vinfo_for_stmt (node
));
292 /* Function vect_equal_offsets.
294 Check if OFFSET1 and OFFSET2 are identical expressions. */
297 vect_equal_offsets (tree offset1
, tree offset2
)
301 STRIP_NOPS (offset1
);
302 STRIP_NOPS (offset2
);
304 if (offset1
== offset2
)
307 if (TREE_CODE (offset1
) != TREE_CODE (offset2
)
308 || (!BINARY_CLASS_P (offset1
) && !UNARY_CLASS_P (offset1
)))
311 res
= vect_equal_offsets (TREE_OPERAND (offset1
, 0),
312 TREE_OPERAND (offset2
, 0));
314 if (!res
|| !BINARY_CLASS_P (offset1
))
317 res
= vect_equal_offsets (TREE_OPERAND (offset1
, 1),
318 TREE_OPERAND (offset2
, 1));
324 /* Function vect_check_interleaving.
326 Check if DRA and DRB are a part of interleaving. In case they are, insert
327 DRA and DRB in an interleaving chain. */
330 vect_check_interleaving (struct data_reference
*dra
,
331 struct data_reference
*drb
)
333 HOST_WIDE_INT type_size_a
, type_size_b
, diff_mod_size
, step
, init_a
, init_b
;
335 /* Check that the data-refs have same first location (except init) and they
336 are both either store or load (not load and store). */
337 if ((DR_BASE_ADDRESS (dra
) != DR_BASE_ADDRESS (drb
)
338 && (TREE_CODE (DR_BASE_ADDRESS (dra
)) != ADDR_EXPR
339 || TREE_CODE (DR_BASE_ADDRESS (drb
)) != ADDR_EXPR
340 || TREE_OPERAND (DR_BASE_ADDRESS (dra
), 0)
341 != TREE_OPERAND (DR_BASE_ADDRESS (drb
),0)))
342 || !vect_equal_offsets (DR_OFFSET (dra
), DR_OFFSET (drb
))
343 || !tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
))
344 || DR_IS_READ (dra
) != DR_IS_READ (drb
))
348 1. data-refs are of the same type
349 2. their steps are equal
350 3. the step (if greater than zero) is greater than the difference between
352 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
353 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
355 if (type_size_a
!= type_size_b
356 || tree_int_cst_compare (DR_STEP (dra
), DR_STEP (drb
))
357 || !types_compatible_p (TREE_TYPE (DR_REF (dra
)),
358 TREE_TYPE (DR_REF (drb
))))
361 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
362 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
363 step
= TREE_INT_CST_LOW (DR_STEP (dra
));
367 /* If init_a == init_b + the size of the type * k, we have an interleaving,
368 and DRB is accessed before DRA. */
369 diff_mod_size
= (init_a
- init_b
) % type_size_a
;
371 if (step
&& (init_a
- init_b
) > step
)
374 if (diff_mod_size
== 0)
376 vect_update_interleaving_chain (drb
, dra
);
377 if (vect_print_dump_info (REPORT_DR_DETAILS
))
379 fprintf (vect_dump
, "Detected interleaving ");
380 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
381 fprintf (vect_dump
, " and ");
382 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
389 /* If init_b == init_a + the size of the type * k, we have an
390 interleaving, and DRA is accessed before DRB. */
391 diff_mod_size
= (init_b
- init_a
) % type_size_a
;
393 if (step
&& (init_b
- init_a
) > step
)
396 if (diff_mod_size
== 0)
398 vect_update_interleaving_chain (dra
, drb
);
399 if (vect_print_dump_info (REPORT_DR_DETAILS
))
401 fprintf (vect_dump
, "Detected interleaving ");
402 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
403 fprintf (vect_dump
, " and ");
404 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
413 /* Check if data references pointed by DR_I and DR_J are same or
414 belong to same interleaving group. Return FALSE if drs are
415 different, otherwise return TRUE. */
418 vect_same_range_drs (data_reference_p dr_i
, data_reference_p dr_j
)
420 gimple stmt_i
= DR_STMT (dr_i
);
421 gimple stmt_j
= DR_STMT (dr_j
);
423 if (operand_equal_p (DR_REF (dr_i
), DR_REF (dr_j
), 0)
424 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i
))
425 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j
))
426 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i
))
427 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j
)))))
433 /* If address ranges represented by DDR_I and DDR_J are equal,
434 return TRUE, otherwise return FALSE. */
437 vect_vfa_range_equal (ddr_p ddr_i
, ddr_p ddr_j
)
439 if ((vect_same_range_drs (DDR_A (ddr_i
), DDR_A (ddr_j
))
440 && vect_same_range_drs (DDR_B (ddr_i
), DDR_B (ddr_j
)))
441 || (vect_same_range_drs (DDR_A (ddr_i
), DDR_B (ddr_j
))
442 && vect_same_range_drs (DDR_B (ddr_i
), DDR_A (ddr_j
))))
448 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
449 tested at run-time. Return TRUE if DDR was successfully inserted.
450 Return false if versioning is not supported. */
453 vect_mark_for_runtime_alias_test (ddr_p ddr
, loop_vec_info loop_vinfo
)
455 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
457 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
) == 0)
460 if (vect_print_dump_info (REPORT_DR_DETAILS
))
462 fprintf (vect_dump
, "mark for run-time aliasing test between ");
463 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr
)), TDF_SLIM
);
464 fprintf (vect_dump
, " and ");
465 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr
)), TDF_SLIM
);
468 if (optimize_loop_nest_for_size_p (loop
))
470 if (vect_print_dump_info (REPORT_DR_DETAILS
))
471 fprintf (vect_dump
, "versioning not supported when optimizing for size.");
475 /* FORNOW: We don't support versioning with outer-loop vectorization. */
478 if (vect_print_dump_info (REPORT_DR_DETAILS
))
479 fprintf (vect_dump
, "versioning not yet supported for outer-loops.");
483 VEC_safe_push (ddr_p
, heap
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), ddr
);
488 /* Function vect_analyze_data_ref_dependence.
490 Return TRUE if there (might) exist a dependence between a memory-reference
491 DRA and a memory-reference DRB. When versioning for alias may check a
492 dependence at run-time, return FALSE. Adjust *MAX_VF according to
493 the data dependence. */
496 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
497 loop_vec_info loop_vinfo
, int *max_vf
)
500 struct loop
*loop
= NULL
;
501 struct data_reference
*dra
= DDR_A (ddr
);
502 struct data_reference
*drb
= DDR_B (ddr
);
503 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
504 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
505 lambda_vector dist_v
;
506 unsigned int loop_depth
;
508 /* Don't bother to analyze statements marked as unvectorizable. */
509 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a
)
510 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b
))
513 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
515 /* Independent data accesses. */
516 vect_check_interleaving (dra
, drb
);
521 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
523 if ((DR_IS_READ (dra
) && DR_IS_READ (drb
) && loop_vinfo
) || dra
== drb
)
526 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
530 if (vect_print_dump_info (REPORT_DR_DETAILS
))
532 fprintf (vect_dump
, "versioning for alias required: "
533 "can't determine dependence between ");
534 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
535 fprintf (vect_dump
, " and ");
536 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
539 /* Add to list of ddrs that need to be tested at run-time. */
540 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
543 /* When vectorizing a basic block unknown depnedence can still mean
545 if (vect_check_interleaving (dra
, drb
))
548 if (vect_print_dump_info (REPORT_DR_DETAILS
))
550 fprintf (vect_dump
, "can't determine dependence between ");
551 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
552 fprintf (vect_dump
, " and ");
553 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
556 /* Mark the statements as unvectorizable. */
557 STMT_VINFO_VECTORIZABLE (stmtinfo_a
) = false;
558 STMT_VINFO_VECTORIZABLE (stmtinfo_b
) = false;
563 /* Versioning for alias is not yet supported for basic block SLP, and
564 dependence distance is unapplicable, hence, in case of known data
565 dependence, basic block vectorization is impossible for now. */
568 if (dra
!= drb
&& vect_check_interleaving (dra
, drb
))
571 if (vect_print_dump_info (REPORT_DR_DETAILS
))
573 fprintf (vect_dump
, "determined dependence between ");
574 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
575 fprintf (vect_dump
, " and ");
576 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
582 /* Loop-based vectorization and known data dependence. */
583 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
585 if (vect_print_dump_info (REPORT_DR_DETAILS
))
587 fprintf (vect_dump
, "versioning for alias required: bad dist vector for ");
588 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
589 fprintf (vect_dump
, " and ");
590 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
592 /* Add to list of ddrs that need to be tested at run-time. */
593 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
596 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
597 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
599 int dist
= dist_v
[loop_depth
];
601 if (vect_print_dump_info (REPORT_DR_DETAILS
))
602 fprintf (vect_dump
, "dependence distance = %d.", dist
);
606 if (vect_print_dump_info (REPORT_DR_DETAILS
))
608 fprintf (vect_dump
, "dependence distance == 0 between ");
609 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
610 fprintf (vect_dump
, " and ");
611 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
614 /* For interleaving, mark that there is a read-write dependency if
615 necessary. We check before that one of the data-refs is store. */
616 if (DR_IS_READ (dra
))
617 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a
) = true;
620 if (DR_IS_READ (drb
))
621 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b
) = true;
627 if (dist
> 0 && DDR_REVERSED_P (ddr
))
629 /* If DDR_REVERSED_P the order of the data-refs in DDR was
630 reversed (to make distance vector positive), and the actual
631 distance is negative. */
632 if (vect_print_dump_info (REPORT_DR_DETAILS
))
633 fprintf (vect_dump
, "dependence distance negative.");
638 && abs (dist
) < *max_vf
)
640 /* The dependence distance requires reduction of the maximal
641 vectorization factor. */
642 *max_vf
= abs (dist
);
643 if (vect_print_dump_info (REPORT_DR_DETAILS
))
644 fprintf (vect_dump
, "adjusting maximal vectorization factor to %i",
648 if (abs (dist
) >= *max_vf
)
650 /* Dependence distance does not create dependence, as far as
651 vectorization is concerned, in this case. */
652 if (vect_print_dump_info (REPORT_DR_DETAILS
))
653 fprintf (vect_dump
, "dependence distance >= VF.");
657 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
659 fprintf (vect_dump
, "not vectorized, possible dependence "
660 "between data-refs ");
661 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
662 fprintf (vect_dump
, " and ");
663 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
672 /* Function vect_analyze_data_ref_dependences.
674 Examine all the data references in the loop, and make sure there do not
675 exist any data dependences between them. Set *MAX_VF according to
676 the maximum vectorization factor the data dependences allow. */
679 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
,
680 bb_vec_info bb_vinfo
, int *max_vf
)
683 VEC (ddr_p
, heap
) *ddrs
= NULL
;
684 struct data_dependence_relation
*ddr
;
686 if (vect_print_dump_info (REPORT_DETAILS
))
687 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
690 ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
692 ddrs
= BB_VINFO_DDRS (bb_vinfo
);
694 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
695 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
, max_vf
))
702 /* Function vect_compute_data_ref_alignment
704 Compute the misalignment of the data reference DR.
707 1. If during the misalignment computation it is found that the data reference
708 cannot be vectorized then false is returned.
709 2. DR_MISALIGNMENT (DR) is defined.
711 FOR NOW: No analysis is actually performed. Misalignment is calculated
712 only for trivial cases. TODO. */
715 vect_compute_data_ref_alignment (struct data_reference
*dr
)
717 gimple stmt
= DR_STMT (dr
);
718 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
719 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
720 struct loop
*loop
= NULL
;
721 tree ref
= DR_REF (dr
);
723 tree base
, base_addr
;
726 tree aligned_to
, alignment
;
728 if (vect_print_dump_info (REPORT_DETAILS
))
729 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
732 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
734 /* Initialize misalignment to unknown. */
735 SET_DR_MISALIGNMENT (dr
, -1);
737 misalign
= DR_INIT (dr
);
738 aligned_to
= DR_ALIGNED_TO (dr
);
739 base_addr
= DR_BASE_ADDRESS (dr
);
740 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
742 /* In case the dataref is in an inner-loop of the loop that is being
743 vectorized (LOOP), we use the base and misalignment information
744 relative to the outer-loop (LOOP). This is ok only if the misalignment
745 stays the same throughout the execution of the inner-loop, which is why
746 we have to check that the stride of the dataref in the inner-loop evenly
747 divides by the vector size. */
748 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
750 tree step
= DR_STEP (dr
);
751 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
753 if (dr_step
% GET_MODE_SIZE (TYPE_MODE (vectype
)) == 0)
755 if (vect_print_dump_info (REPORT_ALIGNMENT
))
756 fprintf (vect_dump
, "inner step divides the vector-size.");
757 misalign
= STMT_VINFO_DR_INIT (stmt_info
);
758 aligned_to
= STMT_VINFO_DR_ALIGNED_TO (stmt_info
);
759 base_addr
= STMT_VINFO_DR_BASE_ADDRESS (stmt_info
);
763 if (vect_print_dump_info (REPORT_ALIGNMENT
))
764 fprintf (vect_dump
, "inner step doesn't divide the vector-size.");
765 misalign
= NULL_TREE
;
769 base
= build_fold_indirect_ref (base_addr
);
770 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
772 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
775 if (vect_print_dump_info (REPORT_ALIGNMENT
))
777 fprintf (vect_dump
, "Unknown alignment for access: ");
778 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
784 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
786 || (TREE_CODE (base_addr
) == SSA_NAME
787 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
788 TREE_TYPE (base_addr
)))),
792 base_aligned
= false;
796 /* Do not change the alignment of global variables if
797 flag_section_anchors is enabled. */
798 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
799 || (TREE_STATIC (base
) && flag_section_anchors
))
801 if (vect_print_dump_info (REPORT_DETAILS
))
803 fprintf (vect_dump
, "can't force alignment of ref: ");
804 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
809 /* Force the alignment of the decl.
810 NOTE: This is the only change to the code we make during
811 the analysis phase, before deciding to vectorize the loop. */
812 if (vect_print_dump_info (REPORT_DETAILS
))
814 fprintf (vect_dump
, "force alignment of ");
815 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
818 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
819 DECL_USER_ALIGN (base
) = 1;
822 /* At this point we assume that the base is aligned. */
823 gcc_assert (base_aligned
824 || (TREE_CODE (base
) == VAR_DECL
825 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
827 /* Modulo alignment. */
828 misalign
= size_binop (FLOOR_MOD_EXPR
, misalign
, alignment
);
830 if (!host_integerp (misalign
, 1))
832 /* Negative or overflowed misalignment value. */
833 if (vect_print_dump_info (REPORT_DETAILS
))
834 fprintf (vect_dump
, "unexpected misalign value");
838 SET_DR_MISALIGNMENT (dr
, TREE_INT_CST_LOW (misalign
));
840 if (vect_print_dump_info (REPORT_DETAILS
))
842 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
843 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
850 /* Function vect_compute_data_refs_alignment
852 Compute the misalignment of data references in the loop.
853 Return FALSE if a data reference is found that cannot be vectorized. */
856 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
,
857 bb_vec_info bb_vinfo
)
859 VEC (data_reference_p
, heap
) *datarefs
;
860 struct data_reference
*dr
;
864 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
866 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
868 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
869 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
)))
870 && !vect_compute_data_ref_alignment (dr
))
874 /* Mark unsupported statement as unvectorizable. */
875 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
886 /* Function vect_update_misalignment_for_peel
888 DR - the data reference whose misalignment is to be adjusted.
889 DR_PEEL - the data reference whose misalignment is being made
890 zero in the vector loop by the peel.
891 NPEEL - the number of iterations in the peel loop if the misalignment
892 of DR_PEEL is known at compile time. */
895 vect_update_misalignment_for_peel (struct data_reference
*dr
,
896 struct data_reference
*dr_peel
, int npeel
)
899 VEC(dr_p
,heap
) *same_align_drs
;
900 struct data_reference
*current_dr
;
901 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
902 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
903 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
904 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
906 /* For interleaved data accesses the step in the loop must be multiplied by
907 the size of the interleaving group. */
908 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
909 dr_size
*= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info
)));
910 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info
))
911 dr_peel_size
*= DR_GROUP_SIZE (peel_stmt_info
);
913 /* It can be assumed that the data refs with the same alignment as dr_peel
914 are aligned in the vector loop. */
916 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
917 for (i
= 0; VEC_iterate (dr_p
, same_align_drs
, i
, current_dr
); i
++)
919 if (current_dr
!= dr
)
921 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
922 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
923 SET_DR_MISALIGNMENT (dr
, 0);
927 if (known_alignment_for_access_p (dr
)
928 && known_alignment_for_access_p (dr_peel
))
930 int misal
= DR_MISALIGNMENT (dr
);
931 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
932 misal
+= npeel
* dr_size
;
933 misal
%= GET_MODE_SIZE (TYPE_MODE (vectype
));
934 SET_DR_MISALIGNMENT (dr
, misal
);
938 if (vect_print_dump_info (REPORT_DETAILS
))
939 fprintf (vect_dump
, "Setting misalignment to -1.");
940 SET_DR_MISALIGNMENT (dr
, -1);
944 /* Function vect_verify_datarefs_alignment
946 Return TRUE if all data references in the loop can be
947 handled with respect to alignment. */
950 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
952 VEC (data_reference_p
, heap
) *datarefs
;
953 struct data_reference
*dr
;
954 enum dr_alignment_support supportable_dr_alignment
;
958 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
960 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
962 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
964 gimple stmt
= DR_STMT (dr
);
965 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
967 /* For interleaving, only the alignment of the first access matters.
968 Skip statements marked as not vectorizable. */
969 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info
)
970 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
971 || !STMT_VINFO_VECTORIZABLE (stmt_info
))
974 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
975 if (!supportable_dr_alignment
)
977 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
981 "not vectorized: unsupported unaligned load.");
984 "not vectorized: unsupported unaligned store.");
986 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
990 if (supportable_dr_alignment
!= dr_aligned
991 && vect_print_dump_info (REPORT_ALIGNMENT
))
992 fprintf (vect_dump
, "Vectorizing an unaligned access.");
998 /* Function vector_alignment_reachable_p
1000 Return true if vector alignment for DR is reachable by peeling
1001 a few loop iterations. Return false otherwise. */
1004 vector_alignment_reachable_p (struct data_reference
*dr
)
1006 gimple stmt
= DR_STMT (dr
);
1007 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1008 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1010 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1012 /* For interleaved access we peel only if number of iterations in
1013 the prolog loop ({VF - misalignment}), is a multiple of the
1014 number of the interleaved accesses. */
1015 int elem_size
, mis_in_elements
;
1016 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1018 /* FORNOW: handle only known alignment. */
1019 if (!known_alignment_for_access_p (dr
))
1022 elem_size
= GET_MODE_SIZE (TYPE_MODE (vectype
)) / nelements
;
1023 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
1025 if ((nelements
- mis_in_elements
) % DR_GROUP_SIZE (stmt_info
))
1029 /* If misalignment is known at the compile time then allow peeling
1030 only if natural alignment is reachable through peeling. */
1031 if (known_alignment_for_access_p (dr
) && !aligned_access_p (dr
))
1033 HOST_WIDE_INT elmsize
=
1034 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
1035 if (vect_print_dump_info (REPORT_DETAILS
))
1037 fprintf (vect_dump
, "data size =" HOST_WIDE_INT_PRINT_DEC
, elmsize
);
1038 fprintf (vect_dump
, ". misalignment = %d. ", DR_MISALIGNMENT (dr
));
1040 if (DR_MISALIGNMENT (dr
) % elmsize
)
1042 if (vect_print_dump_info (REPORT_DETAILS
))
1043 fprintf (vect_dump
, "data size does not divide the misalignment.\n");
1048 if (!known_alignment_for_access_p (dr
))
1050 tree type
= (TREE_TYPE (DR_REF (dr
)));
1051 tree ba
= DR_BASE_OBJECT (dr
);
1052 bool is_packed
= false;
1055 is_packed
= contains_packed_reference (ba
);
1057 if (vect_print_dump_info (REPORT_DETAILS
))
1058 fprintf (vect_dump
, "Unknown misalignment, is_packed = %d",is_packed
);
1059 if (targetm
.vectorize
.vector_alignment_reachable (type
, is_packed
))
1069 /* Calculate the cost of the memory access represented by DR. */
1072 vect_get_data_access_cost (struct data_reference
*dr
,
1073 unsigned int *inside_cost
,
1074 unsigned int *outside_cost
)
1076 gimple stmt
= DR_STMT (dr
);
1077 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1078 int nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1079 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1080 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1081 int ncopies
= vf
/ nunits
;
1082 bool supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1084 if (!supportable_dr_alignment
)
1085 *inside_cost
= VECT_MAX_COST
;
1088 if (DR_IS_READ (dr
))
1089 vect_get_load_cost (dr
, ncopies
, true, inside_cost
, outside_cost
);
1091 vect_get_store_cost (dr
, ncopies
, inside_cost
);
1094 if (vect_print_dump_info (REPORT_COST
))
1095 fprintf (vect_dump
, "vect_get_data_access_cost: inside_cost = %d, "
1096 "outside_cost = %d.", *inside_cost
, *outside_cost
);
1101 vect_peeling_hash (const void *elem
)
1103 const struct _vect_peel_info
*peel_info
;
1105 peel_info
= (const struct _vect_peel_info
*) elem
;
1106 return (hashval_t
) peel_info
->npeel
;
1111 vect_peeling_hash_eq (const void *elem1
, const void *elem2
)
1113 const struct _vect_peel_info
*a
, *b
;
1115 a
= (const struct _vect_peel_info
*) elem1
;
1116 b
= (const struct _vect_peel_info
*) elem2
;
1117 return (a
->npeel
== b
->npeel
);
1121 /* Insert DR into peeling hash table with NPEEL as key. */
1124 vect_peeling_hash_insert (loop_vec_info loop_vinfo
, struct data_reference
*dr
,
1127 struct _vect_peel_info elem
, *slot
;
1129 bool supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1132 slot
= (vect_peel_info
) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo
),
1138 slot
= XNEW (struct _vect_peel_info
);
1139 slot
->npeel
= npeel
;
1142 new_slot
= htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo
), slot
,
1147 if (!supportable_dr_alignment
&& !flag_vect_cost_model
)
1148 slot
->count
+= VECT_MAX_COST
;
1152 /* Traverse peeling hash table to find peeling option that aligns maximum
1153 number of data accesses. */
1156 vect_peeling_hash_get_most_frequent (void **slot
, void *data
)
1158 vect_peel_info elem
= (vect_peel_info
) *slot
;
1159 vect_peel_extended_info max
= (vect_peel_extended_info
) data
;
1161 if (elem
->count
> max
->peel_info
.count
)
1163 max
->peel_info
.npeel
= elem
->npeel
;
1164 max
->peel_info
.count
= elem
->count
;
1165 max
->peel_info
.dr
= elem
->dr
;
1172 /* Traverse peeling hash table and calculate cost for each peeling option. Find
1173 one with the lowest cost. */
1176 vect_peeling_hash_get_lowest_cost (void **slot
, void *data
)
1178 vect_peel_info elem
= (vect_peel_info
) *slot
;
1179 vect_peel_extended_info min
= (vect_peel_extended_info
) data
;
1180 int save_misalignment
, dummy
;
1181 unsigned int inside_cost
= 0, outside_cost
= 0, i
;
1182 gimple stmt
= DR_STMT (elem
->dr
);
1183 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1184 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1185 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1186 struct data_reference
*dr
;
1188 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1190 stmt
= DR_STMT (dr
);
1191 stmt_info
= vinfo_for_stmt (stmt
);
1192 /* For interleaving, only the alignment of the first access
1194 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1195 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1198 save_misalignment
= DR_MISALIGNMENT (dr
);
1199 vect_update_misalignment_for_peel (dr
, elem
->dr
, elem
->npeel
);
1200 vect_get_data_access_cost (dr
, &inside_cost
, &outside_cost
);
1201 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1204 outside_cost
+= vect_get_known_peeling_cost (loop_vinfo
, elem
->npeel
, &dummy
,
1205 vect_get_single_scalar_iteraion_cost (loop_vinfo
));
1207 if (inside_cost
< min
->inside_cost
1208 || (inside_cost
== min
->inside_cost
&& outside_cost
< min
->outside_cost
))
1210 min
->inside_cost
= inside_cost
;
1211 min
->outside_cost
= outside_cost
;
1212 min
->peel_info
.dr
= elem
->dr
;
1213 min
->peel_info
.npeel
= elem
->npeel
;
1220 /* Choose best peeling option by traversing peeling hash table and either
1221 choosing an option with the lowest cost (if cost model is enabled) or the
1222 option that aligns as many accesses as possible. */
1224 static struct data_reference
*
1225 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo
,
1226 unsigned int *npeel
)
1228 struct _vect_peel_extended_info res
;
1230 res
.peel_info
.dr
= NULL
;
1232 if (flag_vect_cost_model
)
1234 res
.inside_cost
= INT_MAX
;
1235 res
.outside_cost
= INT_MAX
;
1236 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo
),
1237 vect_peeling_hash_get_lowest_cost
, &res
);
1241 res
.peel_info
.count
= 0;
1242 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo
),
1243 vect_peeling_hash_get_most_frequent
, &res
);
1246 *npeel
= res
.peel_info
.npeel
;
1247 return res
.peel_info
.dr
;
1251 /* Function vect_enhance_data_refs_alignment
1253 This pass will use loop versioning and loop peeling in order to enhance
1254 the alignment of data references in the loop.
1256 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1257 original loop is to be vectorized; Any other loops that are created by
1258 the transformations performed in this pass - are not supposed to be
1259 vectorized. This restriction will be relaxed.
1261 This pass will require a cost model to guide it whether to apply peeling
1262 or versioning or a combination of the two. For example, the scheme that
1263 intel uses when given a loop with several memory accesses, is as follows:
1264 choose one memory access ('p') which alignment you want to force by doing
1265 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1266 other accesses are not necessarily aligned, or (2) use loop versioning to
1267 generate one loop in which all accesses are aligned, and another loop in
1268 which only 'p' is necessarily aligned.
1270 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1271 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1272 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1274 Devising a cost model is the most critical aspect of this work. It will
1275 guide us on which access to peel for, whether to use loop versioning, how
1276 many versions to create, etc. The cost model will probably consist of
1277 generic considerations as well as target specific considerations (on
1278 powerpc for example, misaligned stores are more painful than misaligned
1281 Here are the general steps involved in alignment enhancements:
1283 -- original loop, before alignment analysis:
1284 for (i=0; i<N; i++){
1285 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1286 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1289 -- After vect_compute_data_refs_alignment:
1290 for (i=0; i<N; i++){
1291 x = q[i]; # DR_MISALIGNMENT(q) = 3
1292 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1295 -- Possibility 1: we do loop versioning:
1297 for (i=0; i<N; i++){ # loop 1A
1298 x = q[i]; # DR_MISALIGNMENT(q) = 3
1299 p[i] = y; # DR_MISALIGNMENT(p) = 0
1303 for (i=0; i<N; i++){ # loop 1B
1304 x = q[i]; # DR_MISALIGNMENT(q) = 3
1305 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1309 -- Possibility 2: we do loop peeling:
1310 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1314 for (i = 3; i < N; i++){ # loop 2A
1315 x = q[i]; # DR_MISALIGNMENT(q) = 0
1316 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1319 -- Possibility 3: combination of loop peeling and versioning:
1320 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1325 for (i = 3; i<N; i++){ # loop 3A
1326 x = q[i]; # DR_MISALIGNMENT(q) = 0
1327 p[i] = y; # DR_MISALIGNMENT(p) = 0
1331 for (i = 3; i<N; i++){ # loop 3B
1332 x = q[i]; # DR_MISALIGNMENT(q) = 0
1333 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1337 These loops are later passed to loop_transform to be vectorized. The
1338 vectorizer will use the alignment information to guide the transformation
1339 (whether to generate regular loads/stores, or with special handling for
1343 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1345 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1346 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1347 enum dr_alignment_support supportable_dr_alignment
;
1348 struct data_reference
*dr0
= NULL
, *first_store
= NULL
;
1349 struct data_reference
*dr
;
1351 bool do_peeling
= false;
1352 bool do_versioning
= false;
1355 stmt_vec_info stmt_info
;
1356 int vect_versioning_for_alias_required
;
1357 unsigned int npeel
= 0;
1358 bool all_misalignments_unknown
= true;
1359 unsigned int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1360 unsigned possible_npeel_number
= 1;
1362 unsigned int nelements
, mis
, same_align_drs_max
= 0;
1364 if (vect_print_dump_info (REPORT_DETAILS
))
1365 fprintf (vect_dump
, "=== vect_enhance_data_refs_alignment ===");
1367 /* While cost model enhancements are expected in the future, the high level
1368 view of the code at this time is as follows:
1370 A) If there is a misaligned access then see if peeling to align
1371 this access can make all data references satisfy
1372 vect_supportable_dr_alignment. If so, update data structures
1373 as needed and return true.
1375 B) If peeling wasn't possible and there is a data reference with an
1376 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1377 then see if loop versioning checks can be used to make all data
1378 references satisfy vect_supportable_dr_alignment. If so, update
1379 data structures as needed and return true.
1381 C) If neither peeling nor versioning were successful then return false if
1382 any data reference does not satisfy vect_supportable_dr_alignment.
1384 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1386 Note, Possibility 3 above (which is peeling and versioning together) is not
1387 being done at this time. */
1389 /* (1) Peeling to force alignment. */
1391 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1393 + How many accesses will become aligned due to the peeling
1394 - How many accesses will become unaligned due to the peeling,
1395 and the cost of misaligned accesses.
1396 - The cost of peeling (the extra runtime checks, the increase
1399 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1401 stmt
= DR_STMT (dr
);
1402 stmt_info
= vinfo_for_stmt (stmt
);
1404 /* For interleaving, only the alignment of the first access
1406 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1407 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1410 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1411 do_peeling
= vector_alignment_reachable_p (dr
);
1414 if (known_alignment_for_access_p (dr
))
1416 unsigned int npeel_tmp
;
1418 /* Save info about DR in the hash table. */
1419 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo
))
1420 LOOP_VINFO_PEELING_HTAB (loop_vinfo
) =
1421 htab_create (1, vect_peeling_hash
,
1422 vect_peeling_hash_eq
, free
);
1424 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1425 nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1426 mis
= DR_MISALIGNMENT (dr
) / GET_MODE_SIZE (TYPE_MODE (
1427 TREE_TYPE (DR_REF (dr
))));
1428 npeel_tmp
= (nelements
- mis
) % vf
;
1430 /* For multiple types, it is possible that the bigger type access
1431 will have more than one peeling option. E.g., a loop with two
1432 types: one of size (vector size / 4), and the other one of
1433 size (vector size / 8). Vectorization factor will 8. If both
1434 access are misaligned by 3, the first one needs one scalar
1435 iteration to be aligned, and the second one needs 5. But the
1436 the first one will be aligned also by peeling 5 scalar
1437 iterations, and in that case both accesses will be aligned.
1438 Hence, except for the immediate peeling amount, we also want
1439 to try to add full vector size, while we don't exceed
1440 vectorization factor.
1441 We do this automtically for cost model, since we calculate cost
1442 for every peeling option. */
1443 if (!flag_vect_cost_model
)
1444 possible_npeel_number
= vf
/nelements
;
1446 /* Handle the aligned case. We may decide to align some other
1447 access, making DR unaligned. */
1448 if (DR_MISALIGNMENT (dr
) == 0)
1451 if (!flag_vect_cost_model
)
1452 possible_npeel_number
++;
1455 for (j
= 0; j
< possible_npeel_number
; j
++)
1457 gcc_assert (npeel_tmp
<= vf
);
1458 vect_peeling_hash_insert (loop_vinfo
, dr
, npeel_tmp
);
1459 npeel_tmp
+= nelements
;
1462 all_misalignments_unknown
= false;
1463 /* Data-ref that was chosen for the case that all the
1464 misalignments are unknown is not relevant anymore, since we
1465 have a data-ref with known alignment. */
1470 /* If we don't know all the misalignment values, we prefer
1471 peeling for data-ref that has maximum number of data-refs
1472 with the same alignment, unless the target prefers to align
1473 stores over load. */
1474 if (all_misalignments_unknown
)
1476 if (same_align_drs_max
< VEC_length (dr_p
,
1477 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
))
1480 same_align_drs_max
= VEC_length (dr_p
,
1481 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
));
1485 if (!first_store
&& !DR_IS_READ (dr
))
1489 /* If there are both known and unknown misaligned accesses in the
1490 loop, we choose peeling amount according to the known
1494 if (!supportable_dr_alignment
)
1497 if (!first_store
&& !DR_IS_READ (dr
))
1504 if (!aligned_access_p (dr
))
1506 if (vect_print_dump_info (REPORT_DETAILS
))
1507 fprintf (vect_dump
, "vector alignment may not be reachable");
1514 vect_versioning_for_alias_required
1515 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
);
1517 /* Temporarily, if versioning for alias is required, we disable peeling
1518 until we support peeling and versioning. Often peeling for alignment
1519 will require peeling for loop-bound, which in turn requires that we
1520 know how to adjust the loop ivs after the loop. */
1521 if (vect_versioning_for_alias_required
1522 || !vect_can_advance_ivs_p (loop_vinfo
)
1523 || !slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1526 if (do_peeling
&& all_misalignments_unknown
1527 && vect_supportable_dr_alignment (dr0
, false))
1530 /* Check if the target requires to prefer stores over loads, i.e., if
1531 misaligned stores are more expensive than misaligned loads (taking
1532 drs with same alignment into account). */
1533 if (first_store
&& DR_IS_READ (dr0
))
1535 unsigned int load_inside_cost
= 0, load_outside_cost
= 0;
1536 unsigned int store_inside_cost
= 0, store_outside_cost
= 0;
1537 unsigned int load_inside_penalty
= 0, load_outside_penalty
= 0;
1538 unsigned int store_inside_penalty
= 0, store_outside_penalty
= 0;
1540 vect_get_data_access_cost (dr0
, &load_inside_cost
,
1541 &load_outside_cost
);
1542 vect_get_data_access_cost (first_store
, &store_inside_cost
,
1543 &store_outside_cost
);
1545 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1546 aligning the load DR0). */
1547 load_inside_penalty
= store_inside_cost
;
1548 load_outside_penalty
= store_outside_cost
;
1549 for (i
= 0; VEC_iterate (dr_p
, STMT_VINFO_SAME_ALIGN_REFS
1550 (vinfo_for_stmt (DR_STMT (first_store
))),
1553 if (DR_IS_READ (dr
))
1555 load_inside_penalty
+= load_inside_cost
;
1556 load_outside_penalty
+= load_outside_cost
;
1560 load_inside_penalty
+= store_inside_cost
;
1561 load_outside_penalty
+= store_outside_cost
;
1564 /* Calculate the penalty for leaving DR0 unaligned (by
1565 aligning the FIRST_STORE). */
1566 store_inside_penalty
= load_inside_cost
;
1567 store_outside_penalty
= load_outside_cost
;
1568 for (i
= 0; VEC_iterate (dr_p
, STMT_VINFO_SAME_ALIGN_REFS
1569 (vinfo_for_stmt (DR_STMT (dr0
))),
1572 if (DR_IS_READ (dr
))
1574 store_inside_penalty
+= load_inside_cost
;
1575 store_outside_penalty
+= load_outside_cost
;
1579 store_inside_penalty
+= store_inside_cost
;
1580 store_outside_penalty
+= store_outside_cost
;
1583 if (load_inside_penalty
> store_inside_penalty
1584 || (load_inside_penalty
== store_inside_penalty
1585 && load_outside_penalty
> store_outside_penalty
))
1589 /* In case there are only loads with different unknown misalignments, use
1590 peeling only if it may help to align other accesses in the loop. */
1591 if (!first_store
&& !VEC_length (dr_p
, STMT_VINFO_SAME_ALIGN_REFS
1592 (vinfo_for_stmt (DR_STMT (dr0
))))
1593 && vect_supportable_dr_alignment (dr0
, false)
1594 != dr_unaligned_supported
)
1598 if (do_peeling
&& !dr0
)
1600 /* Peeling is possible, but there is no data access that is not supported
1601 unless aligned. So we try to choose the best possible peeling. */
1603 /* We should get here only if there are drs with known misalignment. */
1604 gcc_assert (!all_misalignments_unknown
);
1606 /* Choose the best peeling from the hash table. */
1607 dr0
= vect_peeling_hash_choose_best_peeling (loop_vinfo
, &npeel
);
1614 stmt
= DR_STMT (dr0
);
1615 stmt_info
= vinfo_for_stmt (stmt
);
1616 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1617 nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1619 if (known_alignment_for_access_p (dr0
))
1623 /* Since it's known at compile time, compute the number of
1624 iterations in the peeled loop (the peeling factor) for use in
1625 updating DR_MISALIGNMENT values. The peeling factor is the
1626 vectorization factor minus the misalignment as an element
1628 mis
= DR_MISALIGNMENT (dr0
);
1629 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1630 npeel
= nelements
- mis
;
1633 /* For interleaved data access every iteration accesses all the
1634 members of the group, therefore we divide the number of iterations
1635 by the group size. */
1636 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1637 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1638 npeel
/= DR_GROUP_SIZE (stmt_info
);
1640 if (vect_print_dump_info (REPORT_DETAILS
))
1641 fprintf (vect_dump
, "Try peeling by %d", npeel
);
1644 /* Ensure that all data refs can be vectorized after the peel. */
1645 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1647 int save_misalignment
;
1652 stmt
= DR_STMT (dr
);
1653 stmt_info
= vinfo_for_stmt (stmt
);
1654 /* For interleaving, only the alignment of the first access
1656 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1657 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
)
1660 save_misalignment
= DR_MISALIGNMENT (dr
);
1661 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1662 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1663 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1665 if (!supportable_dr_alignment
)
1672 if (do_peeling
&& known_alignment_for_access_p (dr0
) && npeel
== 0)
1674 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1683 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1684 If the misalignment of DR_i is identical to that of dr0 then set
1685 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1686 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1687 by the peeling factor times the element size of DR_i (MOD the
1688 vectorization factor times the size). Otherwise, the
1689 misalignment of DR_i must be set to unknown. */
1690 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1692 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1694 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1696 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = npeel
;
1698 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1699 SET_DR_MISALIGNMENT (dr0
, 0);
1700 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1701 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1703 if (vect_print_dump_info (REPORT_DETAILS
))
1704 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1706 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1713 /* (2) Versioning to force alignment. */
1715 /* Try versioning if:
1716 1) flag_tree_vect_loop_version is TRUE
1717 2) optimize loop for speed
1718 3) there is at least one unsupported misaligned data ref with an unknown
1720 4) all misaligned data refs with a known misalignment are supported, and
1721 5) the number of runtime alignment checks is within reason. */
1724 flag_tree_vect_loop_version
1725 && optimize_loop_nest_for_speed_p (loop
)
1726 && (!loop
->inner
); /* FORNOW */
1730 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
1732 stmt
= DR_STMT (dr
);
1733 stmt_info
= vinfo_for_stmt (stmt
);
1735 /* For interleaving, only the alignment of the first access
1737 if (aligned_access_p (dr
)
1738 || (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1739 && DR_GROUP_FIRST_DR (stmt_info
) != stmt
))
1742 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1744 if (!supportable_dr_alignment
)
1750 if (known_alignment_for_access_p (dr
)
1751 || VEC_length (gimple
,
1752 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1753 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
1755 do_versioning
= false;
1759 stmt
= DR_STMT (dr
);
1760 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1761 gcc_assert (vectype
);
1763 /* The rightmost bits of an aligned address must be zeros.
1764 Construct the mask needed for this test. For example,
1765 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1766 mask must be 15 = 0xf. */
1767 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1769 /* FORNOW: use the same mask to test all potentially unaligned
1770 references in the loop. The vectorizer currently supports
1771 a single vector size, see the reference to
1772 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1773 vectorization factor is computed. */
1774 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1775 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1776 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1777 VEC_safe_push (gimple
, heap
,
1778 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
1783 /* Versioning requires at least one misaligned data reference. */
1784 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
))
1785 do_versioning
= false;
1786 else if (!do_versioning
)
1787 VEC_truncate (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
1792 VEC(gimple
,heap
) *may_misalign_stmts
1793 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1796 /* It can now be assumed that the data references in the statements
1797 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1798 of the loop being vectorized. */
1799 for (i
= 0; VEC_iterate (gimple
, may_misalign_stmts
, i
, stmt
); i
++)
1801 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1802 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1803 SET_DR_MISALIGNMENT (dr
, 0);
1804 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1805 fprintf (vect_dump
, "Alignment of access forced using versioning.");
1808 if (vect_print_dump_info (REPORT_DETAILS
))
1809 fprintf (vect_dump
, "Versioning for alignment will be applied.");
1811 /* Peeling and versioning can't be done together at this time. */
1812 gcc_assert (! (do_peeling
&& do_versioning
));
1814 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1819 /* This point is reached if neither peeling nor versioning is being done. */
1820 gcc_assert (! (do_peeling
|| do_versioning
));
1822 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1827 /* Function vect_find_same_alignment_drs.
1829 Update group and alignment relations according to the chosen
1830 vectorization factor. */
1833 vect_find_same_alignment_drs (struct data_dependence_relation
*ddr
,
1834 loop_vec_info loop_vinfo
)
1837 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1838 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1839 struct data_reference
*dra
= DDR_A (ddr
);
1840 struct data_reference
*drb
= DDR_B (ddr
);
1841 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
1842 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
1843 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
1844 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
1845 lambda_vector dist_v
;
1846 unsigned int loop_depth
;
1848 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1854 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1857 /* Loop-based vectorization and known data dependence. */
1858 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1861 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
1862 for (i
= 0; VEC_iterate (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
); i
++)
1864 int dist
= dist_v
[loop_depth
];
1866 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1867 fprintf (vect_dump
, "dependence distance = %d.", dist
);
1869 /* Same loop iteration. */
1871 || (dist
% vectorization_factor
== 0 && dra_size
== drb_size
))
1873 /* Two references with distance zero have the same alignment. */
1874 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
1875 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
1876 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1877 fprintf (vect_dump
, "accesses have the same alignment.");
1878 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1880 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
1881 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1882 fprintf (vect_dump
, " and ");
1883 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1890 /* Function vect_analyze_data_refs_alignment
1892 Analyze the alignment of the data-references in the loop.
1893 Return FALSE if a data reference is found that cannot be vectorized. */
1896 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
,
1897 bb_vec_info bb_vinfo
)
1899 if (vect_print_dump_info (REPORT_DETAILS
))
1900 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
1902 /* Mark groups of data references with same alignment using
1903 data dependence information. */
1906 VEC (ddr_p
, heap
) *ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
1907 struct data_dependence_relation
*ddr
;
1910 for (i
= 0; VEC_iterate (ddr_p
, ddrs
, i
, ddr
); i
++)
1911 vect_find_same_alignment_drs (ddr
, loop_vinfo
);
1914 if (!vect_compute_data_refs_alignment (loop_vinfo
, bb_vinfo
))
1916 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1918 "not vectorized: can't calculate alignment for data ref.");
1926 /* Analyze groups of strided accesses: check that DR belongs to a group of
1927 strided accesses of legal size, step, etc. Detect gaps, single element
1928 interleaving, and other special cases. Set strided access info.
1929 Collect groups of strided stores for further use in SLP analysis. */
1932 vect_analyze_group_access (struct data_reference
*dr
)
1934 tree step
= DR_STEP (dr
);
1935 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
1936 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
1937 gimple stmt
= DR_STMT (dr
);
1938 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1939 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1940 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1941 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
1942 HOST_WIDE_INT stride
;
1943 bool slp_impossible
= false;
1945 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1946 interleaving group (including gaps). */
1947 stride
= dr_step
/ type_size
;
1949 /* Not consecutive access is possible only if it is a part of interleaving. */
1950 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)))
1952 /* Check if it this DR is a part of interleaving, and is a single
1953 element of the group that is accessed in the loop. */
1955 /* Gaps are supported only for loads. STEP must be a multiple of the type
1956 size. The size of the group must be a power of 2. */
1958 && (dr_step
% type_size
) == 0
1960 && exact_log2 (stride
) != -1)
1962 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = stmt
;
1963 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
1964 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1966 fprintf (vect_dump
, "Detected single element interleaving ");
1967 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1968 fprintf (vect_dump
, " step ");
1969 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
1974 if (vect_print_dump_info (REPORT_DETAILS
))
1976 fprintf (vect_dump
, "not consecutive access ");
1977 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
1982 /* Mark the statement as unvectorizable. */
1983 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
1990 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) == stmt
)
1992 /* First stmt in the interleaving chain. Check the chain. */
1993 gimple next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt
));
1994 struct data_reference
*data_ref
= dr
;
1995 unsigned int count
= 1;
1997 tree prev_init
= DR_INIT (data_ref
);
1999 HOST_WIDE_INT diff
, count_in_bytes
, gaps
= 0;
2003 /* Skip same data-refs. In case that two or more stmts share data-ref
2004 (supported only for loads), we vectorize only the first stmt, and
2005 the rest get their vectorized loads from the first one. */
2006 if (!tree_int_cst_compare (DR_INIT (data_ref
),
2007 DR_INIT (STMT_VINFO_DATA_REF (
2008 vinfo_for_stmt (next
)))))
2010 if (!DR_IS_READ (data_ref
))
2012 if (vect_print_dump_info (REPORT_DETAILS
))
2013 fprintf (vect_dump
, "Two store stmts share the same dr.");
2017 /* Check that there is no load-store dependencies for this loads
2018 to prevent a case of load-store-load to the same location. */
2019 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
2020 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
2022 if (vect_print_dump_info (REPORT_DETAILS
))
2024 "READ_WRITE dependence in interleaving.");
2028 /* For load use the same data-ref load. */
2029 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2032 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2037 /* Check that all the accesses have the same STEP. */
2038 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
2039 if (tree_int_cst_compare (step
, next_step
))
2041 if (vect_print_dump_info (REPORT_DETAILS
))
2042 fprintf (vect_dump
, "not consecutive access in interleaving");
2046 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2047 /* Check that the distance between two accesses is equal to the type
2048 size. Otherwise, we have gaps. */
2049 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2050 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2053 /* FORNOW: SLP of accesses with gaps is not supported. */
2054 slp_impossible
= true;
2055 if (!DR_IS_READ (data_ref
))
2057 if (vect_print_dump_info (REPORT_DETAILS
))
2058 fprintf (vect_dump
, "interleaved store with gaps");
2065 /* Store the gap from the previous member of the group. If there is no
2066 gap in the access, DR_GROUP_GAP is always 1. */
2067 DR_GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2069 prev_init
= DR_INIT (data_ref
);
2070 next
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next
));
2071 /* Count the number of data-refs in the chain. */
2075 /* COUNT is the number of accesses found, we multiply it by the size of
2076 the type to get COUNT_IN_BYTES. */
2077 count_in_bytes
= type_size
* count
;
2079 /* Check that the size of the interleaving (including gaps) is not
2080 greater than STEP. */
2081 if (dr_step
&& dr_step
< count_in_bytes
+ gaps
* type_size
)
2083 if (vect_print_dump_info (REPORT_DETAILS
))
2085 fprintf (vect_dump
, "interleaving size is greater than step for ");
2086 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2091 /* Check that the size of the interleaving is equal to STEP for stores,
2092 i.e., that there are no gaps. */
2093 if (dr_step
&& dr_step
!= count_in_bytes
)
2095 if (DR_IS_READ (dr
))
2097 slp_impossible
= true;
2098 /* There is a gap after the last load in the group. This gap is a
2099 difference between the stride and the number of elements. When
2100 there is no gap, this difference should be 0. */
2101 DR_GROUP_GAP (vinfo_for_stmt (stmt
)) = stride
- count
;
2105 if (vect_print_dump_info (REPORT_DETAILS
))
2106 fprintf (vect_dump
, "interleaved store with gaps");
2111 /* Check that STEP is a multiple of type size. */
2112 if (dr_step
&& (dr_step
% type_size
) != 0)
2114 if (vect_print_dump_info (REPORT_DETAILS
))
2116 fprintf (vect_dump
, "step is not a multiple of type size: step ");
2117 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2118 fprintf (vect_dump
, " size ");
2119 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
2125 /* FORNOW: we handle only interleaving that is a power of 2.
2126 We don't fail here if it may be still possible to vectorize the
2127 group using SLP. If not, the size of the group will be checked in
2128 vect_analyze_operations, and the vectorization will fail. */
2129 if (exact_log2 (stride
) == -1)
2131 if (vect_print_dump_info (REPORT_DETAILS
))
2132 fprintf (vect_dump
, "interleaving is not a power of 2");
2141 DR_GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2142 if (vect_print_dump_info (REPORT_DETAILS
))
2143 fprintf (vect_dump
, "Detected interleaving of size %d", (int)stride
);
2145 /* SLP: create an SLP data structure for every interleaving group of
2146 stores for further analysis in vect_analyse_slp. */
2147 if (!DR_IS_READ (dr
) && !slp_impossible
)
2150 VEC_safe_push (gimple
, heap
, LOOP_VINFO_STRIDED_STORES (loop_vinfo
),
2153 VEC_safe_push (gimple
, heap
, BB_VINFO_STRIDED_STORES (bb_vinfo
),
2162 /* Analyze the access pattern of the data-reference DR.
2163 In case of non-consecutive accesses call vect_analyze_group_access() to
2164 analyze groups of strided accesses. */
2167 vect_analyze_data_ref_access (struct data_reference
*dr
)
2169 tree step
= DR_STEP (dr
);
2170 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2171 gimple stmt
= DR_STMT (dr
);
2172 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2173 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2174 struct loop
*loop
= NULL
;
2175 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2178 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2180 if (loop_vinfo
&& !step
)
2182 if (vect_print_dump_info (REPORT_DETAILS
))
2183 fprintf (vect_dump
, "bad data-ref access in loop");
2187 /* Don't allow invariant accesses in loops. */
2188 if (loop_vinfo
&& dr_step
== 0)
2191 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2193 /* Interleaved accesses are not yet supported within outer-loop
2194 vectorization for references in the inner-loop. */
2195 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL
;
2197 /* For the rest of the analysis we use the outer-loop step. */
2198 step
= STMT_VINFO_DR_STEP (stmt_info
);
2199 dr_step
= TREE_INT_CST_LOW (step
);
2203 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2204 fprintf (vect_dump
, "zero step in outer loop.");
2205 if (DR_IS_READ (dr
))
2213 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
)))
2215 /* Mark that it is not interleaving. */
2216 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt
)) = NULL
;
2220 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2222 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2223 fprintf (vect_dump
, "strided access in outer loop.");
2227 /* Not consecutive access - check if it's a part of interleaving group. */
2228 return vect_analyze_group_access (dr
);
2232 /* Function vect_analyze_data_ref_accesses.
2234 Analyze the access pattern of all the data references in the loop.
2236 FORNOW: the only access pattern that is considered vectorizable is a
2237 simple step 1 (consecutive) access.
2239 FORNOW: handle only arrays and pointer accesses. */
2242 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2245 VEC (data_reference_p
, heap
) *datarefs
;
2246 struct data_reference
*dr
;
2248 if (vect_print_dump_info (REPORT_DETAILS
))
2249 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
2252 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2254 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
2256 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
2257 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
)))
2258 && !vect_analyze_data_ref_access (dr
))
2260 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2261 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
2265 /* Mark the statement as not vectorizable. */
2266 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
2276 /* Function vect_prune_runtime_alias_test_list.
2278 Prune a list of ddrs to be tested at run-time by versioning for alias.
2279 Return FALSE if resulting list of ddrs is longer then allowed by
2280 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2283 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo
)
2285 VEC (ddr_p
, heap
) * ddrs
=
2286 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
);
2289 if (vect_print_dump_info (REPORT_DETAILS
))
2290 fprintf (vect_dump
, "=== vect_prune_runtime_alias_test_list ===");
2292 for (i
= 0; i
< VEC_length (ddr_p
, ddrs
); )
2297 ddr_i
= VEC_index (ddr_p
, ddrs
, i
);
2300 for (j
= 0; j
< i
; j
++)
2302 ddr_p ddr_j
= VEC_index (ddr_p
, ddrs
, j
);
2304 if (vect_vfa_range_equal (ddr_i
, ddr_j
))
2306 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2308 fprintf (vect_dump
, "found equal ranges ");
2309 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_i
)), TDF_SLIM
);
2310 fprintf (vect_dump
, ", ");
2311 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_i
)), TDF_SLIM
);
2312 fprintf (vect_dump
, " and ");
2313 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_j
)), TDF_SLIM
);
2314 fprintf (vect_dump
, ", ");
2315 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_j
)), TDF_SLIM
);
2324 VEC_ordered_remove (ddr_p
, ddrs
, i
);
2330 if (VEC_length (ddr_p
, ddrs
) >
2331 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
2333 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2336 "disable versioning for alias - max number of generated "
2337 "checks exceeded.");
2340 VEC_truncate (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), 0);
2349 /* Function vect_analyze_data_refs.
2351 Find all the data references in the loop or basic block.
2353 The general structure of the analysis of data refs in the vectorizer is as
2355 1- vect_analyze_data_refs(loop/bb): call
2356 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2357 in the loop/bb and their dependences.
2358 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2359 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2360 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2365 vect_analyze_data_refs (loop_vec_info loop_vinfo
,
2366 bb_vec_info bb_vinfo
,
2369 struct loop
*loop
= NULL
;
2370 basic_block bb
= NULL
;
2372 VEC (data_reference_p
, heap
) *datarefs
;
2373 struct data_reference
*dr
;
2377 if (vect_print_dump_info (REPORT_DETAILS
))
2378 fprintf (vect_dump
, "=== vect_analyze_data_refs ===\n");
2382 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2383 res
= compute_data_dependences_for_loop
2384 (loop
, true, &LOOP_VINFO_DATAREFS (loop_vinfo
),
2385 &LOOP_VINFO_DDRS (loop_vinfo
));
2389 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2390 fprintf (vect_dump
, "not vectorized: loop contains function calls"
2391 " or data references that cannot be analyzed");
2395 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2399 bb
= BB_VINFO_BB (bb_vinfo
);
2400 res
= compute_data_dependences_for_bb (bb
, true,
2401 &BB_VINFO_DATAREFS (bb_vinfo
),
2402 &BB_VINFO_DDRS (bb_vinfo
));
2405 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2406 fprintf (vect_dump
, "not vectorized: basic block contains function"
2407 " calls or data references that cannot be analyzed");
2411 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
2414 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2415 from stmt_vec_info struct to DR and vectype. */
2417 for (i
= 0; VEC_iterate (data_reference_p
, datarefs
, i
, dr
); i
++)
2420 stmt_vec_info stmt_info
;
2421 tree base
, offset
, init
;
2424 if (!dr
|| !DR_REF (dr
))
2426 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2427 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
2431 stmt
= DR_STMT (dr
);
2432 stmt_info
= vinfo_for_stmt (stmt
);
2434 /* Check that analysis of the data-ref succeeded. */
2435 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
2438 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2440 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
2441 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2446 /* Mark the statement as not vectorizable. */
2447 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2454 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
2456 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2457 fprintf (vect_dump
, "not vectorized: base addr of dr is a "
2461 /* Mark the statement as not vectorizable. */
2462 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2469 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
2470 offset
= unshare_expr (DR_OFFSET (dr
));
2471 init
= unshare_expr (DR_INIT (dr
));
2473 /* Update DR field in stmt_vec_info struct. */
2475 /* If the dataref is in an inner-loop of the loop that is considered for
2476 for vectorization, we also want to analyze the access relative to
2477 the outer-loop (DR contains information only relative to the
2478 inner-most enclosing loop). We do that by building a reference to the
2479 first location accessed by the inner-loop, and analyze it relative to
2481 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2483 tree outer_step
, outer_base
, outer_init
;
2484 HOST_WIDE_INT pbitsize
, pbitpos
;
2486 enum machine_mode pmode
;
2487 int punsignedp
, pvolatilep
;
2488 affine_iv base_iv
, offset_iv
;
2491 /* Build a reference to the first location accessed by the
2492 inner-loop: *(BASE+INIT). (The first location is actually
2493 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2494 tree inner_base
= build_fold_indirect_ref
2495 (fold_build2 (POINTER_PLUS_EXPR
,
2496 TREE_TYPE (base
), base
,
2497 fold_convert (sizetype
, init
)));
2499 if (vect_print_dump_info (REPORT_DETAILS
))
2501 fprintf (vect_dump
, "analyze in outer-loop: ");
2502 print_generic_expr (vect_dump
, inner_base
, TDF_SLIM
);
2505 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
2506 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
2507 gcc_assert (outer_base
!= NULL_TREE
);
2509 if (pbitpos
% BITS_PER_UNIT
!= 0)
2511 if (vect_print_dump_info (REPORT_DETAILS
))
2512 fprintf (vect_dump
, "failed: bit offset alignment.\n");
2516 outer_base
= build_fold_addr_expr (outer_base
);
2517 if (!simple_iv (loop
, loop_containing_stmt (stmt
), outer_base
,
2520 if (vect_print_dump_info (REPORT_DETAILS
))
2521 fprintf (vect_dump
, "failed: evolution of base is not affine.\n");
2528 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
,
2536 offset_iv
.base
= ssize_int (0);
2537 offset_iv
.step
= ssize_int (0);
2539 else if (!simple_iv (loop
, loop_containing_stmt (stmt
), poffset
,
2542 if (vect_print_dump_info (REPORT_DETAILS
))
2543 fprintf (vect_dump
, "evolution of offset is not affine.\n");
2547 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
2548 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
2549 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
2550 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
2551 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
2553 outer_step
= size_binop (PLUS_EXPR
,
2554 fold_convert (ssizetype
, base_iv
.step
),
2555 fold_convert (ssizetype
, offset_iv
.step
));
2557 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
2558 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2559 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
2560 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
2561 STMT_VINFO_DR_OFFSET (stmt_info
) =
2562 fold_convert (ssizetype
, offset_iv
.base
);
2563 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
2564 size_int (highest_pow2_factor (offset_iv
.base
));
2566 if (vect_print_dump_info (REPORT_DETAILS
))
2568 fprintf (vect_dump
, "\touter base_address: ");
2569 print_generic_expr (vect_dump
, STMT_VINFO_DR_BASE_ADDRESS (stmt_info
), TDF_SLIM
);
2570 fprintf (vect_dump
, "\n\touter offset from base address: ");
2571 print_generic_expr (vect_dump
, STMT_VINFO_DR_OFFSET (stmt_info
), TDF_SLIM
);
2572 fprintf (vect_dump
, "\n\touter constant offset from base address: ");
2573 print_generic_expr (vect_dump
, STMT_VINFO_DR_INIT (stmt_info
), TDF_SLIM
);
2574 fprintf (vect_dump
, "\n\touter step: ");
2575 print_generic_expr (vect_dump
, STMT_VINFO_DR_STEP (stmt_info
), TDF_SLIM
);
2576 fprintf (vect_dump
, "\n\touter aligned to: ");
2577 print_generic_expr (vect_dump
, STMT_VINFO_DR_ALIGNED_TO (stmt_info
), TDF_SLIM
);
2581 if (STMT_VINFO_DATA_REF (stmt_info
))
2583 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2586 "not vectorized: more than one data ref in stmt: ");
2587 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2592 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
2594 /* Set vectype for STMT. */
2595 scalar_type
= TREE_TYPE (DR_REF (dr
));
2596 STMT_VINFO_VECTYPE (stmt_info
) =
2597 get_vectype_for_scalar_type (scalar_type
);
2598 if (!STMT_VINFO_VECTYPE (stmt_info
))
2600 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2603 "not vectorized: no vectype for stmt: ");
2604 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2605 fprintf (vect_dump
, " scalar_type: ");
2606 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
2611 /* Mark the statement as not vectorizable. */
2612 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2619 /* Adjust the minimal vectorization factor according to the
2621 vf
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
2630 /* Function vect_get_new_vect_var.
2632 Returns a name for a new variable. The current naming scheme appends the
2633 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2634 the name of vectorizer generated variables, and appends that to NAME if
2638 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
2645 case vect_simple_var
:
2648 case vect_scalar_var
:
2651 case vect_pointer_var
:
2660 char* tmp
= concat (prefix
, name
, NULL
);
2661 new_vect_var
= create_tmp_var (type
, tmp
);
2665 new_vect_var
= create_tmp_var (type
, prefix
);
2667 /* Mark vector typed variable as a gimple register variable. */
2668 if (TREE_CODE (type
) == VECTOR_TYPE
)
2669 DECL_GIMPLE_REG_P (new_vect_var
) = true;
2671 return new_vect_var
;
2675 /* Function vect_create_addr_base_for_vector_ref.
2677 Create an expression that computes the address of the first memory location
2678 that will be accessed for a data reference.
2681 STMT: The statement containing the data reference.
2682 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2683 OFFSET: Optional. If supplied, it is be added to the initial address.
2684 LOOP: Specify relative to which loop-nest should the address be computed.
2685 For example, when the dataref is in an inner-loop nested in an
2686 outer-loop that is now being vectorized, LOOP can be either the
2687 outer-loop, or the inner-loop. The first memory location accessed
2688 by the following dataref ('in' points to short):
2695 if LOOP=i_loop: &in (relative to i_loop)
2696 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2699 1. Return an SSA_NAME whose value is the address of the memory location of
2700 the first vector of the data reference.
2701 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2702 these statement(s) which define the returned SSA_NAME.
2704 FORNOW: We are only handling array accesses with step 1. */
2707 vect_create_addr_base_for_vector_ref (gimple stmt
,
2708 gimple_seq
*new_stmt_list
,
2712 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2713 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2714 tree data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
2716 tree data_ref_base_var
;
2718 tree addr_base
, addr_expr
;
2720 gimple_seq seq
= NULL
;
2721 tree base_offset
= unshare_expr (DR_OFFSET (dr
));
2722 tree init
= unshare_expr (DR_INIT (dr
));
2724 tree step
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
2725 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2728 if (loop_vinfo
&& loop
&& loop
!= (gimple_bb (stmt
))->loop_father
)
2730 struct loop
*outer_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2732 gcc_assert (nested_in_vect_loop_p (outer_loop
, stmt
));
2734 data_ref_base
= unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
2735 base_offset
= unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info
));
2736 init
= unshare_expr (STMT_VINFO_DR_INIT (stmt_info
));
2740 base_name
= build_fold_indirect_ref (data_ref_base
);
2743 base_offset
= ssize_int (0);
2744 init
= ssize_int (0);
2745 base_name
= build_fold_indirect_ref (unshare_expr (DR_REF (dr
)));
2748 data_ref_base_var
= create_tmp_var (TREE_TYPE (data_ref_base
), "batmp");
2749 add_referenced_var (data_ref_base_var
);
2750 data_ref_base
= force_gimple_operand (data_ref_base
, &seq
, true,
2752 gimple_seq_add_seq (new_stmt_list
, seq
);
2754 /* Create base_offset */
2755 base_offset
= size_binop (PLUS_EXPR
,
2756 fold_convert (sizetype
, base_offset
),
2757 fold_convert (sizetype
, init
));
2758 dest
= create_tmp_var (sizetype
, "base_off");
2759 add_referenced_var (dest
);
2760 base_offset
= force_gimple_operand (base_offset
, &seq
, true, dest
);
2761 gimple_seq_add_seq (new_stmt_list
, seq
);
2765 tree tmp
= create_tmp_var (sizetype
, "offset");
2767 add_referenced_var (tmp
);
2768 offset
= fold_build2 (MULT_EXPR
, sizetype
,
2769 fold_convert (sizetype
, offset
), step
);
2770 base_offset
= fold_build2 (PLUS_EXPR
, sizetype
,
2771 base_offset
, offset
);
2772 base_offset
= force_gimple_operand (base_offset
, &seq
, false, tmp
);
2773 gimple_seq_add_seq (new_stmt_list
, seq
);
2776 /* base + base_offset */
2778 addr_base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (data_ref_base
),
2779 data_ref_base
, base_offset
);
2782 addr_base
= build1 (ADDR_EXPR
,
2783 build_pointer_type (TREE_TYPE (DR_REF (dr
))),
2784 unshare_expr (DR_REF (dr
)));
2787 vect_ptr_type
= build_pointer_type (STMT_VINFO_VECTYPE (stmt_info
));
2788 base
= get_base_address (DR_REF (dr
));
2790 && TREE_CODE (base
) == MEM_REF
)
2792 = build_qualified_type (vect_ptr_type
,
2793 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base
, 0))));
2795 vec_stmt
= fold_convert (vect_ptr_type
, addr_base
);
2796 addr_expr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
2797 get_name (base_name
));
2798 add_referenced_var (addr_expr
);
2799 vec_stmt
= force_gimple_operand (vec_stmt
, &seq
, false, addr_expr
);
2800 gimple_seq_add_seq (new_stmt_list
, seq
);
2802 if (DR_PTR_INFO (dr
)
2803 && TREE_CODE (vec_stmt
) == SSA_NAME
)
2804 duplicate_ssa_name_ptr_info (vec_stmt
, DR_PTR_INFO (dr
));
2806 if (vect_print_dump_info (REPORT_DETAILS
))
2808 fprintf (vect_dump
, "created ");
2809 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
2816 /* Function vect_create_data_ref_ptr.
2818 Create a new pointer to vector type (vp), that points to the first location
2819 accessed in the loop by STMT, along with the def-use update chain to
2820 appropriately advance the pointer through the loop iterations. Also set
2821 aliasing information for the pointer. This vector pointer is used by the
2822 callers to this function to create a memory reference expression for vector
2826 1. STMT: a stmt that references memory. Expected to be of the form
2827 GIMPLE_ASSIGN <name, data-ref> or
2828 GIMPLE_ASSIGN <data-ref, name>.
2829 2. AT_LOOP: the loop where the vector memref is to be created.
2830 3. OFFSET (optional): an offset to be added to the initial address accessed
2831 by the data-ref in STMT.
2832 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2833 pointing to the initial address.
2834 5. TYPE: if not NULL indicates the required type of the data-ref.
2837 1. Declare a new ptr to vector_type, and have it point to the base of the
2838 data reference (initial addressed accessed by the data reference).
2839 For example, for vector of type V8HI, the following code is generated:
2842 vp = (v8hi *)initial_address;
2844 if OFFSET is not supplied:
2845 initial_address = &a[init];
2846 if OFFSET is supplied:
2847 initial_address = &a[init + OFFSET];
2849 Return the initial_address in INITIAL_ADDRESS.
2851 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2852 update the pointer in each iteration of the loop.
2854 Return the increment stmt that updates the pointer in PTR_INCR.
2856 3. Set INV_P to true if the access pattern of the data reference in the
2857 vectorized loop is invariant. Set it to false otherwise.
2859 4. Return the pointer. */
2862 vect_create_data_ref_ptr (gimple stmt
, struct loop
*at_loop
,
2863 tree offset
, tree
*initial_address
, gimple
*ptr_incr
,
2864 bool only_init
, bool *inv_p
)
2867 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2868 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2869 struct loop
*loop
= NULL
;
2870 bool nested_in_vect_loop
= false;
2871 struct loop
*containing_loop
= NULL
;
2872 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2877 gimple_seq new_stmt_list
= NULL
;
2881 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2883 gimple_stmt_iterator incr_gsi
;
2885 tree indx_before_incr
, indx_after_incr
;
2888 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2889 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt
);
2894 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2895 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
2896 containing_loop
= (gimple_bb (stmt
))->loop_father
;
2897 pe
= loop_preheader_edge (loop
);
2901 gcc_assert (bb_vinfo
);
2906 /* Check the step (evolution) of the load in LOOP, and record
2907 whether it's invariant. */
2908 if (nested_in_vect_loop
)
2909 step
= STMT_VINFO_DR_STEP (stmt_info
);
2911 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
2913 if (tree_int_cst_compare (step
, size_zero_node
) == 0)
2918 /* Create an expression for the first address accessed by this load
2920 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
2922 if (vect_print_dump_info (REPORT_DETAILS
))
2924 tree data_ref_base
= base_name
;
2925 fprintf (vect_dump
, "create vector-pointer variable to type: ");
2926 print_generic_expr (vect_dump
, vectype
, TDF_SLIM
);
2927 if (TREE_CODE (data_ref_base
) == VAR_DECL
2928 || TREE_CODE (data_ref_base
) == ARRAY_REF
)
2929 fprintf (vect_dump
, " vectorizing an array ref: ");
2930 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
2931 fprintf (vect_dump
, " vectorizing a record based array ref: ");
2932 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
2933 fprintf (vect_dump
, " vectorizing a pointer ref: ");
2934 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
2937 /** (1) Create the new vector-pointer variable: **/
2938 vect_ptr_type
= build_pointer_type (vectype
);
2939 base
= get_base_address (DR_REF (dr
));
2941 && TREE_CODE (base
) == MEM_REF
)
2943 = build_qualified_type (vect_ptr_type
,
2944 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base
, 0))));
2945 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
2946 get_name (base_name
));
2948 /* Vector types inherit the alias set of their component type by default so
2949 we need to use a ref-all pointer if the data reference does not conflict
2950 with the created vector data reference because it is not addressable. */
2951 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr
),
2952 get_alias_set (DR_REF (dr
))))
2955 = build_pointer_type_for_mode (vectype
,
2956 TYPE_MODE (vect_ptr_type
), true);
2957 vect_ptr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
2958 get_name (base_name
));
2961 /* Likewise for any of the data references in the stmt group. */
2962 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info
) > 1)
2964 gimple orig_stmt
= STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info
);
2967 tree lhs
= gimple_assign_lhs (orig_stmt
);
2968 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr
),
2969 get_alias_set (lhs
)))
2972 = build_pointer_type_for_mode (vectype
,
2973 TYPE_MODE (vect_ptr_type
), true);
2975 = vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
2976 get_name (base_name
));
2980 orig_stmt
= STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt
));
2985 add_referenced_var (vect_ptr
);
2987 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2988 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2989 def-use update cycles for the pointer: One relative to the outer-loop
2990 (LOOP), which is what steps (3) and (4) below do. The other is relative
2991 to the inner-loop (which is the inner-most loop containing the dataref),
2992 and this is done be step (5) below.
2994 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2995 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2996 redundant. Steps (3),(4) create the following:
2999 LOOP: vp1 = phi(vp0,vp2)
3005 If there is an inner-loop nested in loop, then step (5) will also be
3006 applied, and an additional update in the inner-loop will be created:
3009 LOOP: vp1 = phi(vp0,vp2)
3011 inner: vp3 = phi(vp1,vp4)
3012 vp4 = vp3 + inner_step
3018 /** (3) Calculate the initial address the vector-pointer, and set
3019 the vector-pointer to point to it before the loop: **/
3021 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3023 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
3029 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmt_list
);
3030 gcc_assert (!new_bb
);
3033 gsi_insert_seq_before (&gsi
, new_stmt_list
, GSI_SAME_STMT
);
3036 *initial_address
= new_temp
;
3038 /* Create: p = (vectype *) initial_base */
3039 if (TREE_CODE (new_temp
) != SSA_NAME
3040 || !useless_type_conversion_p (vect_ptr_type
, TREE_TYPE (new_temp
)))
3042 vec_stmt
= gimple_build_assign (vect_ptr
,
3043 fold_convert (vect_ptr_type
, new_temp
));
3044 vect_ptr_init
= make_ssa_name (vect_ptr
, vec_stmt
);
3045 /* Copy the points-to information if it exists. */
3046 if (DR_PTR_INFO (dr
))
3047 duplicate_ssa_name_ptr_info (vect_ptr_init
, DR_PTR_INFO (dr
));
3048 gimple_assign_set_lhs (vec_stmt
, vect_ptr_init
);
3051 new_bb
= gsi_insert_on_edge_immediate (pe
, vec_stmt
);
3052 gcc_assert (!new_bb
);
3055 gsi_insert_before (&gsi
, vec_stmt
, GSI_SAME_STMT
);
3058 vect_ptr_init
= new_temp
;
3060 /** (4) Handle the updating of the vector-pointer inside the loop.
3061 This is needed when ONLY_INIT is false, and also when AT_LOOP
3062 is the inner-loop nested in LOOP (during outer-loop vectorization).
3065 /* No update in loop is required. */
3066 if (only_init
&& (!loop_vinfo
|| at_loop
== loop
))
3067 vptr
= vect_ptr_init
;
3070 /* The step of the vector pointer is the Vector Size. */
3071 tree step
= TYPE_SIZE_UNIT (vectype
);
3072 /* One exception to the above is when the scalar step of the load in
3073 LOOP is zero. In this case the step here is also zero. */
3075 step
= size_zero_node
;
3077 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
3079 create_iv (vect_ptr_init
,
3080 fold_convert (vect_ptr_type
, step
),
3081 vect_ptr
, loop
, &incr_gsi
, insert_after
,
3082 &indx_before_incr
, &indx_after_incr
);
3083 incr
= gsi_stmt (incr_gsi
);
3084 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3086 /* Copy the points-to information if it exists. */
3087 if (DR_PTR_INFO (dr
))
3089 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3090 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3095 vptr
= indx_before_incr
;
3098 if (!nested_in_vect_loop
|| only_init
)
3102 /** (5) Handle the updating of the vector-pointer inside the inner-loop
3103 nested in LOOP, if exists: **/
3105 gcc_assert (nested_in_vect_loop
);
3108 standard_iv_increment_position (containing_loop
, &incr_gsi
,
3110 create_iv (vptr
, fold_convert (vect_ptr_type
, DR_STEP (dr
)), vect_ptr
,
3111 containing_loop
, &incr_gsi
, insert_after
, &indx_before_incr
,
3113 incr
= gsi_stmt (incr_gsi
);
3114 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3116 /* Copy the points-to information if it exists. */
3117 if (DR_PTR_INFO (dr
))
3119 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3120 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3125 return indx_before_incr
;
3132 /* Function bump_vector_ptr
3134 Increment a pointer (to a vector type) by vector-size. If requested,
3135 i.e. if PTR-INCR is given, then also connect the new increment stmt
3136 to the existing def-use update-chain of the pointer, by modifying
3137 the PTR_INCR as illustrated below:
3139 The pointer def-use update-chain before this function:
3140 DATAREF_PTR = phi (p_0, p_2)
3142 PTR_INCR: p_2 = DATAREF_PTR + step
3144 The pointer def-use update-chain after this function:
3145 DATAREF_PTR = phi (p_0, p_2)
3147 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3149 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3152 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3154 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3155 the loop. The increment amount across iterations is expected
3157 BSI - location where the new update stmt is to be placed.
3158 STMT - the original scalar memory-access stmt that is being vectorized.
3159 BUMP - optional. The offset by which to bump the pointer. If not given,
3160 the offset is assumed to be vector_size.
3162 Output: Return NEW_DATAREF_PTR as illustrated above.
3167 bump_vector_ptr (tree dataref_ptr
, gimple ptr_incr
, gimple_stmt_iterator
*gsi
,
3168 gimple stmt
, tree bump
)
3170 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3171 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3172 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3173 tree ptr_var
= SSA_NAME_VAR (dataref_ptr
);
3174 tree update
= TYPE_SIZE_UNIT (vectype
);
3177 use_operand_p use_p
;
3178 tree new_dataref_ptr
;
3183 incr_stmt
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, ptr_var
,
3184 dataref_ptr
, update
);
3185 new_dataref_ptr
= make_ssa_name (ptr_var
, incr_stmt
);
3186 gimple_assign_set_lhs (incr_stmt
, new_dataref_ptr
);
3187 vect_finish_stmt_generation (stmt
, incr_stmt
, gsi
);
3189 /* Copy the points-to information if it exists. */
3190 if (DR_PTR_INFO (dr
))
3191 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
3194 return new_dataref_ptr
;
3196 /* Update the vector-pointer's cross-iteration increment. */
3197 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
3199 tree use
= USE_FROM_PTR (use_p
);
3201 if (use
== dataref_ptr
)
3202 SET_USE (use_p
, new_dataref_ptr
);
3204 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
3207 return new_dataref_ptr
;
3211 /* Function vect_create_destination_var.
3213 Create a new temporary of type VECTYPE. */
3216 vect_create_destination_var (tree scalar_dest
, tree vectype
)
3219 const char *new_name
;
3221 enum vect_var_kind kind
;
3223 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
3224 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
3226 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
3228 new_name
= get_name (scalar_dest
);
3231 vec_dest
= vect_get_new_vect_var (type
, kind
, new_name
);
3232 add_referenced_var (vec_dest
);
3237 /* Function vect_strided_store_supported.
3239 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3240 and FALSE otherwise. */
3243 vect_strided_store_supported (tree vectype
)
3245 optab interleave_high_optab
, interleave_low_optab
;
3246 enum machine_mode mode
;
3248 mode
= TYPE_MODE (vectype
);
3250 /* Check that the operation is supported. */
3251 interleave_high_optab
= optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR
,
3252 vectype
, optab_default
);
3253 interleave_low_optab
= optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR
,
3254 vectype
, optab_default
);
3255 if (!interleave_high_optab
|| !interleave_low_optab
)
3257 if (vect_print_dump_info (REPORT_DETAILS
))
3258 fprintf (vect_dump
, "no optab for interleave.");
3262 if (optab_handler (interleave_high_optab
, mode
) == CODE_FOR_nothing
3263 || optab_handler (interleave_low_optab
, mode
) == CODE_FOR_nothing
)
3265 if (vect_print_dump_info (REPORT_DETAILS
))
3266 fprintf (vect_dump
, "interleave op not supported by target.");
3274 /* Function vect_permute_store_chain.
3276 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3277 a power of 2, generate interleave_high/low stmts to reorder the data
3278 correctly for the stores. Return the final references for stores in
3281 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3282 The input is 4 vectors each containing 8 elements. We assign a number to each
3283 element, the input sequence is:
3285 1st vec: 0 1 2 3 4 5 6 7
3286 2nd vec: 8 9 10 11 12 13 14 15
3287 3rd vec: 16 17 18 19 20 21 22 23
3288 4th vec: 24 25 26 27 28 29 30 31
3290 The output sequence should be:
3292 1st vec: 0 8 16 24 1 9 17 25
3293 2nd vec: 2 10 18 26 3 11 19 27
3294 3rd vec: 4 12 20 28 5 13 21 30
3295 4th vec: 6 14 22 30 7 15 23 31
3297 i.e., we interleave the contents of the four vectors in their order.
3299 We use interleave_high/low instructions to create such output. The input of
3300 each interleave_high/low operation is two vectors:
3303 the even elements of the result vector are obtained left-to-right from the
3304 high/low elements of the first vector. The odd elements of the result are
3305 obtained left-to-right from the high/low elements of the second vector.
3306 The output of interleave_high will be: 0 4 1 5
3307 and of interleave_low: 2 6 3 7
3310 The permutation is done in log LENGTH stages. In each stage interleave_high
3311 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3312 where the first argument is taken from the first half of DR_CHAIN and the
3313 second argument from it's second half.
3316 I1: interleave_high (1st vec, 3rd vec)
3317 I2: interleave_low (1st vec, 3rd vec)
3318 I3: interleave_high (2nd vec, 4th vec)
3319 I4: interleave_low (2nd vec, 4th vec)
3321 The output for the first stage is:
3323 I1: 0 16 1 17 2 18 3 19
3324 I2: 4 20 5 21 6 22 7 23
3325 I3: 8 24 9 25 10 26 11 27
3326 I4: 12 28 13 29 14 30 15 31
3328 The output of the second stage, i.e. the final result is:
3330 I1: 0 8 16 24 1 9 17 25
3331 I2: 2 10 18 26 3 11 19 27
3332 I3: 4 12 20 28 5 13 21 30
3333 I4: 6 14 22 30 7 15 23 31. */
3336 vect_permute_store_chain (VEC(tree
,heap
) *dr_chain
,
3337 unsigned int length
,
3339 gimple_stmt_iterator
*gsi
,
3340 VEC(tree
,heap
) **result_chain
)
3342 tree perm_dest
, vect1
, vect2
, high
, low
;
3344 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
3347 enum tree_code high_code
, low_code
;
3349 /* Check that the operation is supported. */
3350 if (!vect_strided_store_supported (vectype
))
3353 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
3355 for (i
= 0; i
< exact_log2 (length
); i
++)
3357 for (j
= 0; j
< length
/2; j
++)
3359 vect1
= VEC_index (tree
, dr_chain
, j
);
3360 vect2
= VEC_index (tree
, dr_chain
, j
+length
/2);
3362 /* Create interleaving stmt:
3363 in the case of big endian:
3364 high = interleave_high (vect1, vect2)
3365 and in the case of little endian:
3366 high = interleave_low (vect1, vect2). */
3367 perm_dest
= create_tmp_var (vectype
, "vect_inter_high");
3368 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3369 add_referenced_var (perm_dest
);
3370 if (BYTES_BIG_ENDIAN
)
3372 high_code
= VEC_INTERLEAVE_HIGH_EXPR
;
3373 low_code
= VEC_INTERLEAVE_LOW_EXPR
;
3377 low_code
= VEC_INTERLEAVE_HIGH_EXPR
;
3378 high_code
= VEC_INTERLEAVE_LOW_EXPR
;
3380 perm_stmt
= gimple_build_assign_with_ops (high_code
, perm_dest
,
3382 high
= make_ssa_name (perm_dest
, perm_stmt
);
3383 gimple_assign_set_lhs (perm_stmt
, high
);
3384 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3385 VEC_replace (tree
, *result_chain
, 2*j
, high
);
3387 /* Create interleaving stmt:
3388 in the case of big endian:
3389 low = interleave_low (vect1, vect2)
3390 and in the case of little endian:
3391 low = interleave_high (vect1, vect2). */
3392 perm_dest
= create_tmp_var (vectype
, "vect_inter_low");
3393 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3394 add_referenced_var (perm_dest
);
3395 perm_stmt
= gimple_build_assign_with_ops (low_code
, perm_dest
,
3397 low
= make_ssa_name (perm_dest
, perm_stmt
);
3398 gimple_assign_set_lhs (perm_stmt
, low
);
3399 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3400 VEC_replace (tree
, *result_chain
, 2*j
+1, low
);
3402 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
3407 /* Function vect_setup_realignment
3409 This function is called when vectorizing an unaligned load using
3410 the dr_explicit_realign[_optimized] scheme.
3411 This function generates the following code at the loop prolog:
3414 x msq_init = *(floor(p)); # prolog load
3415 realignment_token = call target_builtin;
3417 x msq = phi (msq_init, ---)
3419 The stmts marked with x are generated only for the case of
3420 dr_explicit_realign_optimized.
3422 The code above sets up a new (vector) pointer, pointing to the first
3423 location accessed by STMT, and a "floor-aligned" load using that pointer.
3424 It also generates code to compute the "realignment-token" (if the relevant
3425 target hook was defined), and creates a phi-node at the loop-header bb
3426 whose arguments are the result of the prolog-load (created by this
3427 function) and the result of a load that takes place in the loop (to be
3428 created by the caller to this function).
3430 For the case of dr_explicit_realign_optimized:
3431 The caller to this function uses the phi-result (msq) to create the
3432 realignment code inside the loop, and sets up the missing phi argument,
3435 msq = phi (msq_init, lsq)
3436 lsq = *(floor(p')); # load in loop
3437 result = realign_load (msq, lsq, realignment_token);
3439 For the case of dr_explicit_realign:
3441 msq = *(floor(p)); # load in loop
3443 lsq = *(floor(p')); # load in loop
3444 result = realign_load (msq, lsq, realignment_token);
3447 STMT - (scalar) load stmt to be vectorized. This load accesses
3448 a memory location that may be unaligned.
3449 BSI - place where new code is to be inserted.
3450 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3454 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3455 target hook, if defined.
3456 Return value - the result of the loop-header phi node. */
3459 vect_setup_realignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
3460 tree
*realignment_token
,
3461 enum dr_alignment_support alignment_support_scheme
,
3463 struct loop
**at_loop
)
3465 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3466 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3467 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3468 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3470 tree scalar_dest
= gimple_assign_lhs (stmt
);
3477 tree msq_init
= NULL_TREE
;
3480 tree msq
= NULL_TREE
;
3481 gimple_seq stmts
= NULL
;
3483 bool compute_in_loop
= false;
3484 bool nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
3485 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
3486 struct loop
*loop_for_initial_load
;
3488 gcc_assert (alignment_support_scheme
== dr_explicit_realign
3489 || alignment_support_scheme
== dr_explicit_realign_optimized
);
3491 /* We need to generate three things:
3492 1. the misalignment computation
3493 2. the extra vector load (for the optimized realignment scheme).
3494 3. the phi node for the two vectors from which the realignment is
3495 done (for the optimized realignment scheme).
3498 /* 1. Determine where to generate the misalignment computation.
3500 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3501 calculation will be generated by this function, outside the loop (in the
3502 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3503 caller, inside the loop.
3505 Background: If the misalignment remains fixed throughout the iterations of
3506 the loop, then both realignment schemes are applicable, and also the
3507 misalignment computation can be done outside LOOP. This is because we are
3508 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3509 are a multiple of VS (the Vector Size), and therefore the misalignment in
3510 different vectorized LOOP iterations is always the same.
3511 The problem arises only if the memory access is in an inner-loop nested
3512 inside LOOP, which is now being vectorized using outer-loop vectorization.
3513 This is the only case when the misalignment of the memory access may not
3514 remain fixed throughout the iterations of the inner-loop (as explained in
3515 detail in vect_supportable_dr_alignment). In this case, not only is the
3516 optimized realignment scheme not applicable, but also the misalignment
3517 computation (and generation of the realignment token that is passed to
3518 REALIGN_LOAD) have to be done inside the loop.
3520 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3521 or not, which in turn determines if the misalignment is computed inside
3522 the inner-loop, or outside LOOP. */
3524 if (init_addr
!= NULL_TREE
)
3526 compute_in_loop
= true;
3527 gcc_assert (alignment_support_scheme
== dr_explicit_realign
);
3531 /* 2. Determine where to generate the extra vector load.
3533 For the optimized realignment scheme, instead of generating two vector
3534 loads in each iteration, we generate a single extra vector load in the
3535 preheader of the loop, and in each iteration reuse the result of the
3536 vector load from the previous iteration. In case the memory access is in
3537 an inner-loop nested inside LOOP, which is now being vectorized using
3538 outer-loop vectorization, we need to determine whether this initial vector
3539 load should be generated at the preheader of the inner-loop, or can be
3540 generated at the preheader of LOOP. If the memory access has no evolution
3541 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3542 to be generated inside LOOP (in the preheader of the inner-loop). */
3544 if (nested_in_vect_loop
)
3546 tree outerloop_step
= STMT_VINFO_DR_STEP (stmt_info
);
3547 bool invariant_in_outerloop
=
3548 (tree_int_cst_compare (outerloop_step
, size_zero_node
) == 0);
3549 loop_for_initial_load
= (invariant_in_outerloop
? loop
: loop
->inner
);
3552 loop_for_initial_load
= loop
;
3554 *at_loop
= loop_for_initial_load
;
3556 /* 3. For the case of the optimized realignment, create the first vector
3557 load at the loop preheader. */
3559 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
3561 /* Create msq_init = *(floor(p1)) in the loop preheader */
3563 gcc_assert (!compute_in_loop
);
3564 pe
= loop_preheader_edge (loop_for_initial_load
);
3565 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3566 ptr
= vect_create_data_ref_ptr (stmt
, loop_for_initial_load
, NULL_TREE
,
3567 &init_addr
, &inc
, true, &inv_p
);
3568 new_stmt
= gimple_build_assign_with_ops
3569 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
3570 build_int_cst (TREE_TYPE (ptr
),
3571 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
3572 new_temp
= make_ssa_name (SSA_NAME_VAR (ptr
), new_stmt
);
3573 gimple_assign_set_lhs (new_stmt
, new_temp
);
3574 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
3575 gcc_assert (!new_bb
);
3576 data_ref
= build_simple_mem_ref (new_temp
);
3577 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
3578 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3579 gimple_assign_set_lhs (new_stmt
, new_temp
);
3580 mark_symbols_for_renaming (new_stmt
);
3581 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
3582 gcc_assert (!new_bb
);
3583 msq_init
= gimple_assign_lhs (new_stmt
);
3586 /* 4. Create realignment token using a target builtin, if available.
3587 It is done either inside the containing loop, or before LOOP (as
3588 determined above). */
3590 if (targetm
.vectorize
.builtin_mask_for_load
)
3594 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
3595 if (compute_in_loop
)
3596 gcc_assert (init_addr
); /* already computed by the caller. */
3599 /* Generate the INIT_ADDR computation outside LOOP. */
3600 init_addr
= vect_create_addr_base_for_vector_ref (stmt
, &stmts
,
3602 pe
= loop_preheader_edge (loop
);
3603 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3604 gcc_assert (!new_bb
);
3607 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
3608 new_stmt
= gimple_build_call (builtin_decl
, 1, init_addr
);
3610 vect_create_destination_var (scalar_dest
,
3611 gimple_call_return_type (new_stmt
));
3612 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3613 gimple_call_set_lhs (new_stmt
, new_temp
);
3615 if (compute_in_loop
)
3616 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
3619 /* Generate the misalignment computation outside LOOP. */
3620 pe
= loop_preheader_edge (loop
);
3621 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
3622 gcc_assert (!new_bb
);
3625 *realignment_token
= gimple_call_lhs (new_stmt
);
3627 /* The result of the CALL_EXPR to this builtin is determined from
3628 the value of the parameter and no global variables are touched
3629 which makes the builtin a "const" function. Requiring the
3630 builtin to have the "const" attribute makes it unnecessary
3631 to call mark_call_clobbered. */
3632 gcc_assert (TREE_READONLY (builtin_decl
));
3635 if (alignment_support_scheme
== dr_explicit_realign
)
3638 gcc_assert (!compute_in_loop
);
3639 gcc_assert (alignment_support_scheme
== dr_explicit_realign_optimized
);
3642 /* 5. Create msq = phi <msq_init, lsq> in loop */
3644 pe
= loop_preheader_edge (containing_loop
);
3645 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3646 msq
= make_ssa_name (vec_dest
, NULL
);
3647 phi_stmt
= create_phi_node (msq
, containing_loop
->header
);
3648 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
3649 add_phi_arg (phi_stmt
, msq_init
, pe
, UNKNOWN_LOCATION
);
3655 /* Function vect_strided_load_supported.
3657 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3658 and FALSE otherwise. */
3661 vect_strided_load_supported (tree vectype
)
3663 optab perm_even_optab
, perm_odd_optab
;
3664 enum machine_mode mode
;
3666 mode
= TYPE_MODE (vectype
);
3668 perm_even_optab
= optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR
, vectype
,
3670 if (!perm_even_optab
)
3672 if (vect_print_dump_info (REPORT_DETAILS
))
3673 fprintf (vect_dump
, "no optab for perm_even.");
3677 if (optab_handler (perm_even_optab
, mode
) == CODE_FOR_nothing
)
3679 if (vect_print_dump_info (REPORT_DETAILS
))
3680 fprintf (vect_dump
, "perm_even op not supported by target.");
3684 perm_odd_optab
= optab_for_tree_code (VEC_EXTRACT_ODD_EXPR
, vectype
,
3686 if (!perm_odd_optab
)
3688 if (vect_print_dump_info (REPORT_DETAILS
))
3689 fprintf (vect_dump
, "no optab for perm_odd.");
3693 if (optab_handler (perm_odd_optab
, mode
) == CODE_FOR_nothing
)
3695 if (vect_print_dump_info (REPORT_DETAILS
))
3696 fprintf (vect_dump
, "perm_odd op not supported by target.");
3703 /* Function vect_permute_load_chain.
3705 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3706 a power of 2, generate extract_even/odd stmts to reorder the input data
3707 correctly. Return the final references for loads in RESULT_CHAIN.
3709 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3710 The input is 4 vectors each containing 8 elements. We assign a number to each
3711 element, the input sequence is:
3713 1st vec: 0 1 2 3 4 5 6 7
3714 2nd vec: 8 9 10 11 12 13 14 15
3715 3rd vec: 16 17 18 19 20 21 22 23
3716 4th vec: 24 25 26 27 28 29 30 31
3718 The output sequence should be:
3720 1st vec: 0 4 8 12 16 20 24 28
3721 2nd vec: 1 5 9 13 17 21 25 29
3722 3rd vec: 2 6 10 14 18 22 26 30
3723 4th vec: 3 7 11 15 19 23 27 31
3725 i.e., the first output vector should contain the first elements of each
3726 interleaving group, etc.
3728 We use extract_even/odd instructions to create such output. The input of each
3729 extract_even/odd operation is two vectors
3733 and the output is the vector of extracted even/odd elements. The output of
3734 extract_even will be: 0 2 4 6
3735 and of extract_odd: 1 3 5 7
3738 The permutation is done in log LENGTH stages. In each stage extract_even and
3739 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3740 order. In our example,
3742 E1: extract_even (1st vec, 2nd vec)
3743 E2: extract_odd (1st vec, 2nd vec)
3744 E3: extract_even (3rd vec, 4th vec)
3745 E4: extract_odd (3rd vec, 4th vec)
3747 The output for the first stage will be:
3749 E1: 0 2 4 6 8 10 12 14
3750 E2: 1 3 5 7 9 11 13 15
3751 E3: 16 18 20 22 24 26 28 30
3752 E4: 17 19 21 23 25 27 29 31
3754 In order to proceed and create the correct sequence for the next stage (or
3755 for the correct output, if the second stage is the last one, as in our
3756 example), we first put the output of extract_even operation and then the
3757 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3758 The input for the second stage is:
3760 1st vec (E1): 0 2 4 6 8 10 12 14
3761 2nd vec (E3): 16 18 20 22 24 26 28 30
3762 3rd vec (E2): 1 3 5 7 9 11 13 15
3763 4th vec (E4): 17 19 21 23 25 27 29 31
3765 The output of the second stage:
3767 E1: 0 4 8 12 16 20 24 28
3768 E2: 2 6 10 14 18 22 26 30
3769 E3: 1 5 9 13 17 21 25 29
3770 E4: 3 7 11 15 19 23 27 31
3772 And RESULT_CHAIN after reordering:
3774 1st vec (E1): 0 4 8 12 16 20 24 28
3775 2nd vec (E3): 1 5 9 13 17 21 25 29
3776 3rd vec (E2): 2 6 10 14 18 22 26 30
3777 4th vec (E4): 3 7 11 15 19 23 27 31. */
3780 vect_permute_load_chain (VEC(tree
,heap
) *dr_chain
,
3781 unsigned int length
,
3783 gimple_stmt_iterator
*gsi
,
3784 VEC(tree
,heap
) **result_chain
)
3786 tree perm_dest
, data_ref
, first_vect
, second_vect
;
3788 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
3792 /* Check that the operation is supported. */
3793 if (!vect_strided_load_supported (vectype
))
3796 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
3797 for (i
= 0; i
< exact_log2 (length
); i
++)
3799 for (j
= 0; j
< length
; j
+=2)
3801 first_vect
= VEC_index (tree
, dr_chain
, j
);
3802 second_vect
= VEC_index (tree
, dr_chain
, j
+1);
3804 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3805 perm_dest
= create_tmp_var (vectype
, "vect_perm_even");
3806 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3807 add_referenced_var (perm_dest
);
3809 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR
,
3810 perm_dest
, first_vect
,
3813 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3814 gimple_assign_set_lhs (perm_stmt
, data_ref
);
3815 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3816 mark_symbols_for_renaming (perm_stmt
);
3818 VEC_replace (tree
, *result_chain
, j
/2, data_ref
);
3820 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3821 perm_dest
= create_tmp_var (vectype
, "vect_perm_odd");
3822 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3823 add_referenced_var (perm_dest
);
3825 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR
,
3826 perm_dest
, first_vect
,
3828 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3829 gimple_assign_set_lhs (perm_stmt
, data_ref
);
3830 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3831 mark_symbols_for_renaming (perm_stmt
);
3833 VEC_replace (tree
, *result_chain
, j
/2+length
/2, data_ref
);
3835 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
3841 /* Function vect_transform_strided_load.
3843 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3844 to perform their permutation and ascribe the result vectorized statements to
3845 the scalar statements.
3849 vect_transform_strided_load (gimple stmt
, VEC(tree
,heap
) *dr_chain
, int size
,
3850 gimple_stmt_iterator
*gsi
)
3852 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3853 gimple first_stmt
= DR_GROUP_FIRST_DR (stmt_info
);
3854 gimple next_stmt
, new_stmt
;
3855 VEC(tree
,heap
) *result_chain
= NULL
;
3856 unsigned int i
, gap_count
;
3859 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3860 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3861 vectors, that are ready for vector computation. */
3862 result_chain
= VEC_alloc (tree
, heap
, size
);
3864 if (!vect_permute_load_chain (dr_chain
, size
, stmt
, gsi
, &result_chain
))
3867 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3868 Since we scan the chain starting from it's first node, their order
3869 corresponds the order of data-refs in RESULT_CHAIN. */
3870 next_stmt
= first_stmt
;
3872 for (i
= 0; VEC_iterate (tree
, result_chain
, i
, tmp_data_ref
); i
++)
3877 /* Skip the gaps. Loads created for the gaps will be removed by dead
3878 code elimination pass later. No need to check for the first stmt in
3879 the group, since it always exists.
3880 DR_GROUP_GAP is the number of steps in elements from the previous
3881 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3882 correspond to the gaps.
3884 if (next_stmt
!= first_stmt
3885 && gap_count
< DR_GROUP_GAP (vinfo_for_stmt (next_stmt
)))
3893 new_stmt
= SSA_NAME_DEF_STMT (tmp_data_ref
);
3894 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3895 copies, and we put the new vector statement in the first available
3897 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)))
3898 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
3901 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
3904 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
3906 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
));
3909 prev_stmt
= rel_stmt
;
3911 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt
));
3914 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
)) =
3919 next_stmt
= DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt
));
3921 /* If NEXT_STMT accesses the same DR as the previous statement,
3922 put the same TMP_DATA_REF as its vectorized statement; otherwise
3923 get the next data-ref from RESULT_CHAIN. */
3924 if (!next_stmt
|| !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
3929 VEC_free (tree
, heap
, result_chain
);
3933 /* Function vect_force_dr_alignment_p.
3935 Returns whether the alignment of a DECL can be forced to be aligned
3936 on ALIGNMENT bit boundary. */
3939 vect_can_force_dr_alignment_p (const_tree decl
, unsigned int alignment
)
3941 if (TREE_CODE (decl
) != VAR_DECL
)
3944 if (DECL_EXTERNAL (decl
))
3947 if (TREE_ASM_WRITTEN (decl
))
3950 if (TREE_STATIC (decl
))
3951 return (alignment
<= MAX_OFILE_ALIGNMENT
);
3953 return (alignment
<= MAX_STACK_ALIGNMENT
);
3957 /* Return whether the data reference DR is supported with respect to its
3959 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
3960 it is aligned, i.e., check if it is possible to vectorize it with different
3963 enum dr_alignment_support
3964 vect_supportable_dr_alignment (struct data_reference
*dr
,
3965 bool check_aligned_accesses
)
3967 gimple stmt
= DR_STMT (dr
);
3968 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3969 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3970 enum machine_mode mode
= TYPE_MODE (vectype
);
3971 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3972 struct loop
*vect_loop
= NULL
;
3973 bool nested_in_vect_loop
= false;
3975 if (aligned_access_p (dr
) && !check_aligned_accesses
)
3979 /* FORNOW: Misaligned accesses are supported only in loops. */
3980 return dr_unaligned_unsupported
;
3982 vect_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3983 nested_in_vect_loop
= nested_in_vect_loop_p (vect_loop
, stmt
);
3985 /* Possibly unaligned access. */
3987 /* We can choose between using the implicit realignment scheme (generating
3988 a misaligned_move stmt) and the explicit realignment scheme (generating
3989 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3990 realignment scheme: optimized, and unoptimized.
3991 We can optimize the realignment only if the step between consecutive
3992 vector loads is equal to the vector size. Since the vector memory
3993 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3994 is guaranteed that the misalignment amount remains the same throughout the
3995 execution of the vectorized loop. Therefore, we can create the
3996 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3997 at the loop preheader.
3999 However, in the case of outer-loop vectorization, when vectorizing a
4000 memory access in the inner-loop nested within the LOOP that is now being
4001 vectorized, while it is guaranteed that the misalignment of the
4002 vectorized memory access will remain the same in different outer-loop
4003 iterations, it is *not* guaranteed that is will remain the same throughout
4004 the execution of the inner-loop. This is because the inner-loop advances
4005 with the original scalar step (and not in steps of VS). If the inner-loop
4006 step happens to be a multiple of VS, then the misalignment remains fixed
4007 and we can use the optimized realignment scheme. For example:
4013 When vectorizing the i-loop in the above example, the step between
4014 consecutive vector loads is 1, and so the misalignment does not remain
4015 fixed across the execution of the inner-loop, and the realignment cannot
4016 be optimized (as illustrated in the following pseudo vectorized loop):
4018 for (i=0; i<N; i+=4)
4019 for (j=0; j<M; j++){
4020 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4021 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4022 // (assuming that we start from an aligned address).
4025 We therefore have to use the unoptimized realignment scheme:
4027 for (i=0; i<N; i+=4)
4028 for (j=k; j<M; j+=4)
4029 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4030 // that the misalignment of the initial address is
4033 The loop can then be vectorized as follows:
4035 for (k=0; k<4; k++){
4036 rt = get_realignment_token (&vp[k]);
4037 for (i=0; i<N; i+=4){
4039 for (j=k; j<M; j+=4){
4041 va = REALIGN_LOAD <v1,v2,rt>;
4048 if (DR_IS_READ (dr
))
4050 bool is_packed
= false;
4051 tree type
= (TREE_TYPE (DR_REF (dr
)));
4053 if (optab_handler (vec_realign_load_optab
, mode
) != CODE_FOR_nothing
4054 && (!targetm
.vectorize
.builtin_mask_for_load
4055 || targetm
.vectorize
.builtin_mask_for_load ()))
4057 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4058 if (nested_in_vect_loop
4059 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4060 != GET_MODE_SIZE (TYPE_MODE (vectype
))))
4061 return dr_explicit_realign
;
4063 return dr_explicit_realign_optimized
;
4065 if (!known_alignment_for_access_p (dr
))
4067 tree ba
= DR_BASE_OBJECT (dr
);
4070 is_packed
= contains_packed_reference (ba
);
4073 if (targetm
.vectorize
.
4074 support_vector_misalignment (mode
, type
,
4075 DR_MISALIGNMENT (dr
), is_packed
))
4076 /* Can't software pipeline the loads, but can at least do them. */
4077 return dr_unaligned_supported
;
4081 bool is_packed
= false;
4082 tree type
= (TREE_TYPE (DR_REF (dr
)));
4084 if (!known_alignment_for_access_p (dr
))
4086 tree ba
= DR_BASE_OBJECT (dr
);
4089 is_packed
= contains_packed_reference (ba
);
4092 if (targetm
.vectorize
.
4093 support_vector_misalignment (mode
, type
,
4094 DR_MISALIGNMENT (dr
), is_packed
))
4095 return dr_unaligned_supported
;
4099 return dr_unaligned_unsupported
;