/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */
tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT (FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
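/* As a purely illustrative sketch (a hypothetical loop, not taken from any
   testcase): for 4-byte elements in

       for (i = 0; i < n; i++)
         {
           a[2*i + 1] = x;          <-- DR_INIT == 4
           a[2*i]     = y;          <-- DR_INIT == 0
         }

   the two stores interleave.  The store with the smaller INIT (a[2*i])
   becomes FIRST_DR of the chain, and the other store is linked after it
   through NEXT_DR, giving the chain a[2*i] -> a[2*i+1].  */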
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                            old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;

  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */
static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                            TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                            TREE_OPERAND (offset2, 1));

  return res;
}
/* Check dependence between DRA and DRB for basic block vectorization.
   If the accesses share same bases and offsets, we can compare their initial
   constant offsets to decide whether they differ or not.  In case of a read-
   write dependence we check that the load is before the store to ensure that
   vectorization will not change the order of the accesses.  */
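/* Illustrative sketch (hypothetical basic-block code, not from a testcase):
   given

       x = a[0];         <-- load,  DR_INIT == 0
       a[0] = y;         <-- store, DR_INIT == 0

   both refs have the same base, offset and init, so they access the same
   location; since the load comes before the store, the read-write dependence
   is harmless and this check returns false.  If the store preceded the load,
   the dependence would forbid basic block vectorization.  */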
static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we verify
     it here.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */
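/* Illustrative sketch (hypothetical accesses, not from a testcase): for
   4-byte elements with DR_STEP == 16 for both refs and

       a[4*i]         DR_INIT == 0
       a[4*i + 2]     DR_INIT == 8

   the difference of the inits (8) is smaller than the step (16) and is a
   multiple of the type size (4), so the two refs are recognized as members
   of a single interleaving chain.  */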
static bool
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return false;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is greater than the difference between
        data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return false;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if (step && (init_a - init_b) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if (step && (init_b - init_a) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }

  return false;
}
/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */
static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}
/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */
static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */
static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */
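/* Illustrative note (hypothetical distance, not from a testcase): for a
   known dependence with distance 2 in a loop whose current *MAX_VF is 8,
   the distance is neither zero nor as large as *MAX_VF, so *MAX_VF is
   reduced to 2; vectorizing with a factor of at most 2 then keeps the
   dependent iterations in their original order.  */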
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf,
                                  bool *data_dependence_in_bb)
{
  unsigned int i;
  struct loop *loop = NULL;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Don't bother to analyze statements marked as unvectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                                  "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }

      /* When vectorizing a basic block unknown dependence can still mean
         strided access.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* We deal with read-write dependencies in basic blocks later (by
         verifying that all the loads in the basic block are before all the
         stores).  */
      *data_dependence_in_bb = true;
      return false;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is not applicable, hence, in case of known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Do not vectorize basic blocks with write-write dependences.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check if this dependence is allowed in basic block vectorization.  */
      return vect_drs_dependent_in_basic_block (dra, drb);
    }

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
                     *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized, possible dependence "
                              "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */
bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
                                   bb_vec_info bb_vinfo, int *max_vf,
                                   bool *data_dependence_in_bb)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = NULL;
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (loop_vinfo)
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  else
    ddrs = BB_VINFO_DDRS (bb_vinfo);

  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
                                          data_dependence_in_bb))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
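/* Illustrative example (hypothetical data-ref, not from a testcase): for a
   16-byte vector type and a ref whose DR_BASE_ADDRESS is known to be
   16-byte aligned with DR_INIT == 12, the computed misalignment is
   12 % 16 == 12 bytes; with DR_INIT == 32 it would be 0, i.e. aligned.  */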
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "force alignment of ");
          print_generic_expr (vect_dump, ref, TDF_SLIM);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */
static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
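/* Illustrative arithmetic (hypothetical values, not from a testcase): with
   4-byte elements, a 16-byte vector size, DR_MISALIGNMENT (DR) == 8 and
   NPEEL == 3, the adjusted misalignment is (8 + 3*4) & 15 == 4 bytes; for a
   negative-step DR the peel is subtracted instead, (8 - 3*4) & 15 == 12.  */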
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */
bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");

              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }

  return true;
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */
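/* Illustrative example (hypothetical access, not from a testcase): with a
   V4SI vector (4 elements) and a misalignment of 2 elements, peeling
   4 - 2 == 2 iterations makes the access aligned; but if the access is part
   of an interleaving group of size 3, then 2 is not a multiple of the group
   size and alignment is not reachable by peeling.  */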
static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  if (!supportable_dr_alignment)
    *inside_cost = VECT_MAX_COST;
  else
    {
      if (DR_IS_READ (dr))
        vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
      else
        vect_get_store_cost (dr, ncopies, inside_cost);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
             "outside_cost = %d.", *inside_cost, *outside_cost);
}
/* Hash function for the peeling hash table: hash on the number of peeled
   iterations.  */

static hashval_t
vect_peeling_hash (const void *elem)
{
  const struct _vect_peel_info *peel_info;

  peel_info = (const struct _vect_peel_info *) elem;
  return (hashval_t) peel_info->npeel;
}


/* Equality function for the peeling hash table: two entries are equal if
   they describe the same number of peeled iterations.  */

static int
vect_peeling_hash_eq (const void *elem1, const void *elem2)
{
  const struct _vect_peel_info *a, *b;

  a = (const struct _vect_peel_info *) elem1;
  b = (const struct _vect_peel_info *) elem2;
  return (a->npeel == b->npeel);
}
/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  void **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                                     &elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
                                 INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && !flag_vect_cost_model)
    slot->count += VECT_MAX_COST;
}
/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

static int
vect_peeling_hash_get_most_frequent (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info max = (vect_peel_extended_info) data;

  if (elem->count > max->peel_info.count)
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}
/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

static int
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info min = (vect_peel_extended_info) data;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
                         vect_get_single_scalar_iteraion_cost (loop_vinfo));

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }

  return 1;
}
/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr = NULL;

  if (flag_vect_cost_model)
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_lowest_cost, &res);
    }
  else
    {
      res.peel_info.count = 0;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_most_frequent, &res);
    }

  *npeel = res.peel_info.npeel;
  return res.peel_info.dr;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') which alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
        for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
          x = q[i];
          p[i] = y;
        }
        for (i = 3; i < N; i++){        # loop 2A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 3: combination of loop peeling and versioning:
        for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
          x = q[i];
          p[i] = y;
        }
        if (p is aligned) {
          for (i = 3; i<N; i++){        # loop 3A
            x = q[i];                   # DR_MISALIGNMENT(q) = 0
            p[i] = y;                   # DR_MISALIGNMENT(p) = 0
          }
        }
        else {
          for (i = 3; i<N; i++){        # loop 3B
            x = q[i];                   # DR_MISALIGNMENT(q) = 0
            p[i] = y;                   # DR_MISALIGNMENT(p) = unaligned
          }
        }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
                LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
                          htab_create (1, vect_peeling_hash,
                                       vect_peeling_hash_eq, free);

              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (negative
                           ? (mis - nelements) : (nelements - mis)) & (vf - 1);

              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  The vectorization factor will be 8.
                 If both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But the first one will be aligned also by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add full vector size, while we don't exceed
                 vectorization factor.
                 We do this automatically for cost model, since we calculate
                 cost for every peeling option.  */
              if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some other
                 access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (!flag_vect_cost_model)
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }

              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since we
                 have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know all the misalignment values, we prefer
                 peeling for data-ref that has maximum number of data-refs
                 with the same alignment, unless the target prefers to align
                 stores over loads.  */
              if (all_misalignments_unknown)
                {
                  if (same_align_drs_max < VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
                      || !dr0)
                    {
                      same_align_drs_max = VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
                      dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in the
                 loop, we choose peeling amount according to the known
                 accesses.  */
              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          if (!aligned_access_p (dr))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector alignment may not be reachable");

              break;
            }
        }
    }

  vect_versioning_for_alias_required
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling && all_misalignments_unknown
      && vect_supportable_dr_alignment (dr0, false))
    {
      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      if (first_store && DR_IS_READ (dr0))
        {
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;

          vect_get_data_access_cost (dr0, &load_inside_cost,
                                     &load_outside_cost);
          vect_get_data_access_cost (first_store, &store_inside_cost,
                                     &store_outside_cost);

          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
             aligning the load DR0).  */
          load_inside_penalty = store_inside_cost;
          load_outside_penalty = store_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (first_store))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                load_inside_penalty += load_inside_cost;
                load_outside_penalty += load_outside_cost;
              }
            else
              {
                load_inside_penalty += store_inside_cost;
                load_outside_penalty += store_outside_cost;
              }

          /* Calculate the penalty for leaving DR0 unaligned (by
             aligning the FIRST_STORE).  */
          store_inside_penalty = load_inside_cost;
          store_outside_penalty = load_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (dr0))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                store_inside_penalty += load_inside_cost;
                store_outside_penalty += load_outside_cost;
              }
            else
              {
                store_inside_penalty += store_inside_cost;
                store_outside_penalty += store_outside_cost;
              }

          if (load_inside_penalty > store_inside_penalty
              || (load_inside_penalty == store_inside_penalty
                  && load_outside_penalty > store_outside_penalty))
            dr0 = first_store;
        }

      /* In case there are only loads with different unknown misalignments, use
         peeling only if it may help to align other accesses in the loop.  */
      if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                            (vinfo_for_stmt (DR_STMT (dr0))))
          && vect_supportable_dr_alignment (dr0, false)
              != dr_unaligned_supported)
        do_peeling = false;
    }

  if (do_peeling && !dr0)
    {
      /* Peeling is possible, but there is no data access that is not supported
         unless aligned.  So we try to choose the best possible peeling.  */

      /* We should get here only if there are drs with known misalignment.  */
      gcc_assert (!all_misalignments_unknown);

      /* Choose the best peeling from the hash table.  */
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
      if (!dr0 || !npeel)
        do_peeling = false;
    }

  if (do_peeling)
    {
      stmt = DR_STMT (dr0);
      stmt_info = vinfo_for_stmt (stmt);
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          bool negative = tree_int_cst_compare (DR_STEP (dr0),
                                                size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 vectorization factor minus the misalignment as an element
                 count.  */
              mis = DR_MISALIGNMENT (dr0);
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
              npeel = (negative ? mis - nelements : nelements - mis) & (vf - 1);
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
        {
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          if (!stat)
            do_peeling = false;
          else
            return stat;
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          if (npeel)
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
          return stat;
        }
    }


  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
        flag_tree_vect_loop_version
        && optimize_loop_nest_for_speed_p (loop)
        && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);

          if (!supportable_dr_alignment)
            {
              gimple stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (gimple,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (gimple, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(gimple,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      gimple stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
  return stat;
}

/* Function vect_find_same_alignment_drs.

   Update group and alignment relations according to the chosen
   vectorization factor.  */

static void
vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
			      loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return;

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    return;

  /* Data-dependence analysis reports a distance vector of zero
     for data-references that overlap only in the first iteration
     but have different sign step (see PR45764).
     So as a sanity check require equal DR_STEP.  */
  if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
    return;

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
	fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist == 0
	  || (dist % vectorization_factor == 0 && dra_size == drb_size))
	{
	  /* Two references with distance zero have the same alignment.  */
	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
	  VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
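	  /* For example, with a vectorization factor of 4 and 4-byte
	     elements, two accesses whose dependence distance is 4 (or any
	     multiple of 4) iterations are separated in memory by a multiple
	     of the vector size, so they necessarily share the same
	     misalignment.  */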
	  if (vect_print_dump_info (REPORT_ALIGNMENT))
	    fprintf (vect_dump, "accesses have the same alignment.");
	  if (vect_print_dump_info (REPORT_DR_DETAILS))
	    {
	      fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
	      print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
	      fprintf (vect_dump, " and ");
	      print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
	    }
	}
    }
}

/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
				  bb_vec_info bb_vinfo)
{
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  /* Mark groups of data references with same alignment using
     data dependence information.  */
  if (loop_vinfo)
    {
      VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
      struct data_dependence_relation *ddr;
      unsigned int i;

      FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
	vect_find_same_alignment_drs (ddr, loop_vinfo);
    }

  if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	fprintf (vect_dump,
		 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  return true;
}

/* Analyze groups of strided accesses: check that DR belongs to a group of
   strided accesses of legal size, step, etc.  Detect gaps, single element
   interleaving, and other special cases.  Set strided access info.
   Collect groups of strided stores for further use in SLP analysis.  */

static bool
vect_analyze_group_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  HOST_WIDE_INT stride;
  bool slp_impossible = false;

  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of
     the interleaving group (including gaps).  */
  stride = dr_step / type_size;
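  /* For example, a group of 4 consecutive ints accessed with DR_STEP 16
     has TYPE_SIZE 4 and therefore STRIDE (the group size) 16/4 = 4.  */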

  /* Not consecutive access is possible only if it is a part of interleaving.  */
  if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    {
      /* Check if this DR is a part of interleaving, and is a single
	 element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads.  STEP must be a multiple of the
	 type size.  The size of the group must be a power of 2.  */
      if (DR_IS_READ (dr)
	  && (dr_step % type_size) == 0
	  && exact_log2 (stride) != -1)
	{
	  DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
	  DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
	  if (vect_print_dump_info (REPORT_DR_DETAILS))
	    {
	      fprintf (vect_dump, "Detected single element interleaving ");
	      print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
	      fprintf (vect_dump, " step ");
	      print_generic_expr (vect_dump, step, TDF_SLIM);
	    }
	  return true;
	}

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "not consecutive access ");
	  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	}

      if (bb_vinfo)
	{
	  /* Mark the statement as unvectorizable.  */
	  STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
	  return true;
	}

      return false;
    }

  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
    {
      /* First stmt in the interleaving chain.  Check the chain.  */
      gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree next_step;
      tree prev_init = DR_INIT (data_ref);
      gimple prev = stmt;
      HOST_WIDE_INT diff, count_in_bytes, gaps = 0;

      while (next)
	{
	  /* Skip same data-refs.  In case that two or more stmts share
	     data-ref (supported only for loads), we vectorize only the first
	     stmt, and the rest get their vectorized loads from the first
	     one.  */
	  if (!tree_int_cst_compare (DR_INIT (data_ref),
				     DR_INIT (STMT_VINFO_DATA_REF (
						vinfo_for_stmt (next)))))
	    {
	      if (DR_IS_WRITE (data_ref))
		{
		  if (vect_print_dump_info (REPORT_DETAILS))
		    fprintf (vect_dump, "Two store stmts share the same dr.");
		  return false;
		}

	      /* Check that there are no load-store dependencies for these
		 loads to prevent a case of load-store-load to the same
		 location.  */
	      if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
		  || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
		{
		  if (vect_print_dump_info (REPORT_DETAILS))
		    fprintf (vect_dump,
			     "READ_WRITE dependence in interleaving.");
		  return false;
		}

	      /* For load use the same data-ref load.  */
	      DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;

	      prev = next;
	      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
	      continue;
	    }

	  prev = next;

	  /* Check that all the accesses have the same STEP.  */
	  next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
	  if (tree_int_cst_compare (step, next_step))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "not consecutive access in interleaving");
	      return false;
	    }

	  data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
	  /* Check that the distance between two accesses is equal to the
	     type size.  Otherwise, we have gaps.  */
	  diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
		  - TREE_INT_CST_LOW (prev_init)) / type_size;
	  if (diff != 1)
	    {
	      /* FORNOW: SLP of accesses with gaps is not supported.  */
	      slp_impossible = true;
	      if (DR_IS_WRITE (data_ref))
		{
		  if (vect_print_dump_info (REPORT_DETAILS))
		    fprintf (vect_dump, "interleaved store with gaps");
		  return false;
		}

	      gaps += diff - 1;
	    }

	  /* Store the gap from the previous member of the group.  If there
	     is no gap in the access, DR_GROUP_GAP is always 1.  */
	  DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;

	  prev_init = DR_INIT (data_ref);
	  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
	  /* Count the number of data-refs in the chain.  */
	  count++;
	}

      /* COUNT is the number of accesses found, we multiply it by the size of
	 the type to get COUNT_IN_BYTES.  */
      count_in_bytes = type_size * count;
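      /* For example, a load group of 3 ints with one skipped element has
	 COUNT 3, COUNT_IN_BYTES 12 and GAPS 1, which still fits within a
	 step of 16 bytes.  */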

      /* Check that the size of the interleaving (including gaps) is not
	 greater than STEP.  */
      if (dr_step && dr_step < count_in_bytes + gaps * type_size)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "interleaving size is greater than step for ");
	      print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
	    }
	  return false;
	}

      /* Check that the size of the interleaving is equal to STEP for stores,
	 i.e., that there are no gaps.  */
      if (dr_step && dr_step != count_in_bytes)
	{
	  if (DR_IS_READ (dr))
	    {
	      slp_impossible = true;
	      /* There is a gap after the last load in the group.  This gap
		 is a difference between the stride and the number of
		 elements.  When there is no gap, this difference should
		 be 0.  */
	      DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
	    }
	  else
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "interleaved store with gaps");
	      return false;
	    }
	}

      /* Check that STEP is a multiple of type size.  */
      if (dr_step && (dr_step % type_size) != 0)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "step is not a multiple of type size: step ");
	      print_generic_expr (vect_dump, step, TDF_SLIM);
	      fprintf (vect_dump, " size ");
	      print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
				  TDF_SLIM);
	    }
	  return false;
	}

      /* FORNOW: we handle only interleaving that is a power of 2.
	 We don't fail here if it may be still possible to vectorize the
	 group using SLP.  If not, the size of the group will be checked in
	 vect_analyze_operations, and the vectorization will fail.  */
      if (exact_log2 (stride) == -1)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "interleaving is not a power of 2");

	  if (slp_impossible)
	    return false;
	}

      DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);

      /* SLP: create an SLP data structure for every interleaving group of
	 stores for further analysis in vect_analyse_slp.  */
      if (DR_IS_WRITE (dr) && !slp_impossible)
	{
	  if (loop_vinfo)
	    VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
			   stmt);
	  if (bb_vinfo)
	    VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
			   stmt);
	}
    }

  return true;
}

/* Analyze the access pattern of the data-reference DR.
   In case of non-consecutive accesses call vect_analyze_group_access() to
   analyze groups of strided accesses.  */

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (loop_vinfo && !step)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data-ref access in loop");
      return false;
    }

  /* Don't allow invariant accesses in loops.  */
  if (loop_vinfo && dr_step == 0)
    return false;

  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      /* Interleaved accesses are not yet supported within outer-loop
	 vectorization for references in the inner-loop.  */
      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      dr_step = TREE_INT_CST_LOW (step);

      if (dr_step == 0)
	{
	  if (vect_print_dump_info (REPORT_ALIGNMENT))
	    fprintf (vect_dump, "zero step in outer loop.");
	  if (DR_IS_READ (dr))
	    return true;
	  else
	    return false;
	}
    }

  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
      || (dr_step < 0
	  && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
    {
      /* Mark that it is not interleaving.  */
      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
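      /* Consecutive access: DR_STEP equals the element size (e.g. a step of
	 4 bytes for an int access), or minus the element size for an access
	 that walks backwards through memory.  */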
      return true;
    }

  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
	fprintf (vect_dump, "strided access in outer loop.");
      return false;
    }

  /* Not consecutive access - check if it's a part of interleaving group.  */
  return vect_analyze_group_access (dr);
}

/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
	   simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
	&& !vect_analyze_data_ref_access (dr))
      {
	if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	  fprintf (vect_dump, "not vectorized: complicated access pattern.");

	if (bb_vinfo)
	  {
	    /* Mark the statement as not vectorizable.  */
	    STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
	  }
	else
	  return false;
      }

  return true;
}

/* Function vect_prune_runtime_alias_test_list.

   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Return FALSE if resulting list of ddrs is longer than allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */

bool
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  VEC (ddr_p, heap) * ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  unsigned i, j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");

  for (i = 0; i < VEC_length (ddr_p, ddrs); )
    {
      bool found;
      ddr_p ddr_i;

      ddr_i = VEC_index (ddr_p, ddrs, i);
      found = false;

      for (j = 0; j < i; j++)
	{
	  ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);

	  if (vect_vfa_range_equal (ddr_i, ddr_j))
	    {
	      if (vect_print_dump_info (REPORT_DR_DETAILS))
		{
		  fprintf (vect_dump, "found equal ranges ");
		  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
		  fprintf (vect_dump, ", ");
		  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
		  fprintf (vect_dump, " and ");
		  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
		  fprintf (vect_dump, ", ");
		  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
		}
	      found = true;
	      break;
	    }
	}

      if (found)
	{
	  VEC_ordered_remove (ddr_p, ddrs, i);
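	  /* The removal shifts the remaining ddrs down by one, so I is left
	     unchanged and now indexes the next candidate.  */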
	  continue;
	}
      i++;
    }

  if (VEC_length (ddr_p, ddrs) >
       (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
	{
	  fprintf (vect_dump,
		   "disable versioning for alias - max number of generated "
		   "checks exceeded.");
	}

      VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);

      return false;
    }

  return true;
}

/* Function vect_analyze_data_refs.

   Find all the data references in the loop or basic block.

   The general structure of the analysis of data refs in the vectorizer is as
   follows:
   1- vect_analyze_data_refs(loop/bb): call
      compute_data_dependences_for_loop/bb to find and analyze all data-refs
      in the loop/bb and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.  */

bool
vect_analyze_data_refs (loop_vec_info loop_vinfo,
			bb_vec_info bb_vinfo,
			int *min_vf)
{
  struct loop *loop = NULL;
  basic_block bb = NULL;
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  tree scalar_type;
  bool res;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      res = compute_data_dependences_for_loop
	(loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
	 &LOOP_VINFO_DDRS (loop_vinfo));

      if (!res)
	{
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	    fprintf (vect_dump, "not vectorized: loop contains function calls"
		     " or data references that cannot be analyzed");
	  return false;
	}

      datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
    }
  else
    {
      bb = BB_VINFO_BB (bb_vinfo);
      res = compute_data_dependences_for_bb (bb, true,
					     &BB_VINFO_DATAREFS (bb_vinfo),
					     &BB_VINFO_DDRS (bb_vinfo));
      if (!res)
	{
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	    fprintf (vect_dump, "not vectorized: basic block contains function"
		     " calls or data references that cannot be analyzed");
	  return false;
	}

      datarefs = BB_VINFO_DATAREFS (bb_vinfo);
    }

  /* Go through the data-refs, check that the analysis succeeded.  Update
     pointer from stmt_vec_info struct to DR and vectype.  */

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      gimple stmt;
      stmt_vec_info stmt_info;
      tree base, offset, init;
      int vf;

      if (!dr || !DR_REF (dr))
	{
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	    fprintf (vect_dump, "not vectorized: unhandled data-ref ");
	  return false;
	}

      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
	  || !DR_STEP (dr))
	{
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	    {
	      fprintf (vect_dump, "not vectorized: data ref analysis failed ");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }

	  if (bb_vinfo)
	    {
	      /* Mark the statement as not vectorizable.  */
	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	      continue;
	    }
	  else
	    return false;
	}

      if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
	{
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	    fprintf (vect_dump, "not vectorized: base addr of dr is a "
		     "constant");

	  if (bb_vinfo)
	    {
	      /* Mark the statement as not vectorizable.  */
	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	      continue;
	    }
	  else
	    return false;
	}

      base = unshare_expr (DR_BASE_ADDRESS (dr));
      offset = unshare_expr (DR_OFFSET (dr));
      init = unshare_expr (DR_INIT (dr));

      if (stmt_could_throw_p (stmt))
	{
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	    {
	      fprintf (vect_dump, "not vectorized: statement can throw an "
		       "exception ");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }
	  return false;
	}

      /* Update DR field in stmt_vec_info struct.  */

      /* If the dataref is in an inner-loop of the loop that is considered
	 for vectorization, we also want to analyze the access relative to
	 the outer-loop (DR contains information only relative to the
	 inner-most enclosing loop).  We do that by building a reference to
	 the first location accessed by the inner-loop, and analyze it
	 relative to the outer-loop.  */
      if (loop && nested_in_vect_loop_p (loop, stmt))
	{
	  tree outer_step, outer_base, outer_init;
	  HOST_WIDE_INT pbitsize, pbitpos;
	  tree poffset;
	  enum machine_mode pmode;
	  int punsignedp, pvolatilep;
	  affine_iv base_iv, offset_iv;
	  tree dinit;

	  /* Build a reference to the first location accessed by the
	     inner-loop: *(BASE+INIT).  (The first location is actually
	     BASE+INIT+OFFSET, but we add OFFSET separately later).  */
	  tree inner_base = build_fold_indirect_ref
				(fold_build2 (POINTER_PLUS_EXPR,
					      TREE_TYPE (base), base,
					      fold_convert (sizetype, init)));

	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "analyze in outer-loop: ");
	      print_generic_expr (vect_dump, inner_base, TDF_SLIM);
	    }

	  outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
			  &poffset, &pmode, &punsignedp, &pvolatilep, false);
	  gcc_assert (outer_base != NULL_TREE);

	  if (pbitpos % BITS_PER_UNIT != 0)
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "failed: bit offset alignment.\n");
	      return false;
	    }

	  outer_base = build_fold_addr_expr (outer_base);
	  if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
			  &base_iv, false))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "failed: evolution of base is not affine.\n");
	      return false;
	    }

	  if (offset)
	    {
	      if (poffset)
		poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
				       poffset);
	      else
		poffset = offset;
	    }

	  if (!poffset)
	    {
	      offset_iv.base = ssize_int (0);
	      offset_iv.step = ssize_int (0);
	    }
	  else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
			       &offset_iv, false))
	    {
	      if (vect_print_dump_info (REPORT_DETAILS))
		fprintf (vect_dump, "evolution of offset is not affine.\n");
	      return false;
	    }

	  outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
	  split_constant_offset (base_iv.base, &base_iv.base, &dinit);
	  outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
	  split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
	  outer_init = size_binop (PLUS_EXPR, outer_init, dinit);

	  outer_step = size_binop (PLUS_EXPR,
				   fold_convert (ssizetype, base_iv.step),
				   fold_convert (ssizetype, offset_iv.step));

	  STMT_VINFO_DR_STEP (stmt_info) = outer_step;
	  /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
	  STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
	  STMT_VINFO_DR_INIT (stmt_info) = outer_init;
	  STMT_VINFO_DR_OFFSET (stmt_info) =
				fold_convert (ssizetype, offset_iv.base);
	  STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
				size_int (highest_pow2_factor (offset_iv.base));
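	  /* Relative to the outer loop the access is thus described as
	     DR_BASE_ADDRESS + DR_OFFSET + DR_INIT, advancing by OUTER_STEP
	     bytes in each outer-loop iteration.  */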
	  if (vect_print_dump_info (REPORT_DETAILS))
	    {
	      fprintf (vect_dump, "\touter base_address: ");
	      print_generic_expr (vect_dump,
				  STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
				  TDF_SLIM);
	      fprintf (vect_dump, "\n\touter offset from base address: ");
	      print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info),
				  TDF_SLIM);
	      fprintf (vect_dump,
		       "\n\touter constant offset from base address: ");
	      print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info),
				  TDF_SLIM);
	      fprintf (vect_dump, "\n\touter step: ");
	      print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info),
				  TDF_SLIM);
	      fprintf (vect_dump, "\n\touter aligned to: ");
	      print_generic_expr (vect_dump,
				  STMT_VINFO_DR_ALIGNED_TO (stmt_info),
				  TDF_SLIM);
	    }
	}

      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	    {
	      fprintf (vect_dump,
		       "not vectorized: more than one data ref in stmt: ");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	    }
	  return false;
	}

      STMT_VINFO_DATA_REF (stmt_info) = dr;

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info) =
		get_vectype_for_scalar_type (scalar_type);
      if (!STMT_VINFO_VECTYPE (stmt_info))
	{
	  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
	    {
	      fprintf (vect_dump,
		       "not vectorized: no vectype for stmt: ");
	      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
	      fprintf (vect_dump, " scalar_type: ");
	      print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
	    }

	  if (bb_vinfo)
	    {
	      /* Mark the statement as not vectorizable.  */
	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	      continue;
	    }
	  else
	    return false;
	}

      /* Adjust the minimal vectorization factor according to the
	 vector type.  */
      vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
      if (vf > *min_vf)
	*min_vf = vf;
    }

  return true;
}

/* Function vect_get_new_vect_var.

   Returns a name for a new variable.  The current naming scheme appends the
   prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
   the name of vectorizer generated variables, and appends that to NAME if
   provided.  */

tree
vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
{
  const char *prefix;
  tree new_vect_var;

  switch (var_kind)
  {
  case vect_simple_var:
    prefix = "vect_";
    break;
  case vect_scalar_var:
    prefix = "stmp_";
    break;
  case vect_pointer_var:
    prefix = "vect_p";
    break;
  default:
    gcc_unreachable ();
  }

  if (name)
    {
      char* tmp = concat (prefix, name, NULL);
      new_vect_var = create_tmp_var (type, tmp);
      free (tmp);
    }
  else
    new_vect_var = create_tmp_var (type, prefix);

  /* Mark vector typed variable as a gimple register variable.  */
  if (TREE_CODE (type) == VECTOR_TYPE)
    DECL_GIMPLE_REG_P (new_vect_var) = true;

  return new_vect_var;
}

/* Function vect_create_addr_base_for_vector_ref.

   Create an expression that computes the address of the first memory location
   that will be accessed for a data reference.

   Input:
   STMT: The statement containing the data reference.
   NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
   OFFSET: Optional.  If supplied, it is added to the initial address.
   LOOP:   Specify relative to which loop-nest should the address be computed.
	   For example, when the dataref is in an inner-loop nested in an
	   outer-loop that is now being vectorized, LOOP can be either the
	   outer-loop, or the inner-loop.  The first memory location accessed
	   by the following dataref ('in' points to short):

		for (i=0; i<N; i++)
		  for (j=0; j<M; j++)
		    s += in[i+j]

	   is as follows:
	   if LOOP=i_loop:	&in		(relative to i_loop)
	   if LOOP=j_loop:	&in+i*2B	(relative to j_loop)

   Output:
   1. Return an SSA_NAME whose value is the address of the memory location of
      the first vector of the data reference.
   2. If new_stmt_list is not NULL_TREE after return then the caller must insert
      these statement(s) which define the returned SSA_NAME.

   FORNOW: We are only handling array accesses with step 1.  */

tree
vect_create_addr_base_for_vector_ref (gimple stmt,
				      gimple_seq *new_stmt_list,
				      tree offset,
				      struct loop *loop)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
  tree base_name;
  tree data_ref_base_var;
  tree vec_stmt;
  tree addr_base, addr_expr;
  tree dest;
  gimple_seq seq = NULL;
  tree base_offset = unshare_expr (DR_OFFSET (dr));
  tree init = unshare_expr (DR_INIT (dr));
  tree vect_ptr_type;
  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  tree base;

  if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
    {
      struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);

      gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));

      data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
      base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
      init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
    }

  if (loop_vinfo)
    base_name = build_fold_indirect_ref (data_ref_base);
  else
    {
      base_offset = ssize_int (0);
      init = ssize_int (0);
      base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
    }

  data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
  add_referenced_var (data_ref_base_var);
  data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
					data_ref_base_var);
  gimple_seq_add_seq (new_stmt_list, seq);

  /* Create base_offset */
  base_offset = size_binop (PLUS_EXPR,
			    fold_convert (sizetype, base_offset),
			    fold_convert (sizetype, init));
  dest = create_tmp_var (sizetype, "base_off");
  add_referenced_var (dest);
  base_offset = force_gimple_operand (base_offset, &seq, true, dest);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (offset)
    {
      tree tmp = create_tmp_var (sizetype, "offset");

      add_referenced_var (tmp);
      offset = fold_build2 (MULT_EXPR, sizetype,
			    fold_convert (sizetype, offset), step);
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
				 base_offset, offset);
      base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
      gimple_seq_add_seq (new_stmt_list, seq);
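      /* BASE_OFFSET is now DR_OFFSET + DR_INIT plus the caller-supplied
	 OFFSET (counted in elements) scaled by the element size.  */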
    }

  /* base + base_offset */
  if (loop_vinfo)
    addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
			     data_ref_base, base_offset);
  else
    addr_base = build1 (ADDR_EXPR,
			build_pointer_type (TREE_TYPE (DR_REF (dr))),
			unshare_expr (DR_REF (dr)));

  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
  base = get_base_address (DR_REF (dr));
  if (base
      && TREE_CODE (base) == MEM_REF)
    vect_ptr_type
      = build_qualified_type (vect_ptr_type,
			      TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));

  vec_stmt = fold_convert (vect_ptr_type, addr_base);
  addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
				     get_name (base_name));
  add_referenced_var (addr_expr);
  vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (DR_PTR_INFO (dr)
      && TREE_CODE (vec_stmt) == SSA_NAME)
    duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "created ");
      print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
    }

  return vec_stmt;
}

/* Function vect_create_data_ref_ptr.

   Create a new pointer to vector type (vp), that points to the first location
   accessed in the loop by STMT, along with the def-use update chain to
   appropriately advance the pointer through the loop iterations.  Also set
   aliasing information for the pointer.  This vector pointer is used by the
   callers to this function to create a memory reference expression for vector
   load/store access.

   Input:
   1. STMT: a stmt that references memory.  Expected to be of the form
	 GIMPLE_ASSIGN <name, data-ref> or
	 GIMPLE_ASSIGN <data-ref, name>.
   2. AT_LOOP: the loop where the vector memref is to be created.
   3. OFFSET (optional): an offset to be added to the initial address accessed
	by the data-ref in STMT.
   4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
	pointing to the initial address.
   5. TYPE: if not NULL indicates the required type of the data-ref.

   Output:
   1. Declare a new ptr to vector_type, and have it point to the base of the
      data reference (initial address accessed by the data reference).
      For example, for vector of type V8HI, the following code is generated:

      v8hi *vp;
      vp = (v8hi *)initial_address;

      if OFFSET is not supplied:
	 initial_address = &a[init];
      if OFFSET is supplied:
	 initial_address = &a[init + OFFSET];

      Return the initial_address in INITIAL_ADDRESS.

   2. If ONLY_INIT is true, just return the initial pointer.  Otherwise, also
      update the pointer in each iteration of the loop.

      Return the increment stmt that updates the pointer in PTR_INCR.

   3. Set INV_P to true if the access pattern of the data reference in the
      vectorized loop is invariant.  Set it to false otherwise.

   4. Return the pointer.  */

tree
vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
			  tree offset, tree *initial_address, gimple *ptr_incr,
			  bool only_init, bool *inv_p)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  bool nested_in_vect_loop = false;
  struct loop *containing_loop = NULL;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree new_temp;
  gimple vec_stmt;
  gimple_seq new_stmt_list = NULL;
  edge pe = NULL;
  basic_block new_bb;
  tree vect_ptr_init;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vptr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  bool negative;
  tree indx_before_incr, indx_after_incr;
  gimple incr;
  tree step;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
  tree base;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
      containing_loop = (gimple_bb (stmt))->loop_father;
      pe = loop_preheader_edge (loop);
    }
  else
    {
      gcc_assert (bb_vinfo);
      only_init = true;
      *ptr_incr = NULL;
    }

  /* Check the step (evolution) of the load in LOOP, and record
     whether it's invariant.  */
  if (nested_in_vect_loop)
    step = STMT_VINFO_DR_STEP (stmt_info);
  else
    step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));

  if (tree_int_cst_compare (step, size_zero_node) == 0)
    *inv_p = true;
  else
    *inv_p = false;
  negative = tree_int_cst_compare (step, size_zero_node) < 0;

  /* Create an expression for the first address accessed by this load
     in LOOP.  */
  base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      tree data_ref_base = base_name;
      fprintf (vect_dump, "create vector-pointer variable to type: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL
	  || TREE_CODE (data_ref_base) == ARRAY_REF)
	fprintf (vect_dump, "  vectorizing an array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
	fprintf (vect_dump, "  vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
	fprintf (vect_dump, "  vectorizing a pointer ref: ");
      print_generic_expr (vect_dump, base_name, TDF_SLIM);
    }

  /* (1) Create the new vector-pointer variable.  */
  vect_ptr_type = build_pointer_type (vectype);
  base = get_base_address (DR_REF (dr));
  if (base
      && TREE_CODE (base) == MEM_REF)
    vect_ptr_type
      = build_qualified_type (vect_ptr_type,
			      TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
				    get_name (base_name));

  /* Vector types inherit the alias set of their component type by default so
     we need to use a ref-all pointer if the data reference does not conflict
     with the created vector data reference because it is not addressable.  */
  if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
			      get_alias_set (DR_REF (dr))))
    {
      vect_ptr_type
	= build_pointer_type_for_mode (vectype,
				       TYPE_MODE (vect_ptr_type), true);
      vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
					get_name (base_name));
    }

  /* Likewise for any of the data references in the stmt group.  */
  else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
    {
      gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
      do
	{
	  tree lhs = gimple_assign_lhs (orig_stmt);
	  if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
				      get_alias_set (lhs)))
	    {
	      vect_ptr_type
		= build_pointer_type_for_mode (vectype,
					       TYPE_MODE (vect_ptr_type), true);
	      vect_ptr
		= vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
					 get_name (base_name));
	      break;
	    }

	  orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
	}
      while (orig_stmt);
    }

  add_referenced_var (vect_ptr);

  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
     def-use update cycles for the pointer: one relative to the outer-loop
     (LOOP), which is what steps (3) and (4) below do.  The other is relative
     to the inner-loop (which is the inner-most loop containing the dataref),
     and this is done by step (5) below.

     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
     inner-most loop, and so steps (3),(4) work the same, and step (5) is
     redundant.  Steps (3),(4) create the following:

	vp0 = &base_addr;
	LOOP:	vp1 = phi(vp0,vp2)
		...
		vp2 = vp1 + step
		goto LOOP

     If there is an inner-loop nested in loop, then step (5) will also be
     applied, and an additional update in the inner-loop will be created:

	vp0 = &base_addr;
	LOOP:	vp1 = phi(vp0,vp2)
		...
	inner:	   vp3 = phi(vp1,vp4)
		   vp4 = vp3 + inner_step
		   ...
		vp2 = vp1 + step
		goto LOOP   */

  /* (2) Calculate the initial address the vector-pointer, and set
	 the vector-pointer to point to it before the loop.  */

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */

  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
						   offset, loop);
  if (new_stmt_list)
    {
      if (pe)
	{
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
	  gcc_assert (!new_bb);
	}
      else
	gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
    }

  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  if (TREE_CODE (new_temp) != SSA_NAME
      || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
    {
      vec_stmt = gimple_build_assign (vect_ptr,
				      fold_convert (vect_ptr_type, new_temp));
      vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
	duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
      gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
      if (pe)
	{
	  new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
	  gcc_assert (!new_bb);
	}
      else
	gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
    }
  else
    vect_ptr_init = new_temp;

  /* (3) Handle the updating of the vector-pointer inside the loop.
     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
     inner-loop nested in LOOP (during outer-loop vectorization).  */

  /* No update in loop is required.  */
  if (only_init && (!loop_vinfo || at_loop == loop))
    vptr = vect_ptr_init;
  else
    {
      /* The step of the vector pointer is the Vector Size.  */
      tree step = TYPE_SIZE_UNIT (vectype);
      /* One exception to the above is when the scalar step of the load in
	 LOOP is zero.  In this case the step here is also zero.  */
      if (*inv_p)
	step = size_zero_node;
      else if (negative)
	step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (vect_ptr_init,
		 fold_convert (vect_ptr_type, step),
		 vect_ptr, loop, &incr_gsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
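      /* create_iv emits the update chain from the sketch above:
	 INDX_BEFORE_INCR is the PHI result (vp1) to be used by the vector
	 memory accesses in the loop body, and INDX_AFTER_INCR is the
	 incremented pointer (vp2 = vp1 + step).  */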
      incr = gsi_stmt (incr_gsi);
      set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

      /* Copy the points-to information if it exists.  */
      if (DR_PTR_INFO (dr))
	{
	  duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
	  duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
	}
      if (ptr_incr)
	*ptr_incr = incr;

      vptr = indx_before_incr;
    }

  if (!nested_in_vect_loop || only_init)
    return vptr;

  /* (4) Handle the updating of the vector-pointer inside the inner-loop
     nested in LOOP, if exists.  */

  gcc_assert (nested_in_vect_loop);

  standard_iv_increment_position (containing_loop, &incr_gsi,
				  &insert_after);
  create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
	     containing_loop, &incr_gsi, insert_after, &indx_before_incr,
	     &indx_after_incr);
  incr = gsi_stmt (incr_gsi);
  set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    {
      duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
      duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
    }
  if (ptr_incr)
    *ptr_incr = incr;

  return indx_before_incr;
}

/* Function bump_vector_ptr

   Increment a pointer (to a vector type) by vector-size.  If requested,
   i.e. if PTR-INCR is given, then also connect the new increment stmt
   to the existing def-use update-chain of the pointer, by modifying
   the PTR_INCR as illustrated below:

   The pointer def-use update-chain before this function:
			DATAREF_PTR = phi (p_0, p_2)
			....
	PTR_INCR:	p_2 = DATAREF_PTR + step

   The pointer def-use update-chain after this function:
			DATAREF_PTR = phi (p_0, p_2)
			....
			NEW_DATAREF_PTR = DATAREF_PTR + BUMP
			....
	PTR_INCR:	p_2 = NEW_DATAREF_PTR + step

   Input:
   DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
		 in the loop.
   PTR_INCR - optional.  The stmt that updates the pointer in each iteration of
	      the loop.  The increment amount across iterations is expected
	      to be vector_size.
   BSI - location where the new update stmt is to be placed.
   STMT - the original scalar memory-access stmt that is being vectorized.
   BUMP - optional.  The offset by which to bump the pointer.  If not given,
	  the offset is assumed to be vector_size.

   Output: Return NEW_DATAREF_PTR as illustrated above.  */

tree
bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
		 gimple stmt, tree bump)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree ptr_var = SSA_NAME_VAR (dataref_ptr);
  tree update = TYPE_SIZE_UNIT (vectype);
  gimple incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  if (bump)
    update = bump;

  incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
					    dataref_ptr, update);
  new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
  gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
  vect_finish_stmt_generation (stmt, incr_stmt, gsi);
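  /* NEW_DATAREF_PTR is now DATAREF_PTR + UPDATE; by default UPDATE is the
     vector size in bytes (e.g. 16 for a V8HI vector), unless the caller
     supplied an explicit BUMP.  */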

  /* Copy the points-to information if it exists.  */
  if (DR_PTR_INFO (dr))
    duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));

  if (!ptr_incr)
    return new_dataref_ptr;

  /* Update the vector-pointer's cross-iteration increment.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
	SET_USE (use_p, new_dataref_ptr);
      else
	gcc_assert (tree_int_cst_compare (use, update) == 0);
    }

  return new_dataref_ptr;
}

/* Function vect_create_destination_var.

   Create a new temporary of type VECTYPE.  */

tree
vect_create_destination_var (tree scalar_dest, tree vectype)
{
  tree vec_dest;
  const char *new_name;
  tree type;
  enum vect_var_kind kind;

  kind = vectype ? vect_simple_var : vect_scalar_var;
  type = vectype ? vectype : TREE_TYPE (scalar_dest);

  gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);

  new_name = get_name (scalar_dest);
  if (!new_name)
    new_name = "var_";
  vec_dest = vect_get_new_vect_var (type, kind, new_name);
  add_referenced_var (vec_dest);

  return vec_dest;
}

/* Function vect_strided_store_supported.

   Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
   and FALSE otherwise.  */

bool
vect_strided_store_supported (tree vectype)
{
  optab interleave_high_optab, interleave_low_optab;
  enum machine_mode mode;

  mode = TYPE_MODE (vectype);

  /* Check that the operation is supported.  */
  interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
					       vectype, optab_default);
  interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
					      vectype, optab_default);
  if (!interleave_high_optab || !interleave_low_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab for interleave.");
      return false;
    }

  if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
      || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "interleave op not supported by target.");
      return false;
    }

  return true;
}

/* Function vect_permute_store_chain.

   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
   a power of 2, generate interleave_high/low stmts to reorder the data
   correctly for the stores.  Return the final references for stores in
   RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number to
   each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0  8 16 24  1  9 17 25
   2nd vec:  2 10 18 26  3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 30
   4th vec:  6 14 22 30  7 15 23 31

   i.e., we interleave the contents of the four vectors in their order.

   We use interleave_high/low instructions to create such output.  The input
   of each interleave_high/low operation is two vectors:
   1st vec    2nd vec
   0 1 2 3    4 5 6 7
   the even elements of the result vector are obtained left-to-right from the
   high/low elements of the first vector.  The odd elements of the result are
   obtained left-to-right from the high/low elements of the second vector.
   The output of interleave_high will be:   0 4 1 5
   and of interleave_low:                   2 6 3 7


   The permutation is done in log LENGTH stages.  In each stage interleave_high
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   where the first argument is taken from the first half of DR_CHAIN and the
   second argument from its second half.
   In our example,

   I1: interleave_high (1st vec, 3rd vec)
   I2: interleave_low (1st vec, 3rd vec)
   I3: interleave_high (2nd vec, 4th vec)
   I4: interleave_low (2nd vec, 4th vec)

   The output for the first stage is:

   I1:  0 16  1 17  2 18  3 19
   I2:  4 20  5 21  6 22  7 23
   I3:  8 24  9 25 10 26 11 27
   I4: 12 28 13 29 14 30 15 31

   The output of the second stage, i.e. the final result is:

   I1:  0  8 16 24  1  9 17 25
   I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 30
   I4:  6 14 22 30  7 15 23 31.  */

bool
vect_permute_store_chain (VEC(tree,heap) *dr_chain,
			  unsigned int length,
			  gimple stmt,
			  gimple_stmt_iterator *gsi,
			  VEC(tree,heap) **result_chain)
{
  tree perm_dest, vect1, vect2, high, low;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  int i;
  unsigned int j;
  enum tree_code high_code, low_code;

  /* Check that the operation is supported.  */
  if (!vect_strided_store_supported (vectype))
    return false;

  *result_chain = VEC_copy (tree, heap, dr_chain);

  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length/2; j++)
	{
	  vect1 = VEC_index (tree, dr_chain, j);
	  vect2 = VEC_index (tree, dr_chain, j+length/2);

	  /* Create interleaving stmt:
	     in the case of big endian:
				high = interleave_high (vect1, vect2)
	     and in the case of little endian:
				high = interleave_low (vect1, vect2).  */
	  perm_dest = create_tmp_var (vectype, "vect_inter_high");
	  DECL_GIMPLE_REG_P (perm_dest) = 1;
	  add_referenced_var (perm_dest);
	  if (BYTES_BIG_ENDIAN)
	    {
	      high_code = VEC_INTERLEAVE_HIGH_EXPR;
	      low_code = VEC_INTERLEAVE_LOW_EXPR;
	    }
	  else
	    {
	      low_code = VEC_INTERLEAVE_HIGH_EXPR;
	      high_code = VEC_INTERLEAVE_LOW_EXPR;
	    }
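	  /* The element numbering used in the comment above follows the
	     memory (array) order; on little-endian targets the target's
	     notion of the "high" and "low" vector halves is reversed
	     relative to that order, so the two interleave operations
	     swap roles.  */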
	  perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
						    vect1, vect2);
	  high = make_ssa_name (perm_dest, perm_stmt);
	  gimple_assign_set_lhs (perm_stmt, high);
	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	  VEC_replace (tree, *result_chain, 2*j, high);

	  /* Create interleaving stmt:
	     in the case of big endian:
			       low = interleave_low (vect1, vect2)
	     and in the case of little endian:
			       low = interleave_high (vect1, vect2).  */
	  perm_dest = create_tmp_var (vectype, "vect_inter_low");
	  DECL_GIMPLE_REG_P (perm_dest) = 1;
	  add_referenced_var (perm_dest);
	  perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
						    vect1, vect2);
	  low = make_ssa_name (perm_dest, perm_stmt);
	  gimple_assign_set_lhs (perm_stmt, low);
	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	  VEC_replace (tree, *result_chain, 2*j+1, low);
	}
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }
  return true;
}

/* Function vect_setup_realignment

   This function is called when vectorizing an unaligned load using
   the dr_explicit_realign[_optimized] scheme.
   This function generates the following code at the loop prolog:

      p = initial_addr;
   x  msq_init = *(floor(p));	# prolog load
      realignment_token = call target_builtin;
    loop:
   x  msq = phi (msq_init, ---)

   The stmts marked with x are generated only for the case of
   dr_explicit_realign_optimized.

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT, and a "floor-aligned" load using that pointer.
   It also generates code to compute the "realignment-token" (if the relevant
   target hook was defined), and creates a phi-node at the loop-header bb
   whose arguments are the result of the prolog-load (created by this
   function) and the result of a load that takes place in the loop (to be
   created by the caller to this function).

   For the case of dr_explicit_realign_optimized:
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:
    loop:
      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));	# load in loop
      result = realign_load (msq, lsq, realignment_token);

   For the case of dr_explicit_realign:
    loop:
      msq = *(floor(p));	# load in loop
      p' = p + (VS-1);
      lsq = *(floor(p'));	# load in loop
      result = realign_load (msq, lsq, realignment_token);

   Input:
   STMT - (scalar) load stmt to be vectorized.  This load accesses
	  a memory location that may be unaligned.
   BSI - place where new code is to be inserted.
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
			      is used.

   Output:
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
		       target hook, if defined.
   Return value - the result of the loop-header phi node.  */

tree
vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
			tree *realignment_token,
			enum dr_alignment_support alignment_support_scheme,
			tree init_addr,
			struct loop **at_loop)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = NULL;
  edge pe = NULL;
  tree scalar_dest = gimple_assign_lhs (stmt);
  tree vec_dest;
  gimple inc;
  tree ptr;
  tree data_ref;
  gimple new_stmt;
  basic_block new_bb;
  tree msq_init = NULL_TREE;
  tree new_temp;
  gimple phi_stmt;
  tree msq = NULL_TREE;
  gimple_seq stmts = NULL;
  bool inv_p;
  bool compute_in_loop = false;
  bool nested_in_vect_loop = false;
  struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
  struct loop *loop_for_initial_load = NULL;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
    }

  gcc_assert (alignment_support_scheme == dr_explicit_realign
	      || alignment_support_scheme == dr_explicit_realign_optimized);

  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
	done (for the optimized realignment scheme).  */

  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP.  This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment).  In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) have to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP.  */

  if (init_addr != NULL_TREE || !loop_vinfo)
    {
      compute_in_loop = true;
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
    }


  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration.  In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP.  If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP.  Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop).  */

  if (nested_in_vect_loop)
    {
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      bool invariant_in_outerloop =
	(tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    }
  else
    loop_for_initial_load = loop;
  if (at_loop)
    *at_loop = loop_for_initial_load;

  if (loop_for_initial_load)
    pe = loop_preheader_edge (loop_for_initial_load);

  /* 3. For the case of the optimized realignment, create the first vector
      load at the loop preheader.  */

  if (alignment_support_scheme == dr_explicit_realign_optimized)
    {
      /* Create msq_init = *(floor(p1)) in the loop preheader  */

      gcc_assert (!compute_in_loop);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
				      &init_addr, &inc, true, &inv_p);
      new_stmt = gimple_build_assign_with_ops
		   (BIT_AND_EXPR, NULL_TREE, ptr,
		    build_int_cst (TREE_TYPE (ptr),
				   -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
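      /* ANDing the pointer with -TYPE_ALIGN_UNIT rounds it down to the
	 nearest vector-aligned address: for a 16-byte vector the mask is
	 typically -16 (~0xf), which produces the floor(p) used in the
	 scheme above.  */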
      new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      data_ref
	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
      new_stmt = gimple_build_assign (vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      mark_symbols_for_renaming (new_stmt);
      if (pe)
	{
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}
      else
	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);

      msq_init = gimple_assign_lhs (new_stmt);
    }

  /* 4. Create realignment token using a target builtin, if available.
      It is done either inside the containing loop, or before LOOP (as
      determined above).  */

  if (targetm.vectorize.builtin_mask_for_load)
    {
      tree builtin_decl;

      /* Compute INIT_ADDR - the initial address accessed by this memref.  */
      if (!init_addr)
	{
	  /* Generate the INIT_ADDR computation outside LOOP.  */
	  init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
							    NULL_TREE, loop);
	  if (loop)
	    {
	      pe = loop_preheader_edge (loop);
	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
	      gcc_assert (!new_bb);
	    }
	  else
	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	}

      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
      vec_dest =
	vect_create_destination_var (scalar_dest,
				     gimple_call_return_type (new_stmt));
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);

      if (compute_in_loop)
	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      else
	{
	  /* Generate the misalignment computation outside LOOP.  */
	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}

      *realignment_token = gimple_call_lhs (new_stmt);

      /* The result of the CALL_EXPR to this builtin is determined from
	 the value of the parameter and no global variables are touched
	 which makes the builtin a "const" function.  Requiring the
	 builtin to have the "const" attribute makes it unnecessary
	 to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  if (alignment_support_scheme == dr_explicit_realign)
    return msq;

  gcc_assert (!compute_in_loop);
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);


  /* 5. Create msq = phi <msq_init, lsq> in loop  */

  pe = loop_preheader_edge (containing_loop);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest, NULL);
  phi_stmt = create_phi_node (msq, containing_loop->header);
  SSA_NAME_DEF_STMT (msq) = phi_stmt;
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);

  return msq;
}

/* Function vect_strided_load_supported.

   Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
   and FALSE otherwise.  */

bool
vect_strided_load_supported (tree vectype)
{
  optab perm_even_optab, perm_odd_optab;
  enum machine_mode mode;

  mode = TYPE_MODE (vectype);

  perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
					 optab_default);
  if (!perm_even_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab for perm_even.");
      return false;
    }

  if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "perm_even op not supported by target.");
      return false;
    }

  perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
					optab_default);
  if (!perm_odd_optab)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "no optab for perm_odd.");
      return false;
    }

  if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "perm_odd op not supported by target.");
      return false;
    }

  return true;
}
/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly.  Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number to
   each element, so the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:   0  4  8 12 16 20 24 28
   2nd vec:   1  5  9 13 17 21 25 29
   3rd vec:   2  6 10 14 18 22 26 30
   4th vec:   3  7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output.  The input of
   each extract_even/odd operation is two vectors
   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   and the output is the vector of extracted even/odd elements.  The output of
   extract_even will be:  0 2 4 6
   and of extract_odd:    1 3 5 7

   The permutation is done in log LENGTH stages.  In each stage extract_even
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   their order.  In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:   0  2  4  6  8 10 12 14
   E2:   1  3  5  7  9 11 13 15
   E3:  16 18 20 22 24 26 28 30
   E4:  17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of the extract_even operation and then
   the output of extract_odd in RESULT_CHAIN (which is then copied to
   DR_CHAIN).  The input for the second stage is:

   1st vec (E1):   0  2  4  6  8 10 12 14
   2nd vec (E3):  16 18 20 22 24 26 28 30
   3rd vec (E2):   1  3  5  7  9 11 13 15
   4th vec (E4):  17 19 21 23 25 27 29 31

   The output of the second stage:

   E1:  0  4  8 12 16 20 24 28
   E2:  2  6 10 14 18 22 26 30
   E3:  1  5  9 13 17 21 25 29
   E4:  3  7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1):  0  4  8 12 16 20 24 28
   2nd vec (E3):  1  5  9 13 17 21 25 29
   3rd vec (E2):  2  6 10 14 18 22 26 30
   4th vec (E4):  3  7 11 15 19 23 27 31.  */

bool
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
                         unsigned int length,
                         gimple stmt,
                         gimple_stmt_iterator *gsi,
                         VEC(tree,heap) **result_chain)
{
  tree perm_dest, data_ref, first_vect, second_vect;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  int i;
  unsigned int j;

  /* Check that the operation is supported.  */
  if (!vect_strided_load_supported (vectype))
    return false;

  *result_chain = VEC_copy (tree, heap, dr_chain);
  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = VEC_index (tree, dr_chain, j);
          second_vect = VEC_index (tree, dr_chain, j+1);

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2, data_ref);

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }

  return true;
}

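/* Editorial note: a standalone sketch (not part of GCC and guarded out of
   the build) that replays the extract_even/extract_odd permutation described
   in the comment before vect_permute_load_chain on plain int arrays, for
   LENGTH = 4 "vectors" of 8 elements.  After log2 (LENGTH) stages the k-th
   output row holds elements k, k+4, k+8, ... of the interleaved input, as in
   the worked example above.  The array-based extract_even/extract_odd helpers
   are illustrative stand-ins for the VEC_EXTRACT_EVEN/ODD_EXPR operations.  */
#if 0
#include <stdio.h>
#include <string.h>

#define NVECS 4
#define NELTS 8

/* Even-indexed elements of the concatenation of A and B.  */
static void
extract_even (const int *a, const int *b, int *out)
{
  int i;
  for (i = 0; i < NELTS / 2; i++)
    {
      out[i] = a[2 * i];
      out[i + NELTS / 2] = b[2 * i];
    }
}

/* Odd-indexed elements of the concatenation of A and B.  */
static void
extract_odd (const int *a, const int *b, int *out)
{
  int i;
  for (i = 0; i < NELTS / 2; i++)
    {
      out[i] = a[2 * i + 1];
      out[i + NELTS / 2] = b[2 * i + 1];
    }
}

int
main (void)
{
  int chain[NVECS][NELTS], result[NVECS][NELTS];
  int v, e, stage, j;

  for (v = 0; v < NVECS; v++)
    for (e = 0; e < NELTS; e++)
      chain[v][e] = v * NELTS + e;	/* elements 0..31  */

  for (stage = 0; stage < 2; stage++)	/* log2 (NVECS) stages  */
    {
      for (j = 0; j < NVECS; j += 2)
        {
          extract_even (chain[j], chain[j + 1], result[j / 2]);
          extract_odd (chain[j], chain[j + 1], result[j / 2 + NVECS / 2]);
        }
      memcpy (chain, result, sizeof (chain));
    }

  for (v = 0; v < NVECS; v++)
    {
      for (e = 0; e < NELTS; e++)
        printf ("%2d ", result[v][e]);
      printf ("\n");		/* rows: 0 4 8 ... 28 / 1 5 9 ... 29 / ...  */
    }
  return 0;
}
#endif
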
/* Function vect_transform_strided_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the resulting vectorized statements
   to the scalar statements.  */

bool
vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  gimple next_stmt, new_stmt;
  VEC(tree,heap) *result_chain = NULL;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains
     permuted vectors that are ready for vector computation.  */
  result_chain = VEC_alloc (tree, heap, size);

  /* Permute.  */
  if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
    return false;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps.  Loads created for the gaps will be removed by dead
         code elimination pass later.  No need to check for the first stmt in
         the group, since it always exists.
         DR_GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
      if (next_stmt != first_stmt
          && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of multiple
             copies, and we put the new vector statement in the first available
             RELATED_STMT.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else
            {
              if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
                {
                  gimple prev_stmt =
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
                  gimple rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
                  while (rel_stmt)
                    {
                      prev_stmt = rel_stmt;
                      rel_stmt =
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                    }

                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
                    new_stmt;
                }
            }

          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }

  VEC_free (tree, heap, result_chain);
  return true;
}

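/* Editorial note: a toy model (not part of GCC and guarded out of the build)
   of the gap-skipping walk in vect_transform_strided_load above.  GROUP_GAP[k]
   stands in for DR_GROUP_GAP of the k-th scalar load of the interleaving
   group, and the walk reports which permuted vector of RESULT_CHAIN each
   group member receives; entries that correspond to gaps are skipped, and the
   loads created for them are left to dead code elimination.  The concrete
   access pattern used here is an assumption for illustration.  */
#if 0
#include <stdio.h>

int
main (void)
{
  /* A group of 3 loads with access pattern a[4i], a[4i+1], a[4i+3]:
     the distances from the previous access are 1, 1 and 2 elements.  */
  int group_gap[3] = { 1, 1, 2 };
  int nmembers = 3, size = 4;	/* SIZE permuted vectors are available.  */
  int member = 0, gap_count = 1, i;

  for (i = 0; i < size && member < nmembers; i++)
    {
      /* Skip result-chain entries that correspond to gaps.  */
      if (member != 0 && gap_count < group_gap[member])
        {
          gap_count++;
          continue;
        }
      printf ("group member %d <- result_chain[%d]\n", member, i);
      member++;
      gap_count = 1;
    }
  return 0;
}
#endif
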
/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
   on ALIGNMENT bit boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  if (DECL_EXTERNAL (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}

/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
                               bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
        }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      }  */

  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = TREE_TYPE (DR_REF (dr));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          if ((nested_in_vect_loop
               && (TREE_INT_CST_LOW (DR_STEP (dr))
                   != GET_MODE_SIZE (TYPE_MODE (vectype))))
              || !loop_vinfo)
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.support_vector_misalignment (mode, type,
                                                         DR_MISALIGNMENT (dr),
                                                         is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = TREE_TYPE (DR_REF (dr));

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.support_vector_misalignment (mode, type,
                                                         DR_MISALIGNMENT (dr),
                                                         is_packed))
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
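
/* Editorial note: a standalone sketch (not part of GCC and guarded out of
   the build) that makes the argument in the comment above concrete.  It
   prints the misalignment, in elements modulo VS, of &a[i+j] for the two
   inner-loop shapes discussed there: with the scalar inner step the
   misalignment cycles on every inner iteration, so the realignment token
   cannot be computed once in the preheader; with an inner step of VS it
   stays fixed at the initial value k.  VS = 4 and an aligned base address
   are assumptions made for illustration.  */
#if 0
#include <stdio.h>

#define VS 4

int
main (void)
{
  int N = 8, M = 8, i, j, k = 1;

  printf ("scalar inner step (j++):     ");
  for (i = 0; i < N; i += VS)
    for (j = 0; j < M; j++)
      printf ("%d ", (i + j) % VS);	/* 0 1 2 3 0 1 2 3 ...  */
  printf ("\n");

  printf ("vector inner step (j += VS): ");
  for (i = 0; i < N; i += VS)
    for (j = k; j < M; j += VS)
      printf ("%d ", (i + j) % VS);	/* always k  */
  printf ("\n");
  return 0;
}
#endif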