1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
50 vect_lanes_optab_supported_p (const char *name
, convert_optab optab
,
51 tree vectype
, unsigned HOST_WIDE_INT count
)
53 enum machine_mode mode
, array_mode
;
56 mode
= TYPE_MODE (vectype
);
57 limit_p
= !targetm
.array_mode_supported_p (mode
, count
);
58 array_mode
= mode_for_size (count
* GET_MODE_BITSIZE (mode
),
61 if (array_mode
== BLKmode
)
63 if (vect_print_dump_info (REPORT_DETAILS
))
64 fprintf (vect_dump
, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC
"]",
65 GET_MODE_NAME (mode
), count
);
69 if (convert_optab_handler (optab
, array_mode
, mode
) == CODE_FOR_nothing
)
71 if (vect_print_dump_info (REPORT_DETAILS
))
72 fprintf (vect_dump
, "cannot use %s<%s><%s>",
73 name
, GET_MODE_NAME (array_mode
), GET_MODE_NAME (mode
));
77 if (vect_print_dump_info (REPORT_DETAILS
))
78 fprintf (vect_dump
, "can use %s<%s><%s>",
79 name
, GET_MODE_NAME (array_mode
), GET_MODE_NAME (mode
));
85 /* Return the smallest scalar part of STMT.
86 This is used to determine the vectype of the stmt. We generally set the
87 vectype according to the type of the result (lhs). For stmts whose
88 result-type is different than the type of the arguments (e.g., demotion,
89 promotion), vectype will be reset appropriately (later). Note that we have
90 to visit the smallest datatype in this function, because that determines the
91 VF. If the smallest datatype in the loop is present only as the rhs of a
92 promotion operation - we'd miss it.
93 Such a case, where a variable of this datatype does not appear in the lhs
94 anywhere in the loop, can only occur if it's an invariant: e.g.:
95 'int_x = (int) short_inv', which we'd expect to have been optimized away by
96 invariant motion. However, we cannot rely on invariant motion to always
97 take invariants out of the loop, and so in the case of promotion we also
98 have to check the rhs.
99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
103 vect_get_smallest_scalar_type (gimple stmt
, HOST_WIDE_INT
*lhs_size_unit
,
104 HOST_WIDE_INT
*rhs_size_unit
)
106 tree scalar_type
= gimple_expr_type (stmt
);
107 HOST_WIDE_INT lhs
, rhs
;
109 lhs
= rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
111 if (is_gimple_assign (stmt
)
112 && (gimple_assign_cast_p (stmt
)
113 || gimple_assign_rhs_code (stmt
) == WIDEN_MULT_EXPR
114 || gimple_assign_rhs_code (stmt
) == FLOAT_EXPR
))
116 tree rhs_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
118 rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type
));
120 scalar_type
= rhs_type
;
123 *lhs_size_unit
= lhs
;
124 *rhs_size_unit
= rhs
;
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
133 vect_get_place_in_interleaving_chain (gimple stmt
, gimple first_stmt
)
135 gimple next_stmt
= first_stmt
;
138 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
141 while (next_stmt
&& next_stmt
!= stmt
)
144 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
159 vect_insert_into_interleaving_chain (struct data_reference
*dra
,
160 struct data_reference
*drb
)
164 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
165 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
167 prev
= GROUP_FIRST_ELEMENT (stmtinfo_b
);
168 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev
));
171 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
172 if (tree_int_cst_compare (next_init
, DR_INIT (dra
)) > 0)
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
176 GROUP_NEXT_ELEMENT (stmtinfo_a
) = next
;
180 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev
));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev
)) = DR_STMT (dra
);
185 GROUP_NEXT_ELEMENT (stmtinfo_a
) = NULL
;
189 /* Function vect_update_interleaving_chain.
191 For two data-refs DRA and DRB that are a part of a chain interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
205 NEXT(FIRST_DR) = previous FIRST_DR
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
213 vect_update_interleaving_chain (struct data_reference
*drb
,
214 struct data_reference
*dra
)
216 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
217 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
218 tree next_init
, init_dra_chain
, init_drb_chain
;
219 gimple first_a
, first_b
;
221 gimple node
, prev
, next
, first_stmt
;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a
) && !GROUP_FIRST_ELEMENT (stmtinfo_b
))
226 GROUP_FIRST_ELEMENT (stmtinfo_a
) = DR_STMT (drb
);
227 GROUP_FIRST_ELEMENT (stmtinfo_b
) = DR_STMT (drb
);
228 GROUP_NEXT_ELEMENT (stmtinfo_b
) = DR_STMT (dra
);
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a
) && GROUP_FIRST_ELEMENT (stmtinfo_b
))
235 GROUP_FIRST_ELEMENT (stmtinfo_a
) = GROUP_FIRST_ELEMENT (stmtinfo_b
);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra
, drb
);
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a
) && !GROUP_FIRST_ELEMENT (stmtinfo_b
))
244 gimple old_first_stmt
= GROUP_FIRST_ELEMENT (stmtinfo_a
);
245 tree init_old
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
249 if (tree_int_cst_compare (init_old
, DR_INIT (drb
)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b
) = DR_STMT (drb
);
255 GROUP_NEXT_ELEMENT (stmtinfo_b
) = old_first_stmt
;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp
= old_first_stmt
;
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp
)) = DR_STMT (drb
);
262 tmp
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp
));
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb
, dra
);
269 GROUP_FIRST_ELEMENT (stmtinfo_b
) = GROUP_FIRST_ELEMENT (stmtinfo_a
);
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a
= GROUP_FIRST_ELEMENT (stmtinfo_a
);
276 first_b
= GROUP_FIRST_ELEMENT (stmtinfo_b
);
277 if (first_a
== first_b
)
279 init_dra_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a
)));
280 init_drb_chain
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b
)));
282 if (tree_int_cst_compare (init_dra_chain
, init_drb_chain
) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
286 start from the beginning. */
287 node
= GROUP_FIRST_ELEMENT (stmtinfo_a
);
288 prev
= GROUP_FIRST_ELEMENT (stmtinfo_b
);
289 first_stmt
= first_b
;
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
295 start from the beginning. */
296 node
= GROUP_FIRST_ELEMENT (stmtinfo_b
);
297 prev
= GROUP_FIRST_ELEMENT (stmtinfo_a
);
298 first_stmt
= first_a
;
303 node_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node
)));
304 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev
));
307 next_init
= DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
308 if (tree_int_cst_compare (next_init
, node_init
) > 0)
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev
)) = node
;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node
)) = next
;
317 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev
));
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev
)) = node
;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node
)) = NULL
;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node
)) = first_stmt
;
327 node
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (node
));
331 /* Check dependence between DRA and DRB for basic block vectorization.
332 If the accesses share same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
338 vect_drs_dependent_in_basic_block (struct data_reference
*dra
,
339 struct data_reference
*drb
)
341 HOST_WIDE_INT type_size_a
, type_size_b
, init_a
, init_b
;
344 /* We only call this function for pairs of loads and stores, but we verify
346 if (DR_IS_READ (dra
) == DR_IS_READ (drb
))
348 if (DR_IS_READ (dra
))
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra
), DR_BASE_ADDRESS (drb
), 0)
357 || !dr_equal_offsets_p (dra
, drb
))
360 /* Check the types. */
361 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
362 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
364 if (type_size_a
!= type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra
)),
366 TREE_TYPE (DR_REF (drb
))))
369 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
370 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
372 /* Two different locations - no dependence. */
373 if (init_a
!= init_b
)
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
379 corresponding scalar store. So the order of the acceses is preserved in
380 case the load is before the store. */
381 earlier_stmt
= get_earlier_stmt (DR_STMT (dra
), DR_STMT (drb
));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt
))))
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
395 vect_check_interleaving (struct data_reference
*dra
,
396 struct data_reference
*drb
)
398 HOST_WIDE_INT type_size_a
, type_size_b
, diff_mod_size
, step
, init_a
, init_b
;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra
), DR_BASE_ADDRESS (drb
), 0)
403 || !dr_equal_offsets_p (dra
, drb
)
404 || !tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
))
405 || DR_IS_READ (dra
) != DR_IS_READ (drb
))
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
413 type_size_a
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))));
414 type_size_b
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
416 if (type_size_a
!= type_size_b
417 || tree_int_cst_compare (DR_STEP (dra
), DR_STEP (drb
))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra
)),
419 TREE_TYPE (DR_REF (drb
))))
422 init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
423 init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
424 step
= TREE_INT_CST_LOW (DR_STEP (dra
));
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size
= (init_a
- init_b
) % type_size_a
;
432 if (step
&& (init_a
- init_b
) > step
)
435 if (diff_mod_size
== 0)
437 vect_update_interleaving_chain (drb
, dra
);
438 if (vect_print_dump_info (REPORT_DR_DETAILS
))
440 fprintf (vect_dump
, "Detected interleaving ");
441 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
442 fprintf (vect_dump
, " and ");
443 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size
= (init_b
- init_a
) % type_size_a
;
454 if (step
&& (init_b
- init_a
) > step
)
457 if (diff_mod_size
== 0)
459 vect_update_interleaving_chain (dra
, drb
);
460 if (vect_print_dump_info (REPORT_DR_DETAILS
))
462 fprintf (vect_dump
, "Detected interleaving ");
463 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
464 fprintf (vect_dump
, " and ");
465 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
474 /* Check if data references pointed by DR_I and DR_J are same or
475 belong to same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
479 vect_same_range_drs (data_reference_p dr_i
, data_reference_p dr_j
)
481 gimple stmt_i
= DR_STMT (dr_i
);
482 gimple stmt_j
= DR_STMT (dr_j
);
484 if (operand_equal_p (DR_REF (dr_i
), DR_REF (dr_j
), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i
))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j
))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i
))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j
)))))
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
498 vect_vfa_range_equal (ddr_p ddr_i
, ddr_p ddr_j
)
500 if ((vect_same_range_drs (DDR_A (ddr_i
), DDR_A (ddr_j
))
501 && vect_same_range_drs (DDR_B (ddr_i
), DDR_B (ddr_j
)))
502 || (vect_same_range_drs (DDR_A (ddr_i
), DDR_B (ddr_j
))
503 && vect_same_range_drs (DDR_B (ddr_i
), DDR_A (ddr_j
))))
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
514 vect_mark_for_runtime_alias_test (ddr_p ddr
, loop_vec_info loop_vinfo
)
516 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
) == 0)
521 if (vect_print_dump_info (REPORT_DR_DETAILS
))
523 fprintf (vect_dump
, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr
)), TDF_SLIM
);
525 fprintf (vect_dump
, " and ");
526 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr
)), TDF_SLIM
);
529 if (optimize_loop_nest_for_size_p (loop
))
531 if (vect_print_dump_info (REPORT_DR_DETAILS
))
532 fprintf (vect_dump
, "versioning not supported when optimizing for size.");
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
539 if (vect_print_dump_info (REPORT_DR_DETAILS
))
540 fprintf (vect_dump
, "versioning not yet supported for outer-loops.");
544 VEC_safe_push (ddr_p
, heap
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), ddr
);
549 /* Function vect_analyze_data_ref_dependence.
551 Return TRUE if there (might) exist a dependence between a memory-reference
552 DRA and a memory-reference DRB. When versioning for alias may check a
553 dependence at run-time, return FALSE. Adjust *MAX_VF according to
554 the data dependence. */
557 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
558 loop_vec_info loop_vinfo
, int *max_vf
,
559 bool *data_dependence_in_bb
)
562 struct loop
*loop
= NULL
;
563 struct data_reference
*dra
= DDR_A (ddr
);
564 struct data_reference
*drb
= DDR_B (ddr
);
565 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
566 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
567 lambda_vector dist_v
;
568 unsigned int loop_depth
;
570 /* Don't bother to analyze statements marked as unvectorizable. */
571 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a
)
572 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b
))
575 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
577 /* Independent data accesses. */
578 vect_check_interleaving (dra
, drb
);
583 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
585 if ((DR_IS_READ (dra
) && DR_IS_READ (drb
) && loop_vinfo
) || dra
== drb
)
588 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
592 if (vect_print_dump_info (REPORT_DR_DETAILS
))
594 fprintf (vect_dump
, "versioning for alias required: "
595 "can't determine dependence between ");
596 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
597 fprintf (vect_dump
, " and ");
598 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
601 /* Add to list of ddrs that need to be tested at run-time. */
602 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
605 /* When vectorizing a basic block unknown depnedence can still mean
607 if (vect_check_interleaving (dra
, drb
))
610 if (vect_print_dump_info (REPORT_DR_DETAILS
))
612 fprintf (vect_dump
, "can't determine dependence between ");
613 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
614 fprintf (vect_dump
, " and ");
615 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
618 /* We do not vectorize basic blocks with write-write dependencies. */
619 if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
622 /* We deal with read-write dependencies in basic blocks later (by
623 verifying that all the loads in the basic block are before all the
625 *data_dependence_in_bb
= true;
629 /* Versioning for alias is not yet supported for basic block SLP, and
630 dependence distance is unapplicable, hence, in case of known data
631 dependence, basic block vectorization is impossible for now. */
634 if (dra
!= drb
&& vect_check_interleaving (dra
, drb
))
637 if (vect_print_dump_info (REPORT_DR_DETAILS
))
639 fprintf (vect_dump
, "determined dependence between ");
640 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
641 fprintf (vect_dump
, " and ");
642 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
645 /* Do not vectorize basic blcoks with write-write dependences. */
646 if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
649 /* Check if this dependence is allowed in basic block vectorization. */
650 return vect_drs_dependent_in_basic_block (dra
, drb
);
653 /* Loop-based vectorization and known data dependence. */
654 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
656 if (vect_print_dump_info (REPORT_DR_DETAILS
))
658 fprintf (vect_dump
, "versioning for alias required: bad dist vector for ");
659 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
660 fprintf (vect_dump
, " and ");
661 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
663 /* Add to list of ddrs that need to be tested at run-time. */
664 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
667 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
668 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
)
670 int dist
= dist_v
[loop_depth
];
672 if (vect_print_dump_info (REPORT_DR_DETAILS
))
673 fprintf (vect_dump
, "dependence distance = %d.", dist
);
677 if (vect_print_dump_info (REPORT_DR_DETAILS
))
679 fprintf (vect_dump
, "dependence distance == 0 between ");
680 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
681 fprintf (vect_dump
, " and ");
682 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
685 /* For interleaving, mark that there is a read-write dependency if
686 necessary. We check before that one of the data-refs is store. */
687 if (DR_IS_READ (dra
))
688 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a
) = true;
691 if (DR_IS_READ (drb
))
692 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b
) = true;
698 if (dist
> 0 && DDR_REVERSED_P (ddr
))
700 /* If DDR_REVERSED_P the order of the data-refs in DDR was
701 reversed (to make distance vector positive), and the actual
702 distance is negative. */
703 if (vect_print_dump_info (REPORT_DR_DETAILS
))
704 fprintf (vect_dump
, "dependence distance negative.");
709 && abs (dist
) < *max_vf
)
711 /* The dependence distance requires reduction of the maximal
712 vectorization factor. */
713 *max_vf
= abs (dist
);
714 if (vect_print_dump_info (REPORT_DR_DETAILS
))
715 fprintf (vect_dump
, "adjusting maximal vectorization factor to %i",
719 if (abs (dist
) >= *max_vf
)
721 /* Dependence distance does not create dependence, as far as
722 vectorization is concerned, in this case. */
723 if (vect_print_dump_info (REPORT_DR_DETAILS
))
724 fprintf (vect_dump
, "dependence distance >= VF.");
728 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
730 fprintf (vect_dump
, "not vectorized, possible dependence "
731 "between data-refs ");
732 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
733 fprintf (vect_dump
, " and ");
734 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
743 /* Function vect_analyze_data_ref_dependences.
745 Examine all the data references in the loop, and make sure there do not
746 exist any data dependences between them. Set *MAX_VF according to
747 the maximum vectorization factor the data dependences allow. */
750 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
,
751 bb_vec_info bb_vinfo
, int *max_vf
,
752 bool *data_dependence_in_bb
)
755 VEC (ddr_p
, heap
) *ddrs
= NULL
;
756 struct data_dependence_relation
*ddr
;
758 if (vect_print_dump_info (REPORT_DETAILS
))
759 fprintf (vect_dump
, "=== vect_analyze_dependences ===");
762 ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
764 ddrs
= BB_VINFO_DDRS (bb_vinfo
);
766 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
767 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
, max_vf
,
768 data_dependence_in_bb
))
775 /* Function vect_compute_data_ref_alignment
777 Compute the misalignment of the data reference DR.
780 1. If during the misalignment computation it is found that the data reference
781 cannot be vectorized then false is returned.
782 2. DR_MISALIGNMENT (DR) is defined.
784 FOR NOW: No analysis is actually performed. Misalignment is calculated
785 only for trivial cases. TODO. */
788 vect_compute_data_ref_alignment (struct data_reference
*dr
)
790 gimple stmt
= DR_STMT (dr
);
791 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
792 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
793 struct loop
*loop
= NULL
;
794 tree ref
= DR_REF (dr
);
796 tree base
, base_addr
;
799 tree aligned_to
, alignment
;
801 if (vect_print_dump_info (REPORT_DETAILS
))
802 fprintf (vect_dump
, "vect_compute_data_ref_alignment:");
805 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
807 /* Initialize misalignment to unknown. */
808 SET_DR_MISALIGNMENT (dr
, -1);
810 misalign
= DR_INIT (dr
);
811 aligned_to
= DR_ALIGNED_TO (dr
);
812 base_addr
= DR_BASE_ADDRESS (dr
);
813 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
815 /* In case the dataref is in an inner-loop of the loop that is being
816 vectorized (LOOP), we use the base and misalignment information
817 relative to the outer-loop (LOOP). This is ok only if the misalignment
818 stays the same throughout the execution of the inner-loop, which is why
819 we have to check that the stride of the dataref in the inner-loop evenly
820 divides by the vector size. */
821 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
823 tree step
= DR_STEP (dr
);
824 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
826 if (dr_step
% GET_MODE_SIZE (TYPE_MODE (vectype
)) == 0)
828 if (vect_print_dump_info (REPORT_ALIGNMENT
))
829 fprintf (vect_dump
, "inner step divides the vector-size.");
830 misalign
= STMT_VINFO_DR_INIT (stmt_info
);
831 aligned_to
= STMT_VINFO_DR_ALIGNED_TO (stmt_info
);
832 base_addr
= STMT_VINFO_DR_BASE_ADDRESS (stmt_info
);
836 if (vect_print_dump_info (REPORT_ALIGNMENT
))
837 fprintf (vect_dump
, "inner step doesn't divide the vector-size.");
838 misalign
= NULL_TREE
;
842 base
= build_fold_indirect_ref (base_addr
);
843 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
845 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
848 if (vect_print_dump_info (REPORT_ALIGNMENT
))
850 fprintf (vect_dump
, "Unknown alignment for access: ");
851 print_generic_expr (vect_dump
, base
, TDF_SLIM
);
857 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
859 || (TREE_CODE (base_addr
) == SSA_NAME
860 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
861 TREE_TYPE (base_addr
)))),
865 base_aligned
= false;
869 /* Do not change the alignment of global variables if
870 flag_section_anchors is enabled. */
871 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
872 || (TREE_STATIC (base
) && flag_section_anchors
))
874 if (vect_print_dump_info (REPORT_DETAILS
))
876 fprintf (vect_dump
, "can't force alignment of ref: ");
877 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
882 /* Force the alignment of the decl.
883 NOTE: This is the only change to the code we make during
884 the analysis phase, before deciding to vectorize the loop. */
885 if (vect_print_dump_info (REPORT_DETAILS
))
887 fprintf (vect_dump
, "force alignment of ");
888 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
891 DECL_ALIGN (base
) = TYPE_ALIGN (vectype
);
892 DECL_USER_ALIGN (base
) = 1;
895 /* At this point we assume that the base is aligned. */
896 gcc_assert (base_aligned
897 || (TREE_CODE (base
) == VAR_DECL
898 && DECL_ALIGN (base
) >= TYPE_ALIGN (vectype
)));
900 /* If this is a backward running DR then first access in the larger
901 vectype actually is N-1 elements before the address in the DR.
902 Adjust misalign accordingly. */
903 if (tree_int_cst_compare (DR_STEP (dr
), size_zero_node
) < 0)
905 tree offset
= ssize_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
906 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
907 otherwise we wouldn't be here. */
908 offset
= fold_build2 (MULT_EXPR
, ssizetype
, offset
, DR_STEP (dr
));
909 /* PLUS because DR_STEP was negative. */
910 misalign
= size_binop (PLUS_EXPR
, misalign
, offset
);
913 /* Modulo alignment. */
914 misalign
= size_binop (FLOOR_MOD_EXPR
, misalign
, alignment
);
916 if (!host_integerp (misalign
, 1))
918 /* Negative or overflowed misalignment value. */
919 if (vect_print_dump_info (REPORT_DETAILS
))
920 fprintf (vect_dump
, "unexpected misalign value");
924 SET_DR_MISALIGNMENT (dr
, TREE_INT_CST_LOW (misalign
));
926 if (vect_print_dump_info (REPORT_DETAILS
))
928 fprintf (vect_dump
, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
929 print_generic_expr (vect_dump
, ref
, TDF_SLIM
);
936 /* Function vect_compute_data_refs_alignment
938 Compute the misalignment of data references in the loop.
939 Return FALSE if a data reference is found that cannot be vectorized. */
942 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
,
943 bb_vec_info bb_vinfo
)
945 VEC (data_reference_p
, heap
) *datarefs
;
946 struct data_reference
*dr
;
950 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
952 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
954 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
955 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
)))
956 && !vect_compute_data_ref_alignment (dr
))
960 /* Mark unsupported statement as unvectorizable. */
961 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
972 /* Function vect_update_misalignment_for_peel
974 DR - the data reference whose misalignment is to be adjusted.
975 DR_PEEL - the data reference whose misalignment is being made
976 zero in the vector loop by the peel.
977 NPEEL - the number of iterations in the peel loop if the misalignment
978 of DR_PEEL is known at compile time. */
981 vect_update_misalignment_for_peel (struct data_reference
*dr
,
982 struct data_reference
*dr_peel
, int npeel
)
985 VEC(dr_p
,heap
) *same_align_drs
;
986 struct data_reference
*current_dr
;
987 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
988 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
989 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
990 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
992 /* For interleaved data accesses the step in the loop must be multiplied by
993 the size of the interleaving group. */
994 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
995 dr_size
*= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
996 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info
))
997 dr_peel_size
*= GROUP_SIZE (peel_stmt_info
);
999 /* It can be assumed that the data refs with the same alignment as dr_peel
1000 are aligned in the vector loop. */
1002 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
1003 FOR_EACH_VEC_ELT (dr_p
, same_align_drs
, i
, current_dr
)
1005 if (current_dr
!= dr
)
1007 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
1008 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
1009 SET_DR_MISALIGNMENT (dr
, 0);
1013 if (known_alignment_for_access_p (dr
)
1014 && known_alignment_for_access_p (dr_peel
))
1016 bool negative
= tree_int_cst_compare (DR_STEP (dr
), size_zero_node
) < 0;
1017 int misal
= DR_MISALIGNMENT (dr
);
1018 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1019 misal
+= negative
? -npeel
* dr_size
: npeel
* dr_size
;
1020 misal
&= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1021 SET_DR_MISALIGNMENT (dr
, misal
);
1025 if (vect_print_dump_info (REPORT_DETAILS
))
1026 fprintf (vect_dump
, "Setting misalignment to -1.");
1027 SET_DR_MISALIGNMENT (dr
, -1);
1031 /* Function vect_verify_datarefs_alignment
1033 Return TRUE if all data references in the loop can be
1034 handled with respect to alignment. */
1037 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
1039 VEC (data_reference_p
, heap
) *datarefs
;
1040 struct data_reference
*dr
;
1041 enum dr_alignment_support supportable_dr_alignment
;
1045 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1047 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
1049 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
1051 gimple stmt
= DR_STMT (dr
);
1052 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1054 /* For interleaving, only the alignment of the first access matters.
1055 Skip statements marked as not vectorizable. */
1056 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1057 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1058 || !STMT_VINFO_VECTORIZABLE (stmt_info
))
1061 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1062 if (!supportable_dr_alignment
)
1064 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
1066 if (DR_IS_READ (dr
))
1068 "not vectorized: unsupported unaligned load.");
1071 "not vectorized: unsupported unaligned store.");
1073 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
1077 if (supportable_dr_alignment
!= dr_aligned
1078 && vect_print_dump_info (REPORT_ALIGNMENT
))
1079 fprintf (vect_dump
, "Vectorizing an unaligned access.");
1085 /* Function vector_alignment_reachable_p
1087 Return true if vector alignment for DR is reachable by peeling
1088 a few loop iterations. Return false otherwise. */
1091 vector_alignment_reachable_p (struct data_reference
*dr
)
1093 gimple stmt
= DR_STMT (dr
);
1094 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1095 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1097 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1099 /* For interleaved access we peel only if number of iterations in
1100 the prolog loop ({VF - misalignment}), is a multiple of the
1101 number of the interleaved accesses. */
1102 int elem_size
, mis_in_elements
;
1103 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1105 /* FORNOW: handle only known alignment. */
1106 if (!known_alignment_for_access_p (dr
))
1109 elem_size
= GET_MODE_SIZE (TYPE_MODE (vectype
)) / nelements
;
1110 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
1112 if ((nelements
- mis_in_elements
) % GROUP_SIZE (stmt_info
))
1116 /* If misalignment is known at the compile time then allow peeling
1117 only if natural alignment is reachable through peeling. */
1118 if (known_alignment_for_access_p (dr
) && !aligned_access_p (dr
))
1120 HOST_WIDE_INT elmsize
=
1121 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
1122 if (vect_print_dump_info (REPORT_DETAILS
))
1124 fprintf (vect_dump
, "data size =" HOST_WIDE_INT_PRINT_DEC
, elmsize
);
1125 fprintf (vect_dump
, ". misalignment = %d. ", DR_MISALIGNMENT (dr
));
1127 if (DR_MISALIGNMENT (dr
) % elmsize
)
1129 if (vect_print_dump_info (REPORT_DETAILS
))
1130 fprintf (vect_dump
, "data size does not divide the misalignment.\n");
1135 if (!known_alignment_for_access_p (dr
))
1137 tree type
= (TREE_TYPE (DR_REF (dr
)));
1138 tree ba
= DR_BASE_OBJECT (dr
);
1139 bool is_packed
= false;
1142 is_packed
= contains_packed_reference (ba
);
1144 if (compare_tree_int (TYPE_SIZE (type
), TYPE_ALIGN (type
)) > 0)
1147 if (vect_print_dump_info (REPORT_DETAILS
))
1148 fprintf (vect_dump
, "Unknown misalignment, is_packed = %d",is_packed
);
1149 if (targetm
.vectorize
.vector_alignment_reachable (type
, is_packed
))
1159 /* Calculate the cost of the memory access represented by DR. */
1162 vect_get_data_access_cost (struct data_reference
*dr
,
1163 unsigned int *inside_cost
,
1164 unsigned int *outside_cost
)
1166 gimple stmt
= DR_STMT (dr
);
1167 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1168 int nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1169 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1170 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1171 int ncopies
= vf
/ nunits
;
1172 bool supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1174 if (!supportable_dr_alignment
)
1175 *inside_cost
= VECT_MAX_COST
;
1178 if (DR_IS_READ (dr
))
1179 vect_get_load_cost (dr
, ncopies
, true, inside_cost
, outside_cost
);
1181 vect_get_store_cost (dr
, ncopies
, inside_cost
);
1184 if (vect_print_dump_info (REPORT_COST
))
1185 fprintf (vect_dump
, "vect_get_data_access_cost: inside_cost = %d, "
1186 "outside_cost = %d.", *inside_cost
, *outside_cost
);
1191 vect_peeling_hash (const void *elem
)
1193 const struct _vect_peel_info
*peel_info
;
1195 peel_info
= (const struct _vect_peel_info
*) elem
;
1196 return (hashval_t
) peel_info
->npeel
;
1201 vect_peeling_hash_eq (const void *elem1
, const void *elem2
)
1203 const struct _vect_peel_info
*a
, *b
;
1205 a
= (const struct _vect_peel_info
*) elem1
;
1206 b
= (const struct _vect_peel_info
*) elem2
;
1207 return (a
->npeel
== b
->npeel
);
1211 /* Insert DR into peeling hash table with NPEEL as key. */
1214 vect_peeling_hash_insert (loop_vec_info loop_vinfo
, struct data_reference
*dr
,
1217 struct _vect_peel_info elem
, *slot
;
1219 bool supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1222 slot
= (vect_peel_info
) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo
),
1228 slot
= XNEW (struct _vect_peel_info
);
1229 slot
->npeel
= npeel
;
1232 new_slot
= htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo
), slot
,
1237 if (!supportable_dr_alignment
&& !flag_vect_cost_model
)
1238 slot
->count
+= VECT_MAX_COST
;
1242 /* Traverse peeling hash table to find peeling option that aligns maximum
1243 number of data accesses. */
1246 vect_peeling_hash_get_most_frequent (void **slot
, void *data
)
1248 vect_peel_info elem
= (vect_peel_info
) *slot
;
1249 vect_peel_extended_info max
= (vect_peel_extended_info
) data
;
1251 if (elem
->count
> max
->peel_info
.count
)
1253 max
->peel_info
.npeel
= elem
->npeel
;
1254 max
->peel_info
.count
= elem
->count
;
1255 max
->peel_info
.dr
= elem
->dr
;
1262 /* Traverse peeling hash table and calculate cost for each peeling option.
1263 Find the one with the lowest cost. */
1266 vect_peeling_hash_get_lowest_cost (void **slot
, void *data
)
1268 vect_peel_info elem
= (vect_peel_info
) *slot
;
1269 vect_peel_extended_info min
= (vect_peel_extended_info
) data
;
1270 int save_misalignment
, dummy
;
1271 unsigned int inside_cost
= 0, outside_cost
= 0, i
;
1272 gimple stmt
= DR_STMT (elem
->dr
);
1273 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1274 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1275 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1276 struct data_reference
*dr
;
1278 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
1280 stmt
= DR_STMT (dr
);
1281 stmt_info
= vinfo_for_stmt (stmt
);
1282 /* For interleaving, only the alignment of the first access
1284 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1285 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1288 save_misalignment
= DR_MISALIGNMENT (dr
);
1289 vect_update_misalignment_for_peel (dr
, elem
->dr
, elem
->npeel
);
1290 vect_get_data_access_cost (dr
, &inside_cost
, &outside_cost
);
1291 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1294 outside_cost
+= vect_get_known_peeling_cost (loop_vinfo
, elem
->npeel
, &dummy
,
1295 vect_get_single_scalar_iteraion_cost (loop_vinfo
));
1297 if (inside_cost
< min
->inside_cost
1298 || (inside_cost
== min
->inside_cost
&& outside_cost
< min
->outside_cost
))
1300 min
->inside_cost
= inside_cost
;
1301 min
->outside_cost
= outside_cost
;
1302 min
->peel_info
.dr
= elem
->dr
;
1303 min
->peel_info
.npeel
= elem
->npeel
;
1310 /* Choose best peeling option by traversing peeling hash table and either
1311 choosing an option with the lowest cost (if cost model is enabled) or the
1312 option that aligns as many accesses as possible. */
1314 static struct data_reference
*
1315 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo
,
1316 unsigned int *npeel
)
1318 struct _vect_peel_extended_info res
;
1320 res
.peel_info
.dr
= NULL
;
1322 if (flag_vect_cost_model
)
1324 res
.inside_cost
= INT_MAX
;
1325 res
.outside_cost
= INT_MAX
;
1326 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo
),
1327 vect_peeling_hash_get_lowest_cost
, &res
);
1331 res
.peel_info
.count
= 0;
1332 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo
),
1333 vect_peeling_hash_get_most_frequent
, &res
);
1336 *npeel
= res
.peel_info
.npeel
;
1337 return res
.peel_info
.dr
;
1341 /* Function vect_enhance_data_refs_alignment
1343 This pass will use loop versioning and loop peeling in order to enhance
1344 the alignment of data references in the loop.
1346 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1347 original loop is to be vectorized. Any other loops that are created by
1348 the transformations performed in this pass - are not supposed to be
1349 vectorized. This restriction will be relaxed.
1351 This pass will require a cost model to guide it whether to apply peeling
1352 or versioning or a combination of the two. For example, the scheme that
1353 intel uses when given a loop with several memory accesses, is as follows:
1354 choose one memory access ('p') which alignment you want to force by doing
1355 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1356 other accesses are not necessarily aligned, or (2) use loop versioning to
1357 generate one loop in which all accesses are aligned, and another loop in
1358 which only 'p' is necessarily aligned.
1360 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1361 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1362 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1364 Devising a cost model is the most critical aspect of this work. It will
1365 guide us on which access to peel for, whether to use loop versioning, how
1366 many versions to create, etc. The cost model will probably consist of
1367 generic considerations as well as target specific considerations (on
1368 powerpc for example, misaligned stores are more painful than misaligned
1371 Here are the general steps involved in alignment enhancements:
1373 -- original loop, before alignment analysis:
1374 for (i=0; i<N; i++){
1375 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1376 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1379 -- After vect_compute_data_refs_alignment:
1380 for (i=0; i<N; i++){
1381 x = q[i]; # DR_MISALIGNMENT(q) = 3
1382 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1385 -- Possibility 1: we do loop versioning:
1387 for (i=0; i<N; i++){ # loop 1A
1388 x = q[i]; # DR_MISALIGNMENT(q) = 3
1389 p[i] = y; # DR_MISALIGNMENT(p) = 0
1393 for (i=0; i<N; i++){ # loop 1B
1394 x = q[i]; # DR_MISALIGNMENT(q) = 3
1395 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1399 -- Possibility 2: we do loop peeling:
1400 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1404 for (i = 3; i < N; i++){ # loop 2A
1405 x = q[i]; # DR_MISALIGNMENT(q) = 0
1406 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1409 -- Possibility 3: combination of loop peeling and versioning:
1410 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1415 for (i = 3; i<N; i++){ # loop 3A
1416 x = q[i]; # DR_MISALIGNMENT(q) = 0
1417 p[i] = y; # DR_MISALIGNMENT(p) = 0
1421 for (i = 3; i<N; i++){ # loop 3B
1422 x = q[i]; # DR_MISALIGNMENT(q) = 0
1423 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1427 These loops are later passed to loop_transform to be vectorized. The
1428 vectorizer will use the alignment information to guide the transformation
1429 (whether to generate regular loads/stores, or with special handling for
1433 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1435 VEC (data_reference_p
, heap
) *datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1436 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1437 enum dr_alignment_support supportable_dr_alignment
;
1438 struct data_reference
*dr0
= NULL
, *first_store
= NULL
;
1439 struct data_reference
*dr
;
1441 bool do_peeling
= false;
1442 bool do_versioning
= false;
1445 stmt_vec_info stmt_info
;
1446 int vect_versioning_for_alias_required
;
1447 unsigned int npeel
= 0;
1448 bool all_misalignments_unknown
= true;
1449 unsigned int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1450 unsigned possible_npeel_number
= 1;
1452 unsigned int nelements
, mis
, same_align_drs_max
= 0;
1454 if (vect_print_dump_info (REPORT_DETAILS
))
1455 fprintf (vect_dump
, "=== vect_enhance_data_refs_alignment ===");
1457 /* While cost model enhancements are expected in the future, the high level
1458 view of the code at this time is as follows:
1460 A) If there is a misaligned access then see if peeling to align
1461 this access can make all data references satisfy
1462 vect_supportable_dr_alignment. If so, update data structures
1463 as needed and return true.
1465 B) If peeling wasn't possible and there is a data reference with an
1466 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1467 then see if loop versioning checks can be used to make all data
1468 references satisfy vect_supportable_dr_alignment. If so, update
1469 data structures as needed and return true.
1471 C) If neither peeling nor versioning were successful then return false if
1472 any data reference does not satisfy vect_supportable_dr_alignment.
1474 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1476 Note, Possibility 3 above (which is peeling and versioning together) is not
1477 being done at this time. */
1479 /* (1) Peeling to force alignment. */
1481 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1483 + How many accesses will become aligned due to the peeling
1484 - How many accesses will become unaligned due to the peeling,
1485 and the cost of misaligned accesses.
1486 - The cost of peeling (the extra runtime checks, the increase
1489 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
1491 stmt
= DR_STMT (dr
);
1492 stmt_info
= vinfo_for_stmt (stmt
);
1494 /* For interleaving, only the alignment of the first access
1496 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1497 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1500 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1501 do_peeling
= vector_alignment_reachable_p (dr
);
1504 if (known_alignment_for_access_p (dr
))
1506 unsigned int npeel_tmp
;
1507 bool negative
= tree_int_cst_compare (DR_STEP (dr
),
1508 size_zero_node
) < 0;
1510 /* Save info about DR in the hash table. */
1511 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo
))
1512 LOOP_VINFO_PEELING_HTAB (loop_vinfo
) =
1513 htab_create (1, vect_peeling_hash
,
1514 vect_peeling_hash_eq
, free
);
1516 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1517 nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1518 mis
= DR_MISALIGNMENT (dr
) / GET_MODE_SIZE (TYPE_MODE (
1519 TREE_TYPE (DR_REF (dr
))));
1520 npeel_tmp
= (negative
1521 ? (mis
- nelements
) : (nelements
- mis
))
1524 /* For multiple types, it is possible that the bigger type access
1525 will have more than one peeling option. E.g., a loop with two
1526 types: one of size (vector size / 4), and the other one of
1527 size (vector size / 8). Vectorization factor will 8. If both
1528 access are misaligned by 3, the first one needs one scalar
1529 iteration to be aligned, and the second one needs 5. But the
1530 the first one will be aligned also by peeling 5 scalar
1531 iterations, and in that case both accesses will be aligned.
1532 Hence, except for the immediate peeling amount, we also want
1533 to try to add full vector size, while we don't exceed
1534 vectorization factor.
1535 We do this automtically for cost model, since we calculate cost
1536 for every peeling option. */
1537 if (!flag_vect_cost_model
)
1538 possible_npeel_number
= vf
/nelements
;
1540 /* Handle the aligned case. We may decide to align some other
1541 access, making DR unaligned. */
1542 if (DR_MISALIGNMENT (dr
) == 0)
1545 if (!flag_vect_cost_model
)
1546 possible_npeel_number
++;
1549 for (j
= 0; j
< possible_npeel_number
; j
++)
1551 gcc_assert (npeel_tmp
<= vf
);
1552 vect_peeling_hash_insert (loop_vinfo
, dr
, npeel_tmp
);
1553 npeel_tmp
+= nelements
;
1556 all_misalignments_unknown
= false;
1557 /* Data-ref that was chosen for the case that all the
1558 misalignments are unknown is not relevant anymore, since we
1559 have a data-ref with known alignment. */
1564 /* If we don't know all the misalignment values, we prefer
1565 peeling for data-ref that has maximum number of data-refs
1566 with the same alignment, unless the target prefers to align
1567 stores over load. */
1568 if (all_misalignments_unknown
)
1570 if (same_align_drs_max
< VEC_length (dr_p
,
1571 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
))
1574 same_align_drs_max
= VEC_length (dr_p
,
1575 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
));
1579 if (!first_store
&& DR_IS_WRITE (dr
))
1583 /* If there are both known and unknown misaligned accesses in the
1584 loop, we choose peeling amount according to the known
1588 if (!supportable_dr_alignment
)
1591 if (!first_store
&& DR_IS_WRITE (dr
))
1598 if (!aligned_access_p (dr
))
1600 if (vect_print_dump_info (REPORT_DETAILS
))
1601 fprintf (vect_dump
, "vector alignment may not be reachable");
1608 vect_versioning_for_alias_required
1609 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo
);
1611 /* Temporarily, if versioning for alias is required, we disable peeling
1612 until we support peeling and versioning. Often peeling for alignment
1613 will require peeling for loop-bound, which in turn requires that we
1614 know how to adjust the loop ivs after the loop. */
1615 if (vect_versioning_for_alias_required
1616 || !vect_can_advance_ivs_p (loop_vinfo
)
1617 || !slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1620 if (do_peeling
&& all_misalignments_unknown
1621 && vect_supportable_dr_alignment (dr0
, false))
1624 /* Check if the target requires to prefer stores over loads, i.e., if
1625 misaligned stores are more expensive than misaligned loads (taking
1626 drs with same alignment into account). */
1627 if (first_store
&& DR_IS_READ (dr0
))
1629 unsigned int load_inside_cost
= 0, load_outside_cost
= 0;
1630 unsigned int store_inside_cost
= 0, store_outside_cost
= 0;
1631 unsigned int load_inside_penalty
= 0, load_outside_penalty
= 0;
1632 unsigned int store_inside_penalty
= 0, store_outside_penalty
= 0;
1634 vect_get_data_access_cost (dr0
, &load_inside_cost
,
1635 &load_outside_cost
);
1636 vect_get_data_access_cost (first_store
, &store_inside_cost
,
1637 &store_outside_cost
);
1639 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1640 aligning the load DR0). */
1641 load_inside_penalty
= store_inside_cost
;
1642 load_outside_penalty
= store_outside_cost
;
1643 for (i
= 0; VEC_iterate (dr_p
, STMT_VINFO_SAME_ALIGN_REFS
1644 (vinfo_for_stmt (DR_STMT (first_store
))),
1647 if (DR_IS_READ (dr
))
1649 load_inside_penalty
+= load_inside_cost
;
1650 load_outside_penalty
+= load_outside_cost
;
1654 load_inside_penalty
+= store_inside_cost
;
1655 load_outside_penalty
+= store_outside_cost
;
1658 /* Calculate the penalty for leaving DR0 unaligned (by
1659 aligning the FIRST_STORE). */
1660 store_inside_penalty
= load_inside_cost
;
1661 store_outside_penalty
= load_outside_cost
;
1662 for (i
= 0; VEC_iterate (dr_p
, STMT_VINFO_SAME_ALIGN_REFS
1663 (vinfo_for_stmt (DR_STMT (dr0
))),
1666 if (DR_IS_READ (dr
))
1668 store_inside_penalty
+= load_inside_cost
;
1669 store_outside_penalty
+= load_outside_cost
;
1673 store_inside_penalty
+= store_inside_cost
;
1674 store_outside_penalty
+= store_outside_cost
;
1677 if (load_inside_penalty
> store_inside_penalty
1678 || (load_inside_penalty
== store_inside_penalty
1679 && load_outside_penalty
> store_outside_penalty
))
1683 /* In case there are only loads with different unknown misalignments, use
1684 peeling only if it may help to align other accesses in the loop. */
1685 if (!first_store
&& !VEC_length (dr_p
, STMT_VINFO_SAME_ALIGN_REFS
1686 (vinfo_for_stmt (DR_STMT (dr0
))))
1687 && vect_supportable_dr_alignment (dr0
, false)
1688 != dr_unaligned_supported
)
1692 if (do_peeling
&& !dr0
)
1694 /* Peeling is possible, but there is no data access that is not supported
1695 unless aligned. So we try to choose the best possible peeling. */
1697 /* We should get here only if there are drs with known misalignment. */
1698 gcc_assert (!all_misalignments_unknown
);
1700 /* Choose the best peeling from the hash table. */
1701 dr0
= vect_peeling_hash_choose_best_peeling (loop_vinfo
, &npeel
);
1708 stmt
= DR_STMT (dr0
);
1709 stmt_info
= vinfo_for_stmt (stmt
);
1710 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1711 nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1713 if (known_alignment_for_access_p (dr0
))
1715 bool negative
= tree_int_cst_compare (DR_STEP (dr0
),
1716 size_zero_node
) < 0;
1719 /* Since it's known at compile time, compute the number of
1720 iterations in the peeled loop (the peeling factor) for use in
1721 updating DR_MISALIGNMENT values. The peeling factor is the
1722 vectorization factor minus the misalignment as an element
1724 mis
= DR_MISALIGNMENT (dr0
);
1725 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1726 npeel
= ((negative
? mis
- nelements
: nelements
- mis
)
1730 /* For interleaved data access every iteration accesses all the
1731 members of the group, therefore we divide the number of iterations
1732 by the group size. */
1733 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1734 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
))
1735 npeel
/= GROUP_SIZE (stmt_info
);
1737 if (vect_print_dump_info (REPORT_DETAILS
))
1738 fprintf (vect_dump
, "Try peeling by %d", npeel
);
1741 /* Ensure that all data refs can be vectorized after the peel. */
1742 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
1744 int save_misalignment
;
1749 stmt
= DR_STMT (dr
);
1750 stmt_info
= vinfo_for_stmt (stmt
);
1751 /* For interleaving, only the alignment of the first access
1753 if (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1754 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1757 save_misalignment
= DR_MISALIGNMENT (dr
);
1758 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1759 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1760 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1762 if (!supportable_dr_alignment
)
1769 if (do_peeling
&& known_alignment_for_access_p (dr0
) && npeel
== 0)
1771 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1780 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1781 If the misalignment of DR_i is identical to that of dr0 then set
1782 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1783 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1784 by the peeling factor times the element size of DR_i (MOD the
1785 vectorization factor times the size). Otherwise, the
1786 misalignment of DR_i must be set to unknown. */
1787 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
1789 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1791 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1793 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = npeel
;
1795 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo
) = DR_MISALIGNMENT (dr0
);
1796 SET_DR_MISALIGNMENT (dr0
, 0);
1797 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1798 fprintf (vect_dump
, "Alignment of access forced using peeling.");
1800 if (vect_print_dump_info (REPORT_DETAILS
))
1801 fprintf (vect_dump
, "Peeling for alignment will be applied.");
1803 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1810 /* (2) Versioning to force alignment. */
1812 /* Try versioning if:
1813 1) flag_tree_vect_loop_version is TRUE
1814 2) optimize loop for speed
1815 3) there is at least one unsupported misaligned data ref with an unknown
1817 4) all misaligned data refs with a known misalignment are supported, and
1818 5) the number of runtime alignment checks is within reason. */
1821 flag_tree_vect_loop_version
1822 && optimize_loop_nest_for_speed_p (loop
)
1823 && (!loop
->inner
); /* FORNOW */
1827 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
1829 stmt
= DR_STMT (dr
);
1830 stmt_info
= vinfo_for_stmt (stmt
);
1832 /* For interleaving, only the alignment of the first access
1834 if (aligned_access_p (dr
)
1835 || (STMT_VINFO_STRIDED_ACCESS (stmt_info
)
1836 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
))
1839 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1841 if (!supportable_dr_alignment
)
1847 if (known_alignment_for_access_p (dr
)
1848 || VEC_length (gimple
,
1849 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
))
1850 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
1852 do_versioning
= false;
1856 stmt
= DR_STMT (dr
);
1857 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1858 gcc_assert (vectype
);
1860 /* The rightmost bits of an aligned address must be zeros.
1861 Construct the mask needed for this test. For example,
1862 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1863 mask must be 15 = 0xf. */
1864 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1866 /* FORNOW: use the same mask to test all potentially unaligned
1867 references in the loop. The vectorizer currently supports
1868 a single vector size, see the reference to
1869 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1870 vectorization factor is computed. */
1871 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1872 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1873 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1874 VEC_safe_push (gimple
, heap
,
1875 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
),
1880 /* Versioning requires at least one misaligned data reference. */
1881 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
))
1882 do_versioning
= false;
1883 else if (!do_versioning
)
1884 VEC_truncate (gimple
, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
), 0);
1889 VEC(gimple
,heap
) *may_misalign_stmts
1890 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1893 /* It can now be assumed that the data references in the statements
1894 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1895 of the loop being vectorized. */
1896 FOR_EACH_VEC_ELT (gimple
, may_misalign_stmts
, i
, stmt
)
1898 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1899 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1900 SET_DR_MISALIGNMENT (dr
, 0);
1901 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1902 fprintf (vect_dump
, "Alignment of access forced using versioning.");
1905 if (vect_print_dump_info (REPORT_DETAILS
))
1906 fprintf (vect_dump
, "Versioning for alignment will be applied.");
1908 /* Peeling and versioning can't be done together at this time. */
1909 gcc_assert (! (do_peeling
&& do_versioning
));
1911 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1916 /* This point is reached if neither peeling nor versioning is being done. */
1917 gcc_assert (! (do_peeling
|| do_versioning
));
1919 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1924 /* Function vect_find_same_alignment_drs.
1926 Update group and alignment relations according to the chosen
1927 vectorization factor. */
1930 vect_find_same_alignment_drs (struct data_dependence_relation
*ddr
,
1931 loop_vec_info loop_vinfo
)
1934 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1935 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1936 struct data_reference
*dra
= DDR_A (ddr
);
1937 struct data_reference
*drb
= DDR_B (ddr
);
1938 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
1939 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
1940 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
1941 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
1942 lambda_vector dist_v
;
1943 unsigned int loop_depth
;
1945 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1951 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1954 /* Loop-based vectorization and known data dependence. */
1955 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1958 /* Data-dependence analysis reports a distance vector of zero
1959 for data-references that overlap only in the first iteration
1960 but have different sign step (see PR45764).
1961 So as a sanity check require equal DR_STEP. */
1962 if (!operand_equal_p (DR_STEP (dra
), DR_STEP (drb
), 0))
1965 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
1966 FOR_EACH_VEC_ELT (lambda_vector
, DDR_DIST_VECTS (ddr
), i
, dist_v
)
1968 int dist
= dist_v
[loop_depth
];
1970 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1971 fprintf (vect_dump
, "dependence distance = %d.", dist
);
1973 /* Same loop iteration. */
1975 || (dist
% vectorization_factor
== 0 && dra_size
== drb_size
))
1977 /* Two references with distance zero have the same alignment. */
1978 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
), drb
);
1979 VEC_safe_push (dr_p
, heap
, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
), dra
);
1980 if (vect_print_dump_info (REPORT_ALIGNMENT
))
1981 fprintf (vect_dump
, "accesses have the same alignment.");
1982 if (vect_print_dump_info (REPORT_DR_DETAILS
))
1984 fprintf (vect_dump
, "dependence distance modulo vf == 0 between ");
1985 print_generic_expr (vect_dump
, DR_REF (dra
), TDF_SLIM
);
1986 fprintf (vect_dump
, " and ");
1987 print_generic_expr (vect_dump
, DR_REF (drb
), TDF_SLIM
);
1994 /* Function vect_analyze_data_refs_alignment
1996 Analyze the alignment of the data-references in the loop.
1997 Return FALSE if a data reference is found that cannot be vectorized. */
2000 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
,
2001 bb_vec_info bb_vinfo
)
2003 if (vect_print_dump_info (REPORT_DETAILS
))
2004 fprintf (vect_dump
, "=== vect_analyze_data_refs_alignment ===");
2006 /* Mark groups of data references with same alignment using
2007 data dependence information. */
2010 VEC (ddr_p
, heap
) *ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
2011 struct data_dependence_relation
*ddr
;
2014 FOR_EACH_VEC_ELT (ddr_p
, ddrs
, i
, ddr
)
2015 vect_find_same_alignment_drs (ddr
, loop_vinfo
);
2018 if (!vect_compute_data_refs_alignment (loop_vinfo
, bb_vinfo
))
2020 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2022 "not vectorized: can't calculate alignment for data ref.");
2030 /* Analyze groups of strided accesses: check that DR belongs to a group of
2031 strided accesses of legal size, step, etc. Detect gaps, single element
2032 interleaving, and other special cases. Set strided access info.
2033 Collect groups of strided stores for further use in SLP analysis. */
2036 vect_analyze_group_access (struct data_reference
*dr
)
2038 tree step
= DR_STEP (dr
);
2039 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2040 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
2041 gimple stmt
= DR_STMT (dr
);
2042 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2043 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2044 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2045 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2046 HOST_WIDE_INT stride
;
2047 bool slp_impossible
= false;
2049 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2050 interleaving group (including gaps). */
2051 stride
= dr_step
/ type_size
;
2053 /* Not consecutive access is possible only if it is a part of interleaving. */
2054 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2056 /* Check if it this DR is a part of interleaving, and is a single
2057 element of the group that is accessed in the loop. */
2059 /* Gaps are supported only for loads. STEP must be a multiple of the type
2060 size. The size of the group must be a power of 2. */
2062 && (dr_step
% type_size
) == 0
2064 && exact_log2 (stride
) != -1)
2066 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = stmt
;
2067 GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2068 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2070 fprintf (vect_dump
, "Detected single element interleaving ");
2071 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2072 fprintf (vect_dump
, " step ");
2073 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2078 if (vect_print_dump_info (REPORT_DETAILS
))
2080 fprintf (vect_dump
, "not consecutive access ");
2081 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2086 /* Mark the statement as unvectorizable. */
2087 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
2094 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
)
2096 /* First stmt in the interleaving chain. Check the chain. */
2097 gimple next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
2098 struct data_reference
*data_ref
= dr
;
2099 unsigned int count
= 1;
2101 tree prev_init
= DR_INIT (data_ref
);
2103 HOST_WIDE_INT diff
, count_in_bytes
, gaps
= 0;
2107 /* Skip same data-refs. In case that two or more stmts share
2108 data-ref (supported only for loads), we vectorize only the first
2109 stmt, and the rest get their vectorized loads from the first
2111 if (!tree_int_cst_compare (DR_INIT (data_ref
),
2112 DR_INIT (STMT_VINFO_DATA_REF (
2113 vinfo_for_stmt (next
)))))
2115 if (DR_IS_WRITE (data_ref
))
2117 if (vect_print_dump_info (REPORT_DETAILS
))
2118 fprintf (vect_dump
, "Two store stmts share the same dr.");
2122 /* Check that there is no load-store dependencies for this loads
2123 to prevent a case of load-store-load to the same location. */
2124 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next
))
2125 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev
)))
2127 if (vect_print_dump_info (REPORT_DETAILS
))
2129 "READ_WRITE dependence in interleaving.");
2133 /* For load use the same data-ref load. */
2134 GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2137 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2142 /* Check that all the accesses have the same STEP. */
2143 next_step
= DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next
)));
2144 if (tree_int_cst_compare (step
, next_step
))
2146 if (vect_print_dump_info (REPORT_DETAILS
))
2147 fprintf (vect_dump
, "not consecutive access in interleaving");
2151 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2152 /* Check that the distance between two accesses is equal to the type
2153 size. Otherwise, we have gaps. */
2154 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2155 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2158 /* FORNOW: SLP of accesses with gaps is not supported. */
2159 slp_impossible
= true;
2160 if (DR_IS_WRITE (data_ref
))
2162 if (vect_print_dump_info (REPORT_DETAILS
))
2163 fprintf (vect_dump
, "interleaved store with gaps");
2170 /* Store the gap from the previous member of the group. If there is no
2171 gap in the access, GROUP_GAP is always 1. */
2172 GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2174 prev_init
= DR_INIT (data_ref
);
2175 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2176 /* Count the number of data-refs in the chain. */
2180 /* COUNT is the number of accesses found, we multiply it by the size of
2181 the type to get COUNT_IN_BYTES. */
2182 count_in_bytes
= type_size
* count
;
2184 /* Check that the size of the interleaving (including gaps) is not
2185 greater than STEP. */
2186 if (dr_step
&& dr_step
< count_in_bytes
+ gaps
* type_size
)
2188 if (vect_print_dump_info (REPORT_DETAILS
))
2190 fprintf (vect_dump
, "interleaving size is greater than step for ");
2191 print_generic_expr (vect_dump
, DR_REF (dr
), TDF_SLIM
);
2196 /* Check that the size of the interleaving is equal to STEP for stores,
2197 i.e., that there are no gaps. */
2198 if (dr_step
&& dr_step
!= count_in_bytes
)
2200 if (DR_IS_READ (dr
))
2202 slp_impossible
= true;
2203 /* There is a gap after the last load in the group. This gap is a
2204 difference between the stride and the number of elements. When
2205 there is no gap, this difference should be 0. */
2206 GROUP_GAP (vinfo_for_stmt (stmt
)) = stride
- count
;
2210 if (vect_print_dump_info (REPORT_DETAILS
))
2211 fprintf (vect_dump
, "interleaved store with gaps");
2216 /* Check that STEP is a multiple of type size. */
2217 if (dr_step
&& (dr_step
% type_size
) != 0)
2219 if (vect_print_dump_info (REPORT_DETAILS
))
2221 fprintf (vect_dump
, "step is not a multiple of type size: step ");
2222 print_generic_expr (vect_dump
, step
, TDF_SLIM
);
2223 fprintf (vect_dump
, " size ");
2224 print_generic_expr (vect_dump
, TYPE_SIZE_UNIT (scalar_type
),
2233 GROUP_SIZE (vinfo_for_stmt (stmt
)) = stride
;
2234 if (vect_print_dump_info (REPORT_DETAILS
))
2235 fprintf (vect_dump
, "Detected interleaving of size %d", (int)stride
);
2237 /* SLP: create an SLP data structure for every interleaving group of
2238 stores for further analysis in vect_analyse_slp. */
2239 if (DR_IS_WRITE (dr
) && !slp_impossible
)
2242 VEC_safe_push (gimple
, heap
, LOOP_VINFO_STRIDED_STORES (loop_vinfo
),
2245 VEC_safe_push (gimple
, heap
, BB_VINFO_STRIDED_STORES (bb_vinfo
),
2254 /* Analyze the access pattern of the data-reference DR.
2255 In case of non-consecutive accesses call vect_analyze_group_access() to
2256 analyze groups of strided accesses. */
2259 vect_analyze_data_ref_access (struct data_reference
*dr
)
2261 tree step
= DR_STEP (dr
);
2262 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2263 gimple stmt
= DR_STMT (dr
);
2264 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2265 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2266 struct loop
*loop
= NULL
;
2267 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2270 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2272 if (loop_vinfo
&& !step
)
2274 if (vect_print_dump_info (REPORT_DETAILS
))
2275 fprintf (vect_dump
, "bad data-ref access in loop");
2279 /* Don't allow invariant accesses in loops. */
2280 if (loop_vinfo
&& dr_step
== 0)
2283 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2285 /* Interleaved accesses are not yet supported within outer-loop
2286 vectorization for references in the inner-loop. */
2287 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2289 /* For the rest of the analysis we use the outer-loop step. */
2290 step
= STMT_VINFO_DR_STEP (stmt_info
);
2291 dr_step
= TREE_INT_CST_LOW (step
);
2295 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2296 fprintf (vect_dump
, "zero step in outer loop.");
2297 if (DR_IS_READ (dr
))
2305 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
))
2307 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type
), -dr_step
)))
2309 /* Mark that it is not interleaving. */
2310 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2314 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2316 if (vect_print_dump_info (REPORT_ALIGNMENT
))
2317 fprintf (vect_dump
, "strided access in outer loop.");
2321 /* Not consecutive access - check if it's a part of interleaving group. */
2322 return vect_analyze_group_access (dr
);
2326 /* Function vect_analyze_data_ref_accesses.
2328 Analyze the access pattern of all the data references in the loop.
2330 FORNOW: the only access pattern that is considered vectorizable is a
2331 simple step 1 (consecutive) access.
2333 FORNOW: handle only arrays and pointer accesses. */
2336 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2339 VEC (data_reference_p
, heap
) *datarefs
;
2340 struct data_reference
*dr
;
2342 if (vect_print_dump_info (REPORT_DETAILS
))
2343 fprintf (vect_dump
, "=== vect_analyze_data_ref_accesses ===");
2346 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2348 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
2350 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
2351 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
)))
2352 && !vect_analyze_data_ref_access (dr
))
2354 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2355 fprintf (vect_dump
, "not vectorized: complicated access pattern.");
2359 /* Mark the statement as not vectorizable. */
2360 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
2370 /* Function vect_prune_runtime_alias_test_list.
2372 Prune a list of ddrs to be tested at run-time by versioning for alias.
2373 Return FALSE if resulting list of ddrs is longer then allowed by
2374 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2377 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo
)
2379 VEC (ddr_p
, heap
) * ddrs
=
2380 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
);
2383 if (vect_print_dump_info (REPORT_DETAILS
))
2384 fprintf (vect_dump
, "=== vect_prune_runtime_alias_test_list ===");
2386 for (i
= 0; i
< VEC_length (ddr_p
, ddrs
); )
2391 ddr_i
= VEC_index (ddr_p
, ddrs
, i
);
2394 for (j
= 0; j
< i
; j
++)
2396 ddr_p ddr_j
= VEC_index (ddr_p
, ddrs
, j
);
2398 if (vect_vfa_range_equal (ddr_i
, ddr_j
))
2400 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2402 fprintf (vect_dump
, "found equal ranges ");
2403 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_i
)), TDF_SLIM
);
2404 fprintf (vect_dump
, ", ");
2405 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_i
)), TDF_SLIM
);
2406 fprintf (vect_dump
, " and ");
2407 print_generic_expr (vect_dump
, DR_REF (DDR_A (ddr_j
)), TDF_SLIM
);
2408 fprintf (vect_dump
, ", ");
2409 print_generic_expr (vect_dump
, DR_REF (DDR_B (ddr_j
)), TDF_SLIM
);
2418 VEC_ordered_remove (ddr_p
, ddrs
, i
);
2424 if (VEC_length (ddr_p
, ddrs
) >
2425 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
2427 if (vect_print_dump_info (REPORT_DR_DETAILS
))
2430 "disable versioning for alias - max number of generated "
2431 "checks exceeded.");
2434 VEC_truncate (ddr_p
, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
), 0);
2443 /* Function vect_analyze_data_refs.
2445 Find all the data references in the loop or basic block.
2447 The general structure of the analysis of data refs in the vectorizer is as
2449 1- vect_analyze_data_refs(loop/bb): call
2450 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2451 in the loop/bb and their dependences.
2452 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2453 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2454 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2459 vect_analyze_data_refs (loop_vec_info loop_vinfo
,
2460 bb_vec_info bb_vinfo
,
2463 struct loop
*loop
= NULL
;
2464 basic_block bb
= NULL
;
2466 VEC (data_reference_p
, heap
) *datarefs
;
2467 struct data_reference
*dr
;
2471 if (vect_print_dump_info (REPORT_DETAILS
))
2472 fprintf (vect_dump
, "=== vect_analyze_data_refs ===\n");
2476 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2477 res
= compute_data_dependences_for_loop
2479 &LOOP_VINFO_LOOP_NEST (loop_vinfo
),
2480 &LOOP_VINFO_DATAREFS (loop_vinfo
),
2481 &LOOP_VINFO_DDRS (loop_vinfo
));
2485 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2486 fprintf (vect_dump
, "not vectorized: loop contains function calls"
2487 " or data references that cannot be analyzed");
2491 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2495 bb
= BB_VINFO_BB (bb_vinfo
);
2496 res
= compute_data_dependences_for_bb (bb
, true,
2497 &BB_VINFO_DATAREFS (bb_vinfo
),
2498 &BB_VINFO_DDRS (bb_vinfo
));
2501 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2502 fprintf (vect_dump
, "not vectorized: basic block contains function"
2503 " calls or data references that cannot be analyzed");
2507 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
2510 /* Go through the data-refs, check that the analysis succeeded. Update
2511 pointer from stmt_vec_info struct to DR and vectype. */
2513 FOR_EACH_VEC_ELT (data_reference_p
, datarefs
, i
, dr
)
2516 stmt_vec_info stmt_info
;
2517 tree base
, offset
, init
;
2520 if (!dr
|| !DR_REF (dr
))
2522 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2523 fprintf (vect_dump
, "not vectorized: unhandled data-ref ");
2527 stmt
= DR_STMT (dr
);
2528 stmt_info
= vinfo_for_stmt (stmt
);
2530 /* Check that analysis of the data-ref succeeded. */
2531 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
2534 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2536 fprintf (vect_dump
, "not vectorized: data ref analysis failed ");
2537 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2542 /* Mark the statement as not vectorizable. */
2543 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2550 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
2552 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2553 fprintf (vect_dump
, "not vectorized: base addr of dr is a "
2557 /* Mark the statement as not vectorizable. */
2558 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2565 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
2566 offset
= unshare_expr (DR_OFFSET (dr
));
2567 init
= unshare_expr (DR_INIT (dr
));
2569 if (stmt_can_throw_internal (stmt
))
2571 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2573 fprintf (vect_dump
, "not vectorized: statement can throw an "
2575 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2580 /* Update DR field in stmt_vec_info struct. */
2582 /* If the dataref is in an inner-loop of the loop that is considered for
2583 for vectorization, we also want to analyze the access relative to
2584 the outer-loop (DR contains information only relative to the
2585 inner-most enclosing loop). We do that by building a reference to the
2586 first location accessed by the inner-loop, and analyze it relative to
2588 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2590 tree outer_step
, outer_base
, outer_init
;
2591 HOST_WIDE_INT pbitsize
, pbitpos
;
2593 enum machine_mode pmode
;
2594 int punsignedp
, pvolatilep
;
2595 affine_iv base_iv
, offset_iv
;
2598 /* Build a reference to the first location accessed by the
2599 inner-loop: *(BASE+INIT). (The first location is actually
2600 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2601 tree inner_base
= build_fold_indirect_ref
2602 (fold_build2 (POINTER_PLUS_EXPR
,
2603 TREE_TYPE (base
), base
,
2604 fold_convert (sizetype
, init
)));
2606 if (vect_print_dump_info (REPORT_DETAILS
))
2608 fprintf (vect_dump
, "analyze in outer-loop: ");
2609 print_generic_expr (vect_dump
, inner_base
, TDF_SLIM
);
2612 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
2613 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
2614 gcc_assert (outer_base
!= NULL_TREE
);
2616 if (pbitpos
% BITS_PER_UNIT
!= 0)
2618 if (vect_print_dump_info (REPORT_DETAILS
))
2619 fprintf (vect_dump
, "failed: bit offset alignment.\n");
2623 outer_base
= build_fold_addr_expr (outer_base
);
2624 if (!simple_iv (loop
, loop_containing_stmt (stmt
), outer_base
,
2627 if (vect_print_dump_info (REPORT_DETAILS
))
2628 fprintf (vect_dump
, "failed: evolution of base is not affine.\n");
2635 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
,
2643 offset_iv
.base
= ssize_int (0);
2644 offset_iv
.step
= ssize_int (0);
2646 else if (!simple_iv (loop
, loop_containing_stmt (stmt
), poffset
,
2649 if (vect_print_dump_info (REPORT_DETAILS
))
2650 fprintf (vect_dump
, "evolution of offset is not affine.\n");
2654 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
2655 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
2656 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
2657 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
2658 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
2660 outer_step
= size_binop (PLUS_EXPR
,
2661 fold_convert (ssizetype
, base_iv
.step
),
2662 fold_convert (ssizetype
, offset_iv
.step
));
2664 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
2665 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2666 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
2667 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
2668 STMT_VINFO_DR_OFFSET (stmt_info
) =
2669 fold_convert (ssizetype
, offset_iv
.base
);
2670 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
2671 size_int (highest_pow2_factor (offset_iv
.base
));
2673 if (vect_print_dump_info (REPORT_DETAILS
))
2675 fprintf (vect_dump
, "\touter base_address: ");
2676 print_generic_expr (vect_dump
, STMT_VINFO_DR_BASE_ADDRESS (stmt_info
), TDF_SLIM
);
2677 fprintf (vect_dump
, "\n\touter offset from base address: ");
2678 print_generic_expr (vect_dump
, STMT_VINFO_DR_OFFSET (stmt_info
), TDF_SLIM
);
2679 fprintf (vect_dump
, "\n\touter constant offset from base address: ");
2680 print_generic_expr (vect_dump
, STMT_VINFO_DR_INIT (stmt_info
), TDF_SLIM
);
2681 fprintf (vect_dump
, "\n\touter step: ");
2682 print_generic_expr (vect_dump
, STMT_VINFO_DR_STEP (stmt_info
), TDF_SLIM
);
2683 fprintf (vect_dump
, "\n\touter aligned to: ");
2684 print_generic_expr (vect_dump
, STMT_VINFO_DR_ALIGNED_TO (stmt_info
), TDF_SLIM
);
2688 if (STMT_VINFO_DATA_REF (stmt_info
))
2690 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2693 "not vectorized: more than one data ref in stmt: ");
2694 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2699 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
2701 /* Set vectype for STMT. */
2702 scalar_type
= TREE_TYPE (DR_REF (dr
));
2703 STMT_VINFO_VECTYPE (stmt_info
) =
2704 get_vectype_for_scalar_type (scalar_type
);
2705 if (!STMT_VINFO_VECTYPE (stmt_info
))
2707 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS
))
2710 "not vectorized: no vectype for stmt: ");
2711 print_gimple_stmt (vect_dump
, stmt
, 0, TDF_SLIM
);
2712 fprintf (vect_dump
, " scalar_type: ");
2713 print_generic_expr (vect_dump
, scalar_type
, TDF_DETAILS
);
2718 /* Mark the statement as not vectorizable. */
2719 STMT_VINFO_VECTORIZABLE (stmt_info
) = false;
2726 /* Adjust the minimal vectorization factor according to the
2728 vf
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
2737 /* Function vect_get_new_vect_var.
2739 Returns a name for a new variable. The current naming scheme appends the
2740 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2741 the name of vectorizer generated variables, and appends that to NAME if
2745 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
2752 case vect_simple_var
:
2755 case vect_scalar_var
:
2758 case vect_pointer_var
:
2767 char* tmp
= concat (prefix
, name
, NULL
);
2768 new_vect_var
= create_tmp_var (type
, tmp
);
2772 new_vect_var
= create_tmp_var (type
, prefix
);
2774 /* Mark vector typed variable as a gimple register variable. */
2775 if (TREE_CODE (type
) == VECTOR_TYPE
)
2776 DECL_GIMPLE_REG_P (new_vect_var
) = true;
2778 return new_vect_var
;
2782 /* Function vect_create_addr_base_for_vector_ref.
2784 Create an expression that computes the address of the first memory location
2785 that will be accessed for a data reference.
2788 STMT: The statement containing the data reference.
2789 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2790 OFFSET: Optional. If supplied, it is be added to the initial address.
2791 LOOP: Specify relative to which loop-nest should the address be computed.
2792 For example, when the dataref is in an inner-loop nested in an
2793 outer-loop that is now being vectorized, LOOP can be either the
2794 outer-loop, or the inner-loop. The first memory location accessed
2795 by the following dataref ('in' points to short):
2802 if LOOP=i_loop: &in (relative to i_loop)
2803 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2806 1. Return an SSA_NAME whose value is the address of the memory location of
2807 the first vector of the data reference.
2808 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2809 these statement(s) which define the returned SSA_NAME.
2811 FORNOW: We are only handling array accesses with step 1. */
2814 vect_create_addr_base_for_vector_ref (gimple stmt
,
2815 gimple_seq
*new_stmt_list
,
2819 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2820 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2821 tree data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
2823 tree data_ref_base_var
;
2825 tree addr_base
, addr_expr
;
2827 gimple_seq seq
= NULL
;
2828 tree base_offset
= unshare_expr (DR_OFFSET (dr
));
2829 tree init
= unshare_expr (DR_INIT (dr
));
2831 tree step
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
2832 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2835 if (loop_vinfo
&& loop
&& loop
!= (gimple_bb (stmt
))->loop_father
)
2837 struct loop
*outer_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2839 gcc_assert (nested_in_vect_loop_p (outer_loop
, stmt
));
2841 data_ref_base
= unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
2842 base_offset
= unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info
));
2843 init
= unshare_expr (STMT_VINFO_DR_INIT (stmt_info
));
2847 base_name
= build_fold_indirect_ref (data_ref_base
);
2850 base_offset
= ssize_int (0);
2851 init
= ssize_int (0);
2852 base_name
= build_fold_indirect_ref (unshare_expr (DR_REF (dr
)));
2855 data_ref_base_var
= create_tmp_var (TREE_TYPE (data_ref_base
), "batmp");
2856 add_referenced_var (data_ref_base_var
);
2857 data_ref_base
= force_gimple_operand (data_ref_base
, &seq
, true,
2859 gimple_seq_add_seq (new_stmt_list
, seq
);
2861 /* Create base_offset */
2862 base_offset
= size_binop (PLUS_EXPR
,
2863 fold_convert (sizetype
, base_offset
),
2864 fold_convert (sizetype
, init
));
2865 dest
= create_tmp_var (sizetype
, "base_off");
2866 add_referenced_var (dest
);
2867 base_offset
= force_gimple_operand (base_offset
, &seq
, true, dest
);
2868 gimple_seq_add_seq (new_stmt_list
, seq
);
2872 tree tmp
= create_tmp_var (sizetype
, "offset");
2874 add_referenced_var (tmp
);
2875 offset
= fold_build2 (MULT_EXPR
, sizetype
,
2876 fold_convert (sizetype
, offset
), step
);
2877 base_offset
= fold_build2 (PLUS_EXPR
, sizetype
,
2878 base_offset
, offset
);
2879 base_offset
= force_gimple_operand (base_offset
, &seq
, false, tmp
);
2880 gimple_seq_add_seq (new_stmt_list
, seq
);
2883 /* base + base_offset */
2885 addr_base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (data_ref_base
),
2886 data_ref_base
, base_offset
);
2889 addr_base
= build1 (ADDR_EXPR
,
2890 build_pointer_type (TREE_TYPE (DR_REF (dr
))),
2891 unshare_expr (DR_REF (dr
)));
2894 vect_ptr_type
= build_pointer_type (STMT_VINFO_VECTYPE (stmt_info
));
2895 base
= get_base_address (DR_REF (dr
));
2897 && TREE_CODE (base
) == MEM_REF
)
2899 = build_qualified_type (vect_ptr_type
,
2900 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base
, 0))));
2902 vec_stmt
= fold_convert (vect_ptr_type
, addr_base
);
2903 addr_expr
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
,
2904 get_name (base_name
));
2905 add_referenced_var (addr_expr
);
2906 vec_stmt
= force_gimple_operand (vec_stmt
, &seq
, false, addr_expr
);
2907 gimple_seq_add_seq (new_stmt_list
, seq
);
2909 if (DR_PTR_INFO (dr
)
2910 && TREE_CODE (vec_stmt
) == SSA_NAME
)
2912 duplicate_ssa_name_ptr_info (vec_stmt
, DR_PTR_INFO (dr
));
2915 SSA_NAME_PTR_INFO (vec_stmt
)->align
= 1;
2916 SSA_NAME_PTR_INFO (vec_stmt
)->misalign
= 0;
2920 if (vect_print_dump_info (REPORT_DETAILS
))
2922 fprintf (vect_dump
, "created ");
2923 print_generic_expr (vect_dump
, vec_stmt
, TDF_SLIM
);
2930 /* Function vect_create_data_ref_ptr.
2932 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2933 location accessed in the loop by STMT, along with the def-use update
2934 chain to appropriately advance the pointer through the loop iterations.
2935 Also set aliasing information for the pointer. This pointer is used by
2936 the callers to this function to create a memory reference expression for
2937 vector load/store access.
2940 1. STMT: a stmt that references memory. Expected to be of the form
2941 GIMPLE_ASSIGN <name, data-ref> or
2942 GIMPLE_ASSIGN <data-ref, name>.
2943 2. AGGR_TYPE: the type of the reference, which should be either a vector
2945 3. AT_LOOP: the loop where the vector memref is to be created.
2946 4. OFFSET (optional): an offset to be added to the initial address accessed
2947 by the data-ref in STMT.
2948 5. BSI: location where the new stmts are to be placed if there is no loop
2949 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2950 pointing to the initial address.
2953 1. Declare a new ptr to vector_type, and have it point to the base of the
2954 data reference (initial addressed accessed by the data reference).
2955 For example, for vector of type V8HI, the following code is generated:
2958 ap = (v8hi *)initial_address;
2960 if OFFSET is not supplied:
2961 initial_address = &a[init];
2962 if OFFSET is supplied:
2963 initial_address = &a[init + OFFSET];
2965 Return the initial_address in INITIAL_ADDRESS.
2967 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2968 update the pointer in each iteration of the loop.
2970 Return the increment stmt that updates the pointer in PTR_INCR.
2972 3. Set INV_P to true if the access pattern of the data reference in the
2973 vectorized loop is invariant. Set it to false otherwise.
2975 4. Return the pointer. */
2978 vect_create_data_ref_ptr (gimple stmt
, tree aggr_type
, struct loop
*at_loop
,
2979 tree offset
, tree
*initial_address
,
2980 gimple_stmt_iterator
*gsi
, gimple
*ptr_incr
,
2981 bool only_init
, bool *inv_p
)
2984 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2985 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2986 struct loop
*loop
= NULL
;
2987 bool nested_in_vect_loop
= false;
2988 struct loop
*containing_loop
= NULL
;
2993 gimple_seq new_stmt_list
= NULL
;
2997 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2999 gimple_stmt_iterator incr_gsi
;
3002 tree indx_before_incr
, indx_after_incr
;
3005 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3008 gcc_assert (TREE_CODE (aggr_type
) == ARRAY_TYPE
3009 || TREE_CODE (aggr_type
) == VECTOR_TYPE
);
3013 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3014 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
3015 containing_loop
= (gimple_bb (stmt
))->loop_father
;
3016 pe
= loop_preheader_edge (loop
);
3020 gcc_assert (bb_vinfo
);
3025 /* Check the step (evolution) of the load in LOOP, and record
3026 whether it's invariant. */
3027 if (nested_in_vect_loop
)
3028 step
= STMT_VINFO_DR_STEP (stmt_info
);
3030 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
3032 if (tree_int_cst_compare (step
, size_zero_node
) == 0)
3036 negative
= tree_int_cst_compare (step
, size_zero_node
) < 0;
3038 /* Create an expression for the first address accessed by this load
3040 base_name
= build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr
)));
3042 if (vect_print_dump_info (REPORT_DETAILS
))
3044 tree data_ref_base
= base_name
;
3045 fprintf (vect_dump
, "create %s-pointer variable to type: ",
3046 tree_code_name
[(int) TREE_CODE (aggr_type
)]);
3047 print_generic_expr (vect_dump
, aggr_type
, TDF_SLIM
);
3048 if (TREE_CODE (data_ref_base
) == VAR_DECL
3049 || TREE_CODE (data_ref_base
) == ARRAY_REF
)
3050 fprintf (vect_dump
, " vectorizing an array ref: ");
3051 else if (TREE_CODE (data_ref_base
) == COMPONENT_REF
)
3052 fprintf (vect_dump
, " vectorizing a record based array ref: ");
3053 else if (TREE_CODE (data_ref_base
) == SSA_NAME
)
3054 fprintf (vect_dump
, " vectorizing a pointer ref: ");
3055 print_generic_expr (vect_dump
, base_name
, TDF_SLIM
);
3058 /* (1) Create the new aggregate-pointer variable. */
3059 aggr_ptr_type
= build_pointer_type (aggr_type
);
3060 base
= get_base_address (DR_REF (dr
));
3062 && TREE_CODE (base
) == MEM_REF
)
3064 = build_qualified_type (aggr_ptr_type
,
3065 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base
, 0))));
3066 aggr_ptr
= vect_get_new_vect_var (aggr_ptr_type
, vect_pointer_var
,
3067 get_name (base_name
));
3069 /* Vector and array types inherit the alias set of their component
3070 type by default so we need to use a ref-all pointer if the data
3071 reference does not conflict with the created aggregated data
3072 reference because it is not addressable. */
3073 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr
),
3074 get_alias_set (DR_REF (dr
))))
3077 = build_pointer_type_for_mode (aggr_type
,
3078 TYPE_MODE (aggr_ptr_type
), true);
3079 aggr_ptr
= vect_get_new_vect_var (aggr_ptr_type
, vect_pointer_var
,
3080 get_name (base_name
));
3083 /* Likewise for any of the data references in the stmt group. */
3084 else if (STMT_VINFO_GROUP_SIZE (stmt_info
) > 1)
3086 gimple orig_stmt
= STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
);
3089 tree lhs
= gimple_assign_lhs (orig_stmt
);
3090 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr
),
3091 get_alias_set (lhs
)))
3094 = build_pointer_type_for_mode (aggr_type
,
3095 TYPE_MODE (aggr_ptr_type
), true);
3097 = vect_get_new_vect_var (aggr_ptr_type
, vect_pointer_var
,
3098 get_name (base_name
));
3102 orig_stmt
= STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt
));
3107 add_referenced_var (aggr_ptr
);
3109 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3110 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3111 def-use update cycles for the pointer: one relative to the outer-loop
3112 (LOOP), which is what steps (3) and (4) below do. The other is relative
3113 to the inner-loop (which is the inner-most loop containing the dataref),
3114 and this is done be step (5) below.
3116 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3117 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3118 redundant. Steps (3),(4) create the following:
3121 LOOP: vp1 = phi(vp0,vp2)
3127 If there is an inner-loop nested in loop, then step (5) will also be
3128 applied, and an additional update in the inner-loop will be created:
3131 LOOP: vp1 = phi(vp0,vp2)
3133 inner: vp3 = phi(vp1,vp4)
3134 vp4 = vp3 + inner_step
3140 /* (2) Calculate the initial address of the aggregate-pointer, and set
3141 the aggregate-pointer to point to it before the loop. */
3143 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3145 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
3151 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmt_list
);
3152 gcc_assert (!new_bb
);
3155 gsi_insert_seq_before (gsi
, new_stmt_list
, GSI_SAME_STMT
);
3158 *initial_address
= new_temp
;
3160 /* Create: p = (aggr_type *) initial_base */
3161 if (TREE_CODE (new_temp
) != SSA_NAME
3162 || !useless_type_conversion_p (aggr_ptr_type
, TREE_TYPE (new_temp
)))
3164 vec_stmt
= gimple_build_assign (aggr_ptr
,
3165 fold_convert (aggr_ptr_type
, new_temp
));
3166 aggr_ptr_init
= make_ssa_name (aggr_ptr
, vec_stmt
);
3167 /* Copy the points-to information if it exists. */
3168 if (DR_PTR_INFO (dr
))
3169 duplicate_ssa_name_ptr_info (aggr_ptr_init
, DR_PTR_INFO (dr
));
3170 gimple_assign_set_lhs (vec_stmt
, aggr_ptr_init
);
3173 new_bb
= gsi_insert_on_edge_immediate (pe
, vec_stmt
);
3174 gcc_assert (!new_bb
);
3177 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
3180 aggr_ptr_init
= new_temp
;
3182 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3183 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3184 inner-loop nested in LOOP (during outer-loop vectorization). */
3186 /* No update in loop is required. */
3187 if (only_init
&& (!loop_vinfo
|| at_loop
== loop
))
3188 aptr
= aggr_ptr_init
;
3191 /* The step of the aggregate pointer is the type size. */
3192 tree step
= TYPE_SIZE_UNIT (aggr_type
);
3193 /* One exception to the above is when the scalar step of the load in
3194 LOOP is zero. In this case the step here is also zero. */
3196 step
= size_zero_node
;
3198 step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
3200 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
3202 create_iv (aggr_ptr_init
,
3203 fold_convert (aggr_ptr_type
, step
),
3204 aggr_ptr
, loop
, &incr_gsi
, insert_after
,
3205 &indx_before_incr
, &indx_after_incr
);
3206 incr
= gsi_stmt (incr_gsi
);
3207 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3209 /* Copy the points-to information if it exists. */
3210 if (DR_PTR_INFO (dr
))
3212 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3213 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3218 aptr
= indx_before_incr
;
3221 if (!nested_in_vect_loop
|| only_init
)
3225 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3226 nested in LOOP, if exists. */
3228 gcc_assert (nested_in_vect_loop
);
3231 standard_iv_increment_position (containing_loop
, &incr_gsi
,
3233 create_iv (aptr
, fold_convert (aggr_ptr_type
, DR_STEP (dr
)), aggr_ptr
,
3234 containing_loop
, &incr_gsi
, insert_after
, &indx_before_incr
,
3236 incr
= gsi_stmt (incr_gsi
);
3237 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
3239 /* Copy the points-to information if it exists. */
3240 if (DR_PTR_INFO (dr
))
3242 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
3243 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
3248 return indx_before_incr
;
3255 /* Function bump_vector_ptr
3257 Increment a pointer (to a vector type) by vector-size. If requested,
3258 i.e. if PTR-INCR is given, then also connect the new increment stmt
3259 to the existing def-use update-chain of the pointer, by modifying
3260 the PTR_INCR as illustrated below:
3262 The pointer def-use update-chain before this function:
3263 DATAREF_PTR = phi (p_0, p_2)
3265 PTR_INCR: p_2 = DATAREF_PTR + step
3267 The pointer def-use update-chain after this function:
3268 DATAREF_PTR = phi (p_0, p_2)
3270 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3272 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3275 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3277 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3278 the loop. The increment amount across iterations is expected
3280 BSI - location where the new update stmt is to be placed.
3281 STMT - the original scalar memory-access stmt that is being vectorized.
3282 BUMP - optional. The offset by which to bump the pointer. If not given,
3283 the offset is assumed to be vector_size.
3285 Output: Return NEW_DATAREF_PTR as illustrated above.
3290 bump_vector_ptr (tree dataref_ptr
, gimple ptr_incr
, gimple_stmt_iterator
*gsi
,
3291 gimple stmt
, tree bump
)
3293 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3294 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3295 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3296 tree ptr_var
= SSA_NAME_VAR (dataref_ptr
);
3297 tree update
= TYPE_SIZE_UNIT (vectype
);
3300 use_operand_p use_p
;
3301 tree new_dataref_ptr
;
3306 incr_stmt
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, ptr_var
,
3307 dataref_ptr
, update
);
3308 new_dataref_ptr
= make_ssa_name (ptr_var
, incr_stmt
);
3309 gimple_assign_set_lhs (incr_stmt
, new_dataref_ptr
);
3310 vect_finish_stmt_generation (stmt
, incr_stmt
, gsi
);
3312 /* Copy the points-to information if it exists. */
3313 if (DR_PTR_INFO (dr
))
3315 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
3316 SSA_NAME_PTR_INFO (new_dataref_ptr
)->align
= 1;
3317 SSA_NAME_PTR_INFO (new_dataref_ptr
)->misalign
= 0;
3321 return new_dataref_ptr
;
3323 /* Update the vector-pointer's cross-iteration increment. */
3324 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
3326 tree use
= USE_FROM_PTR (use_p
);
3328 if (use
== dataref_ptr
)
3329 SET_USE (use_p
, new_dataref_ptr
);
3331 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
3334 return new_dataref_ptr
;
3338 /* Function vect_create_destination_var.
3340 Create a new temporary of type VECTYPE. */
3343 vect_create_destination_var (tree scalar_dest
, tree vectype
)
3346 const char *new_name
;
3348 enum vect_var_kind kind
;
3350 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
3351 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
3353 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
3355 new_name
= get_name (scalar_dest
);
3358 vec_dest
= vect_get_new_vect_var (type
, kind
, new_name
);
3359 add_referenced_var (vec_dest
);
3364 /* Function vect_strided_store_supported.
3366 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3367 and FALSE otherwise. */
3370 vect_strided_store_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
3372 optab interleave_high_optab
, interleave_low_optab
;
3373 enum machine_mode mode
;
3375 mode
= TYPE_MODE (vectype
);
3377 /* vect_permute_store_chain requires the group size to be a power of two. */
3378 if (exact_log2 (count
) == -1)
3380 if (vect_print_dump_info (REPORT_DETAILS
))
3381 fprintf (vect_dump
, "the size of the group of strided accesses"
3382 " is not a power of 2");
3386 /* Check that the operation is supported. */
3387 interleave_high_optab
= optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR
,
3388 vectype
, optab_default
);
3389 interleave_low_optab
= optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR
,
3390 vectype
, optab_default
);
3391 if (!interleave_high_optab
|| !interleave_low_optab
)
3393 if (vect_print_dump_info (REPORT_DETAILS
))
3394 fprintf (vect_dump
, "no optab for interleave.");
3398 if (optab_handler (interleave_high_optab
, mode
) == CODE_FOR_nothing
3399 || optab_handler (interleave_low_optab
, mode
) == CODE_FOR_nothing
)
3401 if (vect_print_dump_info (REPORT_DETAILS
))
3402 fprintf (vect_dump
, "interleave op not supported by target.");
3410 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3414 vect_store_lanes_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
3416 return vect_lanes_optab_supported_p ("vec_store_lanes",
3417 vec_store_lanes_optab
,
3422 /* Function vect_permute_store_chain.
3424 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3425 a power of 2, generate interleave_high/low stmts to reorder the data
3426 correctly for the stores. Return the final references for stores in
3429 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3430 The input is 4 vectors each containing 8 elements. We assign a number to
3431 each element, the input sequence is:
3433 1st vec: 0 1 2 3 4 5 6 7
3434 2nd vec: 8 9 10 11 12 13 14 15
3435 3rd vec: 16 17 18 19 20 21 22 23
3436 4th vec: 24 25 26 27 28 29 30 31
3438 The output sequence should be:
3440 1st vec: 0 8 16 24 1 9 17 25
3441 2nd vec: 2 10 18 26 3 11 19 27
3442 3rd vec: 4 12 20 28 5 13 21 30
3443 4th vec: 6 14 22 30 7 15 23 31
3445 i.e., we interleave the contents of the four vectors in their order.
3447 We use interleave_high/low instructions to create such output. The input of
3448 each interleave_high/low operation is two vectors:
3451 the even elements of the result vector are obtained left-to-right from the
3452 high/low elements of the first vector. The odd elements of the result are
3453 obtained left-to-right from the high/low elements of the second vector.
3454 The output of interleave_high will be: 0 4 1 5
3455 and of interleave_low: 2 6 3 7
3458 The permutation is done in log LENGTH stages. In each stage interleave_high
3459 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3460 where the first argument is taken from the first half of DR_CHAIN and the
3461 second argument from it's second half.
3464 I1: interleave_high (1st vec, 3rd vec)
3465 I2: interleave_low (1st vec, 3rd vec)
3466 I3: interleave_high (2nd vec, 4th vec)
3467 I4: interleave_low (2nd vec, 4th vec)
3469 The output for the first stage is:
3471 I1: 0 16 1 17 2 18 3 19
3472 I2: 4 20 5 21 6 22 7 23
3473 I3: 8 24 9 25 10 26 11 27
3474 I4: 12 28 13 29 14 30 15 31
3476 The output of the second stage, i.e. the final result is:
3478 I1: 0 8 16 24 1 9 17 25
3479 I2: 2 10 18 26 3 11 19 27
3480 I3: 4 12 20 28 5 13 21 30
3481 I4: 6 14 22 30 7 15 23 31. */
3484 vect_permute_store_chain (VEC(tree
,heap
) *dr_chain
,
3485 unsigned int length
,
3487 gimple_stmt_iterator
*gsi
,
3488 VEC(tree
,heap
) **result_chain
)
3490 tree perm_dest
, vect1
, vect2
, high
, low
;
3492 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
3495 enum tree_code high_code
, low_code
;
3497 gcc_assert (vect_strided_store_supported (vectype
, length
));
3499 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
3501 for (i
= 0; i
< exact_log2 (length
); i
++)
3503 for (j
= 0; j
< length
/2; j
++)
3505 vect1
= VEC_index (tree
, dr_chain
, j
);
3506 vect2
= VEC_index (tree
, dr_chain
, j
+length
/2);
3508 /* Create interleaving stmt:
3509 in the case of big endian:
3510 high = interleave_high (vect1, vect2)
3511 and in the case of little endian:
3512 high = interleave_low (vect1, vect2). */
3513 perm_dest
= create_tmp_var (vectype
, "vect_inter_high");
3514 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3515 add_referenced_var (perm_dest
);
3516 if (BYTES_BIG_ENDIAN
)
3518 high_code
= VEC_INTERLEAVE_HIGH_EXPR
;
3519 low_code
= VEC_INTERLEAVE_LOW_EXPR
;
3523 low_code
= VEC_INTERLEAVE_HIGH_EXPR
;
3524 high_code
= VEC_INTERLEAVE_LOW_EXPR
;
3526 perm_stmt
= gimple_build_assign_with_ops (high_code
, perm_dest
,
3528 high
= make_ssa_name (perm_dest
, perm_stmt
);
3529 gimple_assign_set_lhs (perm_stmt
, high
);
3530 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3531 VEC_replace (tree
, *result_chain
, 2*j
, high
);
3533 /* Create interleaving stmt:
3534 in the case of big endian:
3535 low = interleave_low (vect1, vect2)
3536 and in the case of little endian:
3537 low = interleave_high (vect1, vect2). */
3538 perm_dest
= create_tmp_var (vectype
, "vect_inter_low");
3539 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3540 add_referenced_var (perm_dest
);
3541 perm_stmt
= gimple_build_assign_with_ops (low_code
, perm_dest
,
3543 low
= make_ssa_name (perm_dest
, perm_stmt
);
3544 gimple_assign_set_lhs (perm_stmt
, low
);
3545 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3546 VEC_replace (tree
, *result_chain
, 2*j
+1, low
);
3548 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
3552 /* Function vect_setup_realignment
3554 This function is called when vectorizing an unaligned load using
3555 the dr_explicit_realign[_optimized] scheme.
3556 This function generates the following code at the loop prolog:
3559 x msq_init = *(floor(p)); # prolog load
3560 realignment_token = call target_builtin;
3562 x msq = phi (msq_init, ---)
3564 The stmts marked with x are generated only for the case of
3565 dr_explicit_realign_optimized.
3567 The code above sets up a new (vector) pointer, pointing to the first
3568 location accessed by STMT, and a "floor-aligned" load using that pointer.
3569 It also generates code to compute the "realignment-token" (if the relevant
3570 target hook was defined), and creates a phi-node at the loop-header bb
3571 whose arguments are the result of the prolog-load (created by this
3572 function) and the result of a load that takes place in the loop (to be
3573 created by the caller to this function).
3575 For the case of dr_explicit_realign_optimized:
3576 The caller to this function uses the phi-result (msq) to create the
3577 realignment code inside the loop, and sets up the missing phi argument,
3580 msq = phi (msq_init, lsq)
3581 lsq = *(floor(p')); # load in loop
3582 result = realign_load (msq, lsq, realignment_token);
3584 For the case of dr_explicit_realign:
3586 msq = *(floor(p)); # load in loop
3588 lsq = *(floor(p')); # load in loop
3589 result = realign_load (msq, lsq, realignment_token);
3592 STMT - (scalar) load stmt to be vectorized. This load accesses
3593 a memory location that may be unaligned.
3594 BSI - place where new code is to be inserted.
3595 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3599 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3600 target hook, if defined.
3601 Return value - the result of the loop-header phi node. */
3604 vect_setup_realignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
3605 tree
*realignment_token
,
3606 enum dr_alignment_support alignment_support_scheme
,
3608 struct loop
**at_loop
)
3610 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3611 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3612 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3613 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3614 struct loop
*loop
= NULL
;
3616 tree scalar_dest
= gimple_assign_lhs (stmt
);
3623 tree msq_init
= NULL_TREE
;
3626 tree msq
= NULL_TREE
;
3627 gimple_seq stmts
= NULL
;
3629 bool compute_in_loop
= false;
3630 bool nested_in_vect_loop
= false;
3631 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
3632 struct loop
*loop_for_initial_load
= NULL
;
3636 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3637 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
3640 gcc_assert (alignment_support_scheme
== dr_explicit_realign
3641 || alignment_support_scheme
== dr_explicit_realign_optimized
);
3643 /* We need to generate three things:
3644 1. the misalignment computation
3645 2. the extra vector load (for the optimized realignment scheme).
3646 3. the phi node for the two vectors from which the realignment is
3647 done (for the optimized realignment scheme). */
3649 /* 1. Determine where to generate the misalignment computation.
3651 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3652 calculation will be generated by this function, outside the loop (in the
3653 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3654 caller, inside the loop.
3656 Background: If the misalignment remains fixed throughout the iterations of
3657 the loop, then both realignment schemes are applicable, and also the
3658 misalignment computation can be done outside LOOP. This is because we are
3659 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3660 are a multiple of VS (the Vector Size), and therefore the misalignment in
3661 different vectorized LOOP iterations is always the same.
3662 The problem arises only if the memory access is in an inner-loop nested
3663 inside LOOP, which is now being vectorized using outer-loop vectorization.
3664 This is the only case when the misalignment of the memory access may not
3665 remain fixed throughout the iterations of the inner-loop (as explained in
3666 detail in vect_supportable_dr_alignment). In this case, not only is the
3667 optimized realignment scheme not applicable, but also the misalignment
3668 computation (and generation of the realignment token that is passed to
3669 REALIGN_LOAD) have to be done inside the loop.
3671 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3672 or not, which in turn determines if the misalignment is computed inside
3673 the inner-loop, or outside LOOP. */
3675 if (init_addr
!= NULL_TREE
|| !loop_vinfo
)
3677 compute_in_loop
= true;
3678 gcc_assert (alignment_support_scheme
== dr_explicit_realign
);
3682 /* 2. Determine where to generate the extra vector load.
3684 For the optimized realignment scheme, instead of generating two vector
3685 loads in each iteration, we generate a single extra vector load in the
3686 preheader of the loop, and in each iteration reuse the result of the
3687 vector load from the previous iteration. In case the memory access is in
3688 an inner-loop nested inside LOOP, which is now being vectorized using
3689 outer-loop vectorization, we need to determine whether this initial vector
3690 load should be generated at the preheader of the inner-loop, or can be
3691 generated at the preheader of LOOP. If the memory access has no evolution
3692 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3693 to be generated inside LOOP (in the preheader of the inner-loop). */
3695 if (nested_in_vect_loop
)
3697 tree outerloop_step
= STMT_VINFO_DR_STEP (stmt_info
);
3698 bool invariant_in_outerloop
=
3699 (tree_int_cst_compare (outerloop_step
, size_zero_node
) == 0);
3700 loop_for_initial_load
= (invariant_in_outerloop
? loop
: loop
->inner
);
3703 loop_for_initial_load
= loop
;
3705 *at_loop
= loop_for_initial_load
;
3707 if (loop_for_initial_load
)
3708 pe
= loop_preheader_edge (loop_for_initial_load
);
3710 /* 3. For the case of the optimized realignment, create the first vector
3711 load at the loop preheader. */
3713 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
3715 /* Create msq_init = *(floor(p1)) in the loop preheader */
3717 gcc_assert (!compute_in_loop
);
3718 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3719 ptr
= vect_create_data_ref_ptr (stmt
, vectype
, loop_for_initial_load
,
3720 NULL_TREE
, &init_addr
, NULL
, &inc
,
3722 new_stmt
= gimple_build_assign_with_ops
3723 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
3724 build_int_cst (TREE_TYPE (ptr
),
3725 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
3726 new_temp
= make_ssa_name (SSA_NAME_VAR (ptr
), new_stmt
);
3727 gimple_assign_set_lhs (new_stmt
, new_temp
);
3728 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
3729 gcc_assert (!new_bb
);
3731 = build2 (MEM_REF
, TREE_TYPE (vec_dest
), new_temp
,
3732 build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0));
3733 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
3734 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3735 gimple_assign_set_lhs (new_stmt
, new_temp
);
3736 mark_symbols_for_renaming (new_stmt
);
3739 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
3740 gcc_assert (!new_bb
);
3743 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
3745 msq_init
= gimple_assign_lhs (new_stmt
);
3748 /* 4. Create realignment token using a target builtin, if available.
3749 It is done either inside the containing loop, or before LOOP (as
3750 determined above). */
3752 if (targetm
.vectorize
.builtin_mask_for_load
)
3756 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
3759 /* Generate the INIT_ADDR computation outside LOOP. */
3760 init_addr
= vect_create_addr_base_for_vector_ref (stmt
, &stmts
,
3764 pe
= loop_preheader_edge (loop
);
3765 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
3766 gcc_assert (!new_bb
);
3769 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
3772 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
3773 new_stmt
= gimple_build_call (builtin_decl
, 1, init_addr
);
3775 vect_create_destination_var (scalar_dest
,
3776 gimple_call_return_type (new_stmt
));
3777 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3778 gimple_call_set_lhs (new_stmt
, new_temp
);
3780 if (compute_in_loop
)
3781 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
3784 /* Generate the misalignment computation outside LOOP. */
3785 pe
= loop_preheader_edge (loop
);
3786 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
3787 gcc_assert (!new_bb
);
3790 *realignment_token
= gimple_call_lhs (new_stmt
);
3792 /* The result of the CALL_EXPR to this builtin is determined from
3793 the value of the parameter and no global variables are touched
3794 which makes the builtin a "const" function. Requiring the
3795 builtin to have the "const" attribute makes it unnecessary
3796 to call mark_call_clobbered. */
3797 gcc_assert (TREE_READONLY (builtin_decl
));
3800 if (alignment_support_scheme
== dr_explicit_realign
)
3803 gcc_assert (!compute_in_loop
);
3804 gcc_assert (alignment_support_scheme
== dr_explicit_realign_optimized
);
3807 /* 5. Create msq = phi <msq_init, lsq> in loop */
3809 pe
= loop_preheader_edge (containing_loop
);
3810 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3811 msq
= make_ssa_name (vec_dest
, NULL
);
3812 phi_stmt
= create_phi_node (msq
, containing_loop
->header
);
3813 SSA_NAME_DEF_STMT (msq
) = phi_stmt
;
3814 add_phi_arg (phi_stmt
, msq_init
, pe
, UNKNOWN_LOCATION
);
3820 /* Function vect_strided_load_supported.
3822 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3823 and FALSE otherwise. */
3826 vect_strided_load_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
3828 optab perm_even_optab
, perm_odd_optab
;
3829 enum machine_mode mode
;
3831 mode
= TYPE_MODE (vectype
);
3833 /* vect_permute_load_chain requires the group size to be a power of two. */
3834 if (exact_log2 (count
) == -1)
3836 if (vect_print_dump_info (REPORT_DETAILS
))
3837 fprintf (vect_dump
, "the size of the group of strided accesses"
3838 " is not a power of 2");
3842 perm_even_optab
= optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR
, vectype
,
3844 if (!perm_even_optab
)
3846 if (vect_print_dump_info (REPORT_DETAILS
))
3847 fprintf (vect_dump
, "no optab for perm_even.");
3851 if (optab_handler (perm_even_optab
, mode
) == CODE_FOR_nothing
)
3853 if (vect_print_dump_info (REPORT_DETAILS
))
3854 fprintf (vect_dump
, "perm_even op not supported by target.");
3858 perm_odd_optab
= optab_for_tree_code (VEC_EXTRACT_ODD_EXPR
, vectype
,
3860 if (!perm_odd_optab
)
3862 if (vect_print_dump_info (REPORT_DETAILS
))
3863 fprintf (vect_dump
, "no optab for perm_odd.");
3867 if (optab_handler (perm_odd_optab
, mode
) == CODE_FOR_nothing
)
3869 if (vect_print_dump_info (REPORT_DETAILS
))
3870 fprintf (vect_dump
, "perm_odd op not supported by target.");
3876 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
3880 vect_load_lanes_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
3882 return vect_lanes_optab_supported_p ("vec_load_lanes",
3883 vec_load_lanes_optab
,
3887 /* Function vect_permute_load_chain.
3889 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3890 a power of 2, generate extract_even/odd stmts to reorder the input data
3891 correctly. Return the final references for loads in RESULT_CHAIN.
3893 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3894 The input is 4 vectors each containing 8 elements. We assign a number to each
3895 element, the input sequence is:
3897 1st vec: 0 1 2 3 4 5 6 7
3898 2nd vec: 8 9 10 11 12 13 14 15
3899 3rd vec: 16 17 18 19 20 21 22 23
3900 4th vec: 24 25 26 27 28 29 30 31
3902 The output sequence should be:
3904 1st vec: 0 4 8 12 16 20 24 28
3905 2nd vec: 1 5 9 13 17 21 25 29
3906 3rd vec: 2 6 10 14 18 22 26 30
3907 4th vec: 3 7 11 15 19 23 27 31
3909 i.e., the first output vector should contain the first elements of each
3910 interleaving group, etc.
3912 We use extract_even/odd instructions to create such output. The input of
3913 each extract_even/odd operation is two vectors
3917 and the output is the vector of extracted even/odd elements. The output of
3918 extract_even will be: 0 2 4 6
3919 and of extract_odd: 1 3 5 7
3922 The permutation is done in log LENGTH stages. In each stage extract_even
3923 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3924 their order. In our example,
3926 E1: extract_even (1st vec, 2nd vec)
3927 E2: extract_odd (1st vec, 2nd vec)
3928 E3: extract_even (3rd vec, 4th vec)
3929 E4: extract_odd (3rd vec, 4th vec)
3931 The output for the first stage will be:
3933 E1: 0 2 4 6 8 10 12 14
3934 E2: 1 3 5 7 9 11 13 15
3935 E3: 16 18 20 22 24 26 28 30
3936 E4: 17 19 21 23 25 27 29 31
3938 In order to proceed and create the correct sequence for the next stage (or
3939 for the correct output, if the second stage is the last one, as in our
3940 example), we first put the output of extract_even operation and then the
3941 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3942 The input for the second stage is:
3944 1st vec (E1): 0 2 4 6 8 10 12 14
3945 2nd vec (E3): 16 18 20 22 24 26 28 30
3946 3rd vec (E2): 1 3 5 7 9 11 13 15
3947 4th vec (E4): 17 19 21 23 25 27 29 31
3949 The output of the second stage:
3951 E1: 0 4 8 12 16 20 24 28
3952 E2: 2 6 10 14 18 22 26 30
3953 E3: 1 5 9 13 17 21 25 29
3954 E4: 3 7 11 15 19 23 27 31
3956 And RESULT_CHAIN after reordering:
3958 1st vec (E1): 0 4 8 12 16 20 24 28
3959 2nd vec (E3): 1 5 9 13 17 21 25 29
3960 3rd vec (E2): 2 6 10 14 18 22 26 30
3961 4th vec (E4): 3 7 11 15 19 23 27 31. */
3964 vect_permute_load_chain (VEC(tree
,heap
) *dr_chain
,
3965 unsigned int length
,
3967 gimple_stmt_iterator
*gsi
,
3968 VEC(tree
,heap
) **result_chain
)
3970 tree perm_dest
, data_ref
, first_vect
, second_vect
;
3972 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
3976 gcc_assert (vect_strided_load_supported (vectype
, length
));
3978 *result_chain
= VEC_copy (tree
, heap
, dr_chain
);
3979 for (i
= 0; i
< exact_log2 (length
); i
++)
3981 for (j
= 0; j
< length
; j
+=2)
3983 first_vect
= VEC_index (tree
, dr_chain
, j
);
3984 second_vect
= VEC_index (tree
, dr_chain
, j
+1);
3986 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3987 perm_dest
= create_tmp_var (vectype
, "vect_perm_even");
3988 DECL_GIMPLE_REG_P (perm_dest
) = 1;
3989 add_referenced_var (perm_dest
);
3991 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR
,
3992 perm_dest
, first_vect
,
3995 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3996 gimple_assign_set_lhs (perm_stmt
, data_ref
);
3997 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3998 mark_symbols_for_renaming (perm_stmt
);
4000 VEC_replace (tree
, *result_chain
, j
/2, data_ref
);
4002 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4003 perm_dest
= create_tmp_var (vectype
, "vect_perm_odd");
4004 DECL_GIMPLE_REG_P (perm_dest
) = 1;
4005 add_referenced_var (perm_dest
);
4007 perm_stmt
= gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR
,
4008 perm_dest
, first_vect
,
4010 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
4011 gimple_assign_set_lhs (perm_stmt
, data_ref
);
4012 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4013 mark_symbols_for_renaming (perm_stmt
);
4015 VEC_replace (tree
, *result_chain
, j
/2+length
/2, data_ref
);
4017 dr_chain
= VEC_copy (tree
, heap
, *result_chain
);
4022 /* Function vect_transform_strided_load.
4024 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4025 to perform their permutation and ascribe the result vectorized statements to
4026 the scalar statements.
4030 vect_transform_strided_load (gimple stmt
, VEC(tree
,heap
) *dr_chain
, int size
,
4031 gimple_stmt_iterator
*gsi
)
4033 VEC(tree
,heap
) *result_chain
= NULL
;
4035 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4036 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4037 vectors, that are ready for vector computation. */
4038 result_chain
= VEC_alloc (tree
, heap
, size
);
4039 vect_permute_load_chain (dr_chain
, size
, stmt
, gsi
, &result_chain
);
4040 vect_record_strided_load_vectors (stmt
, result_chain
);
4041 VEC_free (tree
, heap
, result_chain
);
4044 /* RESULT_CHAIN contains the output of a group of strided loads that were
4045 generated as part of the vectorization of STMT. Assign the statement
4046 for each vector to the associated scalar statement. */
4049 vect_record_strided_load_vectors (gimple stmt
, VEC(tree
,heap
) *result_chain
)
4051 gimple first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
4052 gimple next_stmt
, new_stmt
;
4053 unsigned int i
, gap_count
;
4056 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4057 Since we scan the chain starting from it's first node, their order
4058 corresponds the order of data-refs in RESULT_CHAIN. */
4059 next_stmt
= first_stmt
;
4061 FOR_EACH_VEC_ELT (tree
, result_chain
, i
, tmp_data_ref
)
4066 /* Skip the gaps. Loads created for the gaps will be removed by dead
4067 code elimination pass later. No need to check for the first stmt in
4068 the group, since it always exists.
4069 GROUP_GAP is the number of steps in elements from the previous
4070 access (if there is no gap GROUP_GAP is 1). We skip loads that
4071 correspond to the gaps. */
4072 if (next_stmt
!= first_stmt
4073 && gap_count
< GROUP_GAP (vinfo_for_stmt (next_stmt
)))
4081 new_stmt
= SSA_NAME_DEF_STMT (tmp_data_ref
);
4082 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4083 copies, and we put the new vector statement in the first available
4085 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)))
4086 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
4089 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
4092 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
4094 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
));
4097 prev_stmt
= rel_stmt
;
4099 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt
));
4102 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
)) =
4107 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4109 /* If NEXT_STMT accesses the same DR as the previous statement,
4110 put the same TMP_DATA_REF as its vectorized statement; otherwise
4111 get the next data-ref from RESULT_CHAIN. */
4112 if (!next_stmt
|| !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
4118 /* Function vect_force_dr_alignment_p.
4120 Returns whether the alignment of a DECL can be forced to be aligned
4121 on ALIGNMENT bit boundary. */
4124 vect_can_force_dr_alignment_p (const_tree decl
, unsigned int alignment
)
4126 if (TREE_CODE (decl
) != VAR_DECL
)
4129 if (DECL_EXTERNAL (decl
))
4132 if (TREE_ASM_WRITTEN (decl
))
4135 if (TREE_STATIC (decl
))
4136 return (alignment
<= MAX_OFILE_ALIGNMENT
);
4138 return (alignment
<= MAX_STACK_ALIGNMENT
);
4142 /* Return whether the data reference DR is supported with respect to its
4144 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4145 it is aligned, i.e., check if it is possible to vectorize it with different
4148 enum dr_alignment_support
4149 vect_supportable_dr_alignment (struct data_reference
*dr
,
4150 bool check_aligned_accesses
)
4152 gimple stmt
= DR_STMT (dr
);
4153 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4154 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4155 enum machine_mode mode
= TYPE_MODE (vectype
);
4156 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4157 struct loop
*vect_loop
= NULL
;
4158 bool nested_in_vect_loop
= false;
4160 if (aligned_access_p (dr
) && !check_aligned_accesses
)
4165 vect_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4166 nested_in_vect_loop
= nested_in_vect_loop_p (vect_loop
, stmt
);
4169 /* Possibly unaligned access. */
4171 /* We can choose between using the implicit realignment scheme (generating
4172 a misaligned_move stmt) and the explicit realignment scheme (generating
4173 aligned loads with a REALIGN_LOAD). There are two variants to the
4174 explicit realignment scheme: optimized, and unoptimized.
4175 We can optimize the realignment only if the step between consecutive
4176 vector loads is equal to the vector size. Since the vector memory
4177 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4178 is guaranteed that the misalignment amount remains the same throughout the
4179 execution of the vectorized loop. Therefore, we can create the
4180 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4181 at the loop preheader.
4183 However, in the case of outer-loop vectorization, when vectorizing a
4184 memory access in the inner-loop nested within the LOOP that is now being
4185 vectorized, while it is guaranteed that the misalignment of the
4186 vectorized memory access will remain the same in different outer-loop
4187 iterations, it is *not* guaranteed that is will remain the same throughout
4188 the execution of the inner-loop. This is because the inner-loop advances
4189 with the original scalar step (and not in steps of VS). If the inner-loop
4190 step happens to be a multiple of VS, then the misalignment remains fixed
4191 and we can use the optimized realignment scheme. For example:
4197 When vectorizing the i-loop in the above example, the step between
4198 consecutive vector loads is 1, and so the misalignment does not remain
4199 fixed across the execution of the inner-loop, and the realignment cannot
4200 be optimized (as illustrated in the following pseudo vectorized loop):
4202 for (i=0; i<N; i+=4)
4203 for (j=0; j<M; j++){
4204 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4205 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4206 // (assuming that we start from an aligned address).
4209 We therefore have to use the unoptimized realignment scheme:
4211 for (i=0; i<N; i+=4)
4212 for (j=k; j<M; j+=4)
4213 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4214 // that the misalignment of the initial address is
4217 The loop can then be vectorized as follows:
4219 for (k=0; k<4; k++){
4220 rt = get_realignment_token (&vp[k]);
4221 for (i=0; i<N; i+=4){
4223 for (j=k; j<M; j+=4){
4225 va = REALIGN_LOAD <v1,v2,rt>;
4232 if (DR_IS_READ (dr
))
4234 bool is_packed
= false;
4235 tree type
= (TREE_TYPE (DR_REF (dr
)));
4237 if (optab_handler (vec_realign_load_optab
, mode
) != CODE_FOR_nothing
4238 && (!targetm
.vectorize
.builtin_mask_for_load
4239 || targetm
.vectorize
.builtin_mask_for_load ()))
4241 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4242 if ((nested_in_vect_loop
4243 && (TREE_INT_CST_LOW (DR_STEP (dr
))
4244 != GET_MODE_SIZE (TYPE_MODE (vectype
))))
4246 return dr_explicit_realign
;
4248 return dr_explicit_realign_optimized
;
4250 if (!known_alignment_for_access_p (dr
))
4252 tree ba
= DR_BASE_OBJECT (dr
);
4255 is_packed
= contains_packed_reference (ba
);
4258 if (targetm
.vectorize
.
4259 support_vector_misalignment (mode
, type
,
4260 DR_MISALIGNMENT (dr
), is_packed
))
4261 /* Can't software pipeline the loads, but can at least do them. */
4262 return dr_unaligned_supported
;
4266 bool is_packed
= false;
4267 tree type
= (TREE_TYPE (DR_REF (dr
)));
4269 if (!known_alignment_for_access_p (dr
))
4271 tree ba
= DR_BASE_OBJECT (dr
);
4274 is_packed
= contains_packed_reference (ba
);
4277 if (targetm
.vectorize
.
4278 support_vector_misalignment (mode
, type
,
4279 DR_MISALIGNMENT (dr
), is_packed
))
4280 return dr_unaligned_supported
;
4284 return dr_unaligned_unsupported
;