1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "tm_p.h"
30 #include "target.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
36 #include "cfgloop.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return the smallest scalar part of STMT.
47 This is used to determine the vectype of the stmt. We generally set the
48 vectype according to the type of the result (lhs). For stmts whose
49 result-type is different than the type of the arguments (e.g., demotion,
50 promotion), vectype will be reset appropriately (later). Note that we have
51 to visit the smallest datatype in this function, because that determines the
52 VF. If the smallest datatype in the loop is present only as the rhs of a
53 promotion operation - we'd miss it.
54 Such a case, where a variable of this datatype does not appear in the lhs
55 anywhere in the loop, can only occur if it's an invariant: e.g.:
56 'int_x = (int) short_inv', which we'd expect to have been optimized away by
57 invariant motion. However, we cannot rely on invariant motion to always
58 take invariants out of the loop, and so in the case of promotion we also
59 have to check the rhs.
60 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
61 types. */
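/* For illustration (values chosen for exposition, not from the sources):
   given 'int_x = (int) short_y', the lhs size is 4 bytes and the rhs size
   is 2 bytes, so the type of short_y is returned.  Basing the vectype on
   the smaller 2-byte type yields the larger VF that is needed to also
   vectorize the short accesses.  */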
63 tree
64 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
65 HOST_WIDE_INT *rhs_size_unit)
67 tree scalar_type = gimple_expr_type (stmt);
68 HOST_WIDE_INT lhs, rhs;
70 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
72 if (is_gimple_assign (stmt)
73 && (gimple_assign_cast_p (stmt)
74 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
75 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
77 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
79 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
80 if (rhs < lhs)
81 scalar_type = rhs_type;
84 *lhs_size_unit = lhs;
85 *rhs_size_unit = rhs;
86 return scalar_type;
90 /* Find the place of the data-ref in STMT in the interleaving chain that starts
91 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
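/* For example, in a chain S1 -> S2 -> S3 with FIRST_STMT == S1, calling
   this function with STMT == S3 follows the DR_GROUP_NEXT_DR links twice
   and returns 2; a stmt that is not reachable from S1 yields -1.  */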
93 int
94 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
96 gimple next_stmt = first_stmt;
97 int result = 0;
99 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
100 return -1;
102 while (next_stmt && next_stmt != stmt)
104 result++;
105 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
108 if (next_stmt)
109 return result;
110 else
111 return -1;
115 /* Function vect_insert_into_interleaving_chain.
117 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
119 static void
120 vect_insert_into_interleaving_chain (struct data_reference *dra,
121 struct data_reference *drb)
123 gimple prev, next;
124 tree next_init;
125 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
126 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
128 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
129 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
130 while (next)
132 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
133 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
135 /* Insert here. */
136 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
137 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
138 return;
140 prev = next;
141 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
144 /* We got to the end of the list. Insert here. */
145 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
146 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
150 /* Function vect_update_interleaving_chain.
For two data-refs DRA and DRB that are a part of a chain of interleaved data
   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
155 There are four possible cases:
156 1. New stmts - both DRA and DRB are not a part of any chain:
157 FIRST_DR = DRB
158 NEXT_DR (DRB) = DRA
159 2. DRB is a part of a chain and DRA is not:
160 no need to update FIRST_DR
161 no need to insert DRB
162 insert DRA according to init
163 3. DRA is a part of a chain and DRB is not:
164 if (init of FIRST_DR > init of DRB)
165 FIRST_DR = DRB
166 NEXT(FIRST_DR) = previous FIRST_DR
167 else
168 insert DRB according to its init
169 4. both DRA and DRB are in some interleaving chains:
170 choose the chain with the smallest init of FIRST_DR
171 insert the nodes of the second chain into the first one. */
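/* A sketch of case 2 with illustrative offsets: if DRB's chain already
   holds data-refs with DR_INIT 0 and 8, and DRA has DR_INIT 4, then DRA is
   linked in between them, giving a chain ordered by init: 0 -> 4 -> 8.  */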
173 static void
174 vect_update_interleaving_chain (struct data_reference *drb,
175 struct data_reference *dra)
177 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
178 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
179 tree next_init, init_dra_chain, init_drb_chain;
180 gimple first_a, first_b;
181 tree node_init;
182 gimple node, prev, next, first_stmt;
184 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
185 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
187 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
188 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
189 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
190 return;
193 /* 2. DRB is a part of a chain and DRA is not. */
194 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
196 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
197 /* Insert DRA into the chain of DRB. */
198 vect_insert_into_interleaving_chain (dra, drb);
199 return;
202 /* 3. DRA is a part of a chain and DRB is not. */
203 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
205 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
206 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
207 old_first_stmt)));
208 gimple tmp;
210 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
212 /* DRB's init is smaller than the init of the stmt previously marked
213 as the first stmt of the interleaving chain of DRA. Therefore, we
214 update FIRST_STMT and put DRB in the head of the list. */
215 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
216 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
218 /* Update all the stmts in the list to point to the new FIRST_STMT. */
219 tmp = old_first_stmt;
220 while (tmp)
222 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
223 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
226 else
228 /* Insert DRB in the list of DRA. */
229 vect_insert_into_interleaving_chain (drb, dra);
230 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
232 return;
235 /* 4. both DRA and DRB are in some interleaving chains. */
236 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
237 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
238 if (first_a == first_b)
239 return;
240 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
241 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
243 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
245 /* Insert the nodes of DRA chain into the DRB chain.
After inserting a node, continue from this node of the DRB chain (don't
         start from the beginning).  */
248 node = DR_GROUP_FIRST_DR (stmtinfo_a);
249 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
250 first_stmt = first_b;
252 else
254 /* Insert the nodes of DRB chain into the DRA chain.
After inserting a node, continue from this node of the DRA chain (don't
         start from the beginning).  */
257 node = DR_GROUP_FIRST_DR (stmtinfo_b);
258 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
259 first_stmt = first_a;
262 while (node)
264 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
265 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
266 while (next)
268 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
269 if (tree_int_cst_compare (next_init, node_init) > 0)
271 /* Insert here. */
272 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
273 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
274 prev = node;
275 break;
277 prev = next;
278 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
280 if (!next)
282 /* We got to the end of the list. Insert here. */
283 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
284 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
285 prev = node;
287 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
288 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
292 /* Check dependence between DRA and DRB for basic block vectorization.
293 If the accesses share same bases and offsets, we can compare their initial
294 constant offsets to decide whether they differ or not. In case of a read-
295 write dependence we check that the load is before the store to ensure that
296 vectorization will not change the order of the accesses. */
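/* For illustration: in a basic block containing 'x = a[0]; a[0] = y;' the
   load and the store access the same location (equal inits), but the load
   comes first, so the pair is not treated as a blocking dependence; with
   the two statements swapped this function would report a dependence.  */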
298 static bool
299 vect_drs_dependent_in_basic_block (struct data_reference *dra,
300 struct data_reference *drb)
302 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
303 gimple earlier_stmt;
305 /* We only call this function for pairs of loads and stores, but we verify
306 it here. */
307 if (DR_IS_READ (dra) == DR_IS_READ (drb))
309 if (DR_IS_READ (dra))
310 return false;
311 else
312 return true;
315 /* Check that the data-refs have same bases and offsets. If not, we can't
316 determine if they are dependent. */
317 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
318 || !dr_equal_offsets_p (dra, drb))
319 return true;
321 /* Check the types. */
322 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
323 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
325 if (type_size_a != type_size_b
326 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
327 TREE_TYPE (DR_REF (drb))))
328 return true;
330 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
331 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
333 /* Two different locations - no dependence. */
334 if (init_a != init_b)
335 return false;
337 /* We have a read-write dependence. Check that the load is before the store.
338 When we vectorize basic blocks, vector load can be only before
339 corresponding scalar load, and vector store can be only after its
corresponding scalar store.  So the order of the accesses is preserved in
341 case the load is before the store. */
342 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
343 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
344 return false;
346 return true;
350 /* Function vect_check_interleaving.
352 Check if DRA and DRB are a part of interleaving. In case they are, insert
353 DRA and DRB in an interleaving chain. */
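/* Typical example (illustrative): the accesses a[2*i] and a[2*i+1] on an
   int array have the same base and the same 8-byte step, compatible 4-byte
   types, and inits 0 and 4.  The init difference (4) is a multiple of the
   type size and not larger than the step, so the two data-refs form an
   interleaving group of size 2.  */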
355 static bool
356 vect_check_interleaving (struct data_reference *dra,
357 struct data_reference *drb)
359 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
361 /* Check that the data-refs have same first location (except init) and they
362 are both either store or load (not load and store). */
363 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
364 || !dr_equal_offsets_p (dra, drb)
365 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
366 || DR_IS_READ (dra) != DR_IS_READ (drb))
367 return false;
369 /* Check:
370 1. data-refs are of the same type
371 2. their steps are equal
372 3. the step (if greater than zero) is greater than the difference between
373 data-refs' inits. */
374 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
375 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
377 if (type_size_a != type_size_b
378 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
379 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
380 TREE_TYPE (DR_REF (drb))))
381 return false;
383 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
384 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
385 step = TREE_INT_CST_LOW (DR_STEP (dra));
387 if (init_a > init_b)
389 /* If init_a == init_b + the size of the type * k, we have an interleaving,
390 and DRB is accessed before DRA. */
391 diff_mod_size = (init_a - init_b) % type_size_a;
393 if (step && (init_a - init_b) > step)
394 return false;
396 if (diff_mod_size == 0)
398 vect_update_interleaving_chain (drb, dra);
399 if (vect_print_dump_info (REPORT_DR_DETAILS))
401 fprintf (vect_dump, "Detected interleaving ");
402 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
403 fprintf (vect_dump, " and ");
404 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
406 return true;
409 else
411 /* If init_b == init_a + the size of the type * k, we have an
412 interleaving, and DRA is accessed before DRB. */
413 diff_mod_size = (init_b - init_a) % type_size_a;
415 if (step && (init_b - init_a) > step)
416 return false;
418 if (diff_mod_size == 0)
420 vect_update_interleaving_chain (dra, drb);
421 if (vect_print_dump_info (REPORT_DR_DETAILS))
423 fprintf (vect_dump, "Detected interleaving ");
424 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
425 fprintf (vect_dump, " and ");
426 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
428 return true;
432 return false;
/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if drs are
437 different, otherwise return TRUE. */
439 static bool
440 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
442 gimple stmt_i = DR_STMT (dr_i);
443 gimple stmt_j = DR_STMT (dr_j);
445 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
446 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
447 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
448 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
449 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
450 return true;
451 else
452 return false;
455 /* If address ranges represented by DDR_I and DDR_J are equal,
456 return TRUE, otherwise return FALSE. */
458 static bool
459 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
461 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
462 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
463 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
464 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
465 return true;
466 else
467 return false;
470 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
471 tested at run-time. Return TRUE if DDR was successfully inserted.
472 Return false if versioning is not supported. */
474 static bool
475 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
479 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
480 return false;
482 if (vect_print_dump_info (REPORT_DR_DETAILS))
484 fprintf (vect_dump, "mark for run-time aliasing test between ");
485 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
486 fprintf (vect_dump, " and ");
487 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
490 if (optimize_loop_nest_for_size_p (loop))
492 if (vect_print_dump_info (REPORT_DR_DETAILS))
493 fprintf (vect_dump, "versioning not supported when optimizing for size.");
494 return false;
497 /* FORNOW: We don't support versioning with outer-loop vectorization. */
498 if (loop->inner)
500 if (vect_print_dump_info (REPORT_DR_DETAILS))
501 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
502 return false;
505 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
506 return true;
510 /* Function vect_analyze_data_ref_dependence.
512 Return TRUE if there (might) exist a dependence between a memory-reference
513 DRA and a memory-reference DRB. When versioning for alias may check a
514 dependence at run-time, return FALSE. Adjust *MAX_VF according to
515 the data dependence. */
517 static bool
518 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
519 loop_vec_info loop_vinfo, int *max_vf,
520 bool *data_dependence_in_bb)
522 unsigned int i;
523 struct loop *loop = NULL;
524 struct data_reference *dra = DDR_A (ddr);
525 struct data_reference *drb = DDR_B (ddr);
526 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
527 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
528 lambda_vector dist_v;
529 unsigned int loop_depth;
531 /* Don't bother to analyze statements marked as unvectorizable. */
532 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
533 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
534 return false;
536 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
538 /* Independent data accesses. */
539 vect_check_interleaving (dra, drb);
540 return false;
543 if (loop_vinfo)
544 loop = LOOP_VINFO_LOOP (loop_vinfo);
546 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
547 return false;
549 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
551 if (loop_vinfo)
553 if (vect_print_dump_info (REPORT_DR_DETAILS))
555 fprintf (vect_dump, "versioning for alias required: "
556 "can't determine dependence between ");
557 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
558 fprintf (vect_dump, " and ");
559 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
562 /* Add to list of ddrs that need to be tested at run-time. */
563 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
/* When vectorizing a basic block, an unknown dependence can still mean
         a strided access.  */
568 if (vect_check_interleaving (dra, drb))
569 return false;
571 if (vect_print_dump_info (REPORT_DR_DETAILS))
573 fprintf (vect_dump, "can't determine dependence between ");
574 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
575 fprintf (vect_dump, " and ");
576 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
579 /* We do not vectorize basic blocks with write-write dependencies. */
580 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
581 return true;
583 /* We deal with read-write dependencies in basic blocks later (by
584 verifying that all the loads in the basic block are before all the
585 stores). */
586 *data_dependence_in_bb = true;
587 return false;
590 /* Versioning for alias is not yet supported for basic block SLP, and
dependence distance is inapplicable; hence, in case of known data
592 dependence, basic block vectorization is impossible for now. */
593 if (!loop_vinfo)
595 if (dra != drb && vect_check_interleaving (dra, drb))
596 return false;
598 if (vect_print_dump_info (REPORT_DR_DETAILS))
600 fprintf (vect_dump, "determined dependence between ");
601 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
602 fprintf (vect_dump, " and ");
603 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* Do not vectorize basic blocks with write-write dependences.  */
607 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
608 return true;
610 /* Check if this dependence is allowed in basic block vectorization. */
611 return vect_drs_dependent_in_basic_block (dra, drb);
614 /* Loop-based vectorization and known data dependence. */
615 if (DDR_NUM_DIST_VECTS (ddr) == 0)
617 if (vect_print_dump_info (REPORT_DR_DETAILS))
619 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
620 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
621 fprintf (vect_dump, " and ");
622 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
624 /* Add to list of ddrs that need to be tested at run-time. */
625 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
628 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
629 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
631 int dist = dist_v[loop_depth];
633 if (vect_print_dump_info (REPORT_DR_DETAILS))
634 fprintf (vect_dump, "dependence distance = %d.", dist);
636 if (dist == 0)
638 if (vect_print_dump_info (REPORT_DR_DETAILS))
640 fprintf (vect_dump, "dependence distance == 0 between ");
641 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
642 fprintf (vect_dump, " and ");
643 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
646 /* For interleaving, mark that there is a read-write dependency if
necessary.  We have already checked that one of the data-refs is a store.  */
648 if (DR_IS_READ (dra))
649 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
650 else
652 if (DR_IS_READ (drb))
653 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
656 continue;
659 if (dist > 0 && DDR_REVERSED_P (ddr))
661 /* If DDR_REVERSED_P the order of the data-refs in DDR was
662 reversed (to make distance vector positive), and the actual
663 distance is negative. */
664 if (vect_print_dump_info (REPORT_DR_DETAILS))
665 fprintf (vect_dump, "dependence distance negative.");
666 continue;
669 if (abs (dist) >= 2
670 && abs (dist) < *max_vf)
672 /* The dependence distance requires reduction of the maximal
673 vectorization factor. */
674 *max_vf = abs (dist);
675 if (vect_print_dump_info (REPORT_DR_DETAILS))
676 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
677 *max_vf);
680 if (abs (dist) >= *max_vf)
682 /* Dependence distance does not create dependence, as far as
683 vectorization is concerned, in this case. */
684 if (vect_print_dump_info (REPORT_DR_DETAILS))
685 fprintf (vect_dump, "dependence distance >= VF.");
686 continue;
689 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
691 fprintf (vect_dump, "not vectorized, possible dependence "
692 "between data-refs ");
693 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
694 fprintf (vect_dump, " and ");
695 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
698 return true;
701 return false;
704 /* Function vect_analyze_data_ref_dependences.
706 Examine all the data references in the loop, and make sure there do not
707 exist any data dependences between them. Set *MAX_VF according to
708 the maximum vectorization factor the data dependences allow. */
710 bool
711 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
712 bb_vec_info bb_vinfo, int *max_vf,
713 bool *data_dependence_in_bb)
715 unsigned int i;
716 VEC (ddr_p, heap) *ddrs = NULL;
717 struct data_dependence_relation *ddr;
719 if (vect_print_dump_info (REPORT_DETAILS))
720 fprintf (vect_dump, "=== vect_analyze_dependences ===");
722 if (loop_vinfo)
723 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
724 else
725 ddrs = BB_VINFO_DDRS (bb_vinfo);
727 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
728 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
729 data_dependence_in_bb))
730 return false;
732 return true;
736 /* Function vect_compute_data_ref_alignment
738 Compute the misalignment of the data reference DR.
740 Output:
741 1. If during the misalignment computation it is found that the data reference
742 cannot be vectorized then false is returned.
743 2. DR_MISALIGNMENT (DR) is defined.
745 FOR NOW: No analysis is actually performed. Misalignment is calculated
746 only for trivial cases. TODO. */
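/* For illustration (hypothetical numbers): with a V4SI vectype the required
   alignment is 16 bytes, so an access with a 16-byte aligned base and
   DR_INIT 12 gets DR_MISALIGNMENT 12, while DR_INIT 16 gives misalignment
   0; if the base alignment is unknown and cannot be forced, the
   misalignment stays -1 (unknown).  */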
748 static bool
749 vect_compute_data_ref_alignment (struct data_reference *dr)
751 gimple stmt = DR_STMT (dr);
752 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
753 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
754 struct loop *loop = NULL;
755 tree ref = DR_REF (dr);
756 tree vectype;
757 tree base, base_addr;
758 bool base_aligned;
759 tree misalign;
760 tree aligned_to, alignment;
762 if (vect_print_dump_info (REPORT_DETAILS))
763 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
765 if (loop_vinfo)
766 loop = LOOP_VINFO_LOOP (loop_vinfo);
768 /* Initialize misalignment to unknown. */
769 SET_DR_MISALIGNMENT (dr, -1);
771 misalign = DR_INIT (dr);
772 aligned_to = DR_ALIGNED_TO (dr);
773 base_addr = DR_BASE_ADDRESS (dr);
774 vectype = STMT_VINFO_VECTYPE (stmt_info);
776 /* In case the dataref is in an inner-loop of the loop that is being
777 vectorized (LOOP), we use the base and misalignment information
778 relative to the outer-loop (LOOP). This is ok only if the misalignment
779 stays the same throughout the execution of the inner-loop, which is why
we have to check that the stride of the dataref in the inner-loop is
   divisible by the vector size.  */
782 if (loop && nested_in_vect_loop_p (loop, stmt))
784 tree step = DR_STEP (dr);
785 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
787 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
789 if (vect_print_dump_info (REPORT_ALIGNMENT))
790 fprintf (vect_dump, "inner step divides the vector-size.");
791 misalign = STMT_VINFO_DR_INIT (stmt_info);
792 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
793 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
795 else
797 if (vect_print_dump_info (REPORT_ALIGNMENT))
798 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
799 misalign = NULL_TREE;
803 base = build_fold_indirect_ref (base_addr);
804 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
806 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
807 || !misalign)
809 if (vect_print_dump_info (REPORT_ALIGNMENT))
811 fprintf (vect_dump, "Unknown alignment for access: ");
812 print_generic_expr (vect_dump, base, TDF_SLIM);
814 return true;
817 if ((DECL_P (base)
818 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
819 alignment) >= 0)
820 || (TREE_CODE (base_addr) == SSA_NAME
821 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
822 TREE_TYPE (base_addr)))),
823 alignment) >= 0))
824 base_aligned = true;
825 else
826 base_aligned = false;
828 if (!base_aligned)
830 /* Do not change the alignment of global variables if
831 flag_section_anchors is enabled. */
832 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
833 || (TREE_STATIC (base) && flag_section_anchors))
835 if (vect_print_dump_info (REPORT_DETAILS))
837 fprintf (vect_dump, "can't force alignment of ref: ");
838 print_generic_expr (vect_dump, ref, TDF_SLIM);
840 return true;
843 /* Force the alignment of the decl.
844 NOTE: This is the only change to the code we make during
845 the analysis phase, before deciding to vectorize the loop. */
846 if (vect_print_dump_info (REPORT_DETAILS))
848 fprintf (vect_dump, "force alignment of ");
849 print_generic_expr (vect_dump, ref, TDF_SLIM);
852 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
853 DECL_USER_ALIGN (base) = 1;
856 /* At this point we assume that the base is aligned. */
857 gcc_assert (base_aligned
858 || (TREE_CODE (base) == VAR_DECL
859 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
861 /* If this is a backward running DR then first access in the larger
862 vectype actually is N-1 elements before the address in the DR.
863 Adjust misalign accordingly. */
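/* Worked example (illustrative, assuming a 16-byte vector alignment): for
   a V4SI vectype and DR_STEP == -4, OFFSET is 3 * -4 = -12, so a DR_INIT
   of 0 becomes a misalignment of -12, which the FLOOR_MOD below turns
   into 4.  */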
864 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
866 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
867 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
868 otherwise we wouldn't be here. */
869 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
870 /* PLUS because DR_STEP was negative. */
871 misalign = size_binop (PLUS_EXPR, misalign, offset);
874 /* Modulo alignment. */
875 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
877 if (!host_integerp (misalign, 1))
879 /* Negative or overflowed misalignment value. */
880 if (vect_print_dump_info (REPORT_DETAILS))
881 fprintf (vect_dump, "unexpected misalign value");
882 return false;
885 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
887 if (vect_print_dump_info (REPORT_DETAILS))
889 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
890 print_generic_expr (vect_dump, ref, TDF_SLIM);
893 return true;
897 /* Function vect_compute_data_refs_alignment
899 Compute the misalignment of data references in the loop.
900 Return FALSE if a data reference is found that cannot be vectorized. */
902 static bool
903 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
904 bb_vec_info bb_vinfo)
906 VEC (data_reference_p, heap) *datarefs;
907 struct data_reference *dr;
908 unsigned int i;
910 if (loop_vinfo)
911 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
912 else
913 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
915 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
916 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
917 && !vect_compute_data_ref_alignment (dr))
919 if (bb_vinfo)
921 /* Mark unsupported statement as unvectorizable. */
922 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
923 continue;
925 else
926 return false;
929 return true;
933 /* Function vect_update_misalignment_for_peel
935 DR - the data reference whose misalignment is to be adjusted.
936 DR_PEEL - the data reference whose misalignment is being made
937 zero in the vector loop by the peel.
938 NPEEL - the number of iterations in the peel loop if the misalignment
939 of DR_PEEL is known at compile time. */
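/* A small worked example (values invented for exposition): peeling
   NPEEL = 2 iterations of 4-byte accesses advances each data-ref by 8
   bytes, so with a 16-byte vector a data-ref with misalignment 8 becomes
   (8 + 2*4) & 15 == 0, i.e. aligned, while one with misalignment 4
   becomes 12.  */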
941 static void
942 vect_update_misalignment_for_peel (struct data_reference *dr,
943 struct data_reference *dr_peel, int npeel)
945 unsigned int i;
946 VEC(dr_p,heap) *same_align_drs;
947 struct data_reference *current_dr;
948 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
949 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
950 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
951 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
953 /* For interleaved data accesses the step in the loop must be multiplied by
954 the size of the interleaving group. */
955 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
956 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
957 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
958 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
960 /* It can be assumed that the data refs with the same alignment as dr_peel
961 are aligned in the vector loop. */
962 same_align_drs
963 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
964 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
966 if (current_dr != dr)
967 continue;
968 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
969 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
970 SET_DR_MISALIGNMENT (dr, 0);
971 return;
974 if (known_alignment_for_access_p (dr)
975 && known_alignment_for_access_p (dr_peel))
977 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
978 int misal = DR_MISALIGNMENT (dr);
979 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
980 misal += negative ? -npeel * dr_size : npeel * dr_size;
981 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
982 SET_DR_MISALIGNMENT (dr, misal);
983 return;
986 if (vect_print_dump_info (REPORT_DETAILS))
987 fprintf (vect_dump, "Setting misalignment to -1.");
988 SET_DR_MISALIGNMENT (dr, -1);
992 /* Function vect_verify_datarefs_alignment
994 Return TRUE if all data references in the loop can be
995 handled with respect to alignment. */
997 bool
998 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1000 VEC (data_reference_p, heap) *datarefs;
1001 struct data_reference *dr;
1002 enum dr_alignment_support supportable_dr_alignment;
1003 unsigned int i;
1005 if (loop_vinfo)
1006 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1007 else
1008 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1010 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1012 gimple stmt = DR_STMT (dr);
1013 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1015 /* For interleaving, only the alignment of the first access matters.
1016 Skip statements marked as not vectorizable. */
1017 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1018 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1019 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1020 continue;
1022 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1023 if (!supportable_dr_alignment)
1025 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1027 if (DR_IS_READ (dr))
1028 fprintf (vect_dump,
1029 "not vectorized: unsupported unaligned load.");
1030 else
1031 fprintf (vect_dump,
1032 "not vectorized: unsupported unaligned store.");
1034 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1036 return false;
1038 if (supportable_dr_alignment != dr_aligned
1039 && vect_print_dump_info (REPORT_ALIGNMENT))
1040 fprintf (vect_dump, "Vectorizing an unaligned access.");
1042 return true;
1046 /* Function vector_alignment_reachable_p
1048 Return true if vector alignment for DR is reachable by peeling
1049 a few loop iterations. Return false otherwise. */
1051 static bool
1052 vector_alignment_reachable_p (struct data_reference *dr)
1054 gimple stmt = DR_STMT (dr);
1055 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1056 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1058 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1060 /* For interleaved access we peel only if number of iterations in
the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
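      /* E.g. (illustrative): with 8 elements per vector, a misalignment of
         2 elements leaves 6 prolog iterations; a group size of 3 divides 6,
         so peeling is allowed, whereas a group size of 4 would not.  */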
1063 int elem_size, mis_in_elements;
1064 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1066 /* FORNOW: handle only known alignment. */
1067 if (!known_alignment_for_access_p (dr))
1068 return false;
1070 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1071 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1073 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1074 return false;
1077 /* If misalignment is known at the compile time then allow peeling
1078 only if natural alignment is reachable through peeling. */
1079 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1081 HOST_WIDE_INT elmsize =
1082 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1083 if (vect_print_dump_info (REPORT_DETAILS))
1085 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1086 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1088 if (DR_MISALIGNMENT (dr) % elmsize)
1090 if (vect_print_dump_info (REPORT_DETAILS))
1091 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1092 return false;
1096 if (!known_alignment_for_access_p (dr))
1098 tree type = (TREE_TYPE (DR_REF (dr)));
1099 tree ba = DR_BASE_OBJECT (dr);
1100 bool is_packed = false;
1102 if (ba)
1103 is_packed = contains_packed_reference (ba);
1105 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1106 is_packed = true;
1108 if (vect_print_dump_info (REPORT_DETAILS))
1109 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1110 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1111 return true;
1112 else
1113 return false;
1116 return true;
1120 /* Calculate the cost of the memory access represented by DR. */
1122 static void
1123 vect_get_data_access_cost (struct data_reference *dr,
1124 unsigned int *inside_cost,
1125 unsigned int *outside_cost)
1127 gimple stmt = DR_STMT (dr);
1128 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1129 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1130 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1131 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1132 int ncopies = vf / nunits;
1133 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1135 if (!supportable_dr_alignment)
1136 *inside_cost = VECT_MAX_COST;
1137 else
1139 if (DR_IS_READ (dr))
1140 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1141 else
1142 vect_get_store_cost (dr, ncopies, inside_cost);
1145 if (vect_print_dump_info (REPORT_COST))
1146 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1147 "outside_cost = %d.", *inside_cost, *outside_cost);
1151 static hashval_t
1152 vect_peeling_hash (const void *elem)
1154 const struct _vect_peel_info *peel_info;
1156 peel_info = (const struct _vect_peel_info *) elem;
1157 return (hashval_t) peel_info->npeel;
1161 static int
1162 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1164 const struct _vect_peel_info *a, *b;
1166 a = (const struct _vect_peel_info *) elem1;
1167 b = (const struct _vect_peel_info *) elem2;
1168 return (a->npeel == b->npeel);
1172 /* Insert DR into peeling hash table with NPEEL as key. */
1174 static void
1175 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1176 int npeel)
1178 struct _vect_peel_info elem, *slot;
1179 void **new_slot;
1180 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1182 elem.npeel = npeel;
1183 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1184 &elem);
1185 if (slot)
1186 slot->count++;
1187 else
1189 slot = XNEW (struct _vect_peel_info);
1190 slot->npeel = npeel;
1191 slot->dr = dr;
1192 slot->count = 1;
1193 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1194 INSERT);
1195 *new_slot = slot;
1198 if (!supportable_dr_alignment && !flag_vect_cost_model)
1199 slot->count += VECT_MAX_COST;
1203 /* Traverse peeling hash table to find peeling option that aligns maximum
1204 number of data accesses. */
1206 static int
1207 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1209 vect_peel_info elem = (vect_peel_info) *slot;
1210 vect_peel_extended_info max = (vect_peel_extended_info) data;
1212 if (elem->count > max->peel_info.count)
1214 max->peel_info.npeel = elem->npeel;
1215 max->peel_info.count = elem->count;
1216 max->peel_info.dr = elem->dr;
1219 return 1;
1223 /* Traverse peeling hash table and calculate cost for each peeling option.
1224 Find the one with the lowest cost. */
1226 static int
1227 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1229 vect_peel_info elem = (vect_peel_info) *slot;
1230 vect_peel_extended_info min = (vect_peel_extended_info) data;
1231 int save_misalignment, dummy;
1232 unsigned int inside_cost = 0, outside_cost = 0, i;
1233 gimple stmt = DR_STMT (elem->dr);
1234 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1236 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1237 struct data_reference *dr;
1239 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1241 stmt = DR_STMT (dr);
1242 stmt_info = vinfo_for_stmt (stmt);
1243 /* For interleaving, only the alignment of the first access
1244 matters. */
1245 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1246 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1247 continue;
1249 save_misalignment = DR_MISALIGNMENT (dr);
1250 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1251 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1252 SET_DR_MISALIGNMENT (dr, save_misalignment);
1255 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1256 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1258 if (inside_cost < min->inside_cost
1259 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1261 min->inside_cost = inside_cost;
1262 min->outside_cost = outside_cost;
1263 min->peel_info.dr = elem->dr;
1264 min->peel_info.npeel = elem->npeel;
1267 return 1;
1271 /* Choose best peeling option by traversing peeling hash table and either
1272 choosing an option with the lowest cost (if cost model is enabled) or the
1273 option that aligns as many accesses as possible. */
1275 static struct data_reference *
1276 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1277 unsigned int *npeel)
1279 struct _vect_peel_extended_info res;
1281 res.peel_info.dr = NULL;
1283 if (flag_vect_cost_model)
1285 res.inside_cost = INT_MAX;
1286 res.outside_cost = INT_MAX;
1287 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1288 vect_peeling_hash_get_lowest_cost, &res);
1290 else
1292 res.peel_info.count = 0;
1293 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1294 vect_peeling_hash_get_most_frequent, &res);
1297 *npeel = res.peel_info.npeel;
1298 return res.peel_info.dr;
1302 /* Function vect_enhance_data_refs_alignment
1304 This pass will use loop versioning and loop peeling in order to enhance
1305 the alignment of data references in the loop.
1307 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1308 original loop is to be vectorized. Any other loops that are created by
1309 the transformations performed in this pass - are not supposed to be
1310 vectorized. This restriction will be relaxed.
1312 This pass will require a cost model to guide it whether to apply peeling
1313 or versioning or a combination of the two. For example, the scheme that
Intel uses when given a loop with several memory accesses is as follows:
1315 choose one memory access ('p') which alignment you want to force by doing
1316 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1317 other accesses are not necessarily aligned, or (2) use loop versioning to
1318 generate one loop in which all accesses are aligned, and another loop in
1319 which only 'p' is necessarily aligned.
1321 ("Automatic Intra-Register Vectorization for the Intel Architecture",
Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1323 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1325 Devising a cost model is the most critical aspect of this work. It will
1326 guide us on which access to peel for, whether to use loop versioning, how
1327 many versions to create, etc. The cost model will probably consist of
1328 generic considerations as well as target specific considerations (on
1329 powerpc for example, misaligned stores are more painful than misaligned
1330 loads).
1332 Here are the general steps involved in alignment enhancements:
1334 -- original loop, before alignment analysis:
1335 for (i=0; i<N; i++){
1336 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1337 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1340 -- After vect_compute_data_refs_alignment:
1341 for (i=0; i<N; i++){
1342 x = q[i]; # DR_MISALIGNMENT(q) = 3
1343 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1346 -- Possibility 1: we do loop versioning:
1347 if (p is aligned) {
1348 for (i=0; i<N; i++){ # loop 1A
1349 x = q[i]; # DR_MISALIGNMENT(q) = 3
1350 p[i] = y; # DR_MISALIGNMENT(p) = 0
1353 else {
1354 for (i=0; i<N; i++){ # loop 1B
1355 x = q[i]; # DR_MISALIGNMENT(q) = 3
1356 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1360 -- Possibility 2: we do loop peeling:
1361 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1362 x = q[i];
1363 p[i] = y;
1365 for (i = 3; i < N; i++){ # loop 2A
1366 x = q[i]; # DR_MISALIGNMENT(q) = 0
1367 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1370 -- Possibility 3: combination of loop peeling and versioning:
1371 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1372 x = q[i];
1373 p[i] = y;
1375 if (p is aligned) {
1376 for (i = 3; i<N; i++){ # loop 3A
1377 x = q[i]; # DR_MISALIGNMENT(q) = 0
1378 p[i] = y; # DR_MISALIGNMENT(p) = 0
1381 else {
1382 for (i = 3; i<N; i++){ # loop 3B
1383 x = q[i]; # DR_MISALIGNMENT(q) = 0
1384 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1388 These loops are later passed to loop_transform to be vectorized. The
1389 vectorizer will use the alignment information to guide the transformation
1390 (whether to generate regular loads/stores, or with special handling for
1391 misalignment). */
1393 bool
1394 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1396 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1397 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1398 enum dr_alignment_support supportable_dr_alignment;
1399 struct data_reference *dr0 = NULL, *first_store = NULL;
1400 struct data_reference *dr;
1401 unsigned int i, j;
1402 bool do_peeling = false;
1403 bool do_versioning = false;
1404 bool stat;
1405 gimple stmt;
1406 stmt_vec_info stmt_info;
1407 int vect_versioning_for_alias_required;
1408 unsigned int npeel = 0;
1409 bool all_misalignments_unknown = true;
1410 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1411 unsigned possible_npeel_number = 1;
1412 tree vectype;
1413 unsigned int nelements, mis, same_align_drs_max = 0;
1415 if (vect_print_dump_info (REPORT_DETAILS))
1416 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1418 /* While cost model enhancements are expected in the future, the high level
1419 view of the code at this time is as follows:
1421 A) If there is a misaligned access then see if peeling to align
1422 this access can make all data references satisfy
1423 vect_supportable_dr_alignment. If so, update data structures
1424 as needed and return true.
1426 B) If peeling wasn't possible and there is a data reference with an
1427 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1428 then see if loop versioning checks can be used to make all data
1429 references satisfy vect_supportable_dr_alignment. If so, update
1430 data structures as needed and return true.
1432 C) If neither peeling nor versioning were successful then return false if
1433 any data reference does not satisfy vect_supportable_dr_alignment.
1435 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1437 Note, Possibility 3 above (which is peeling and versioning together) is not
1438 being done at this time. */
1440 /* (1) Peeling to force alignment. */
1442 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1443 Considerations:
1444 + How many accesses will become aligned due to the peeling
1445 - How many accesses will become unaligned due to the peeling,
1446 and the cost of misaligned accesses.
1447 - The cost of peeling (the extra runtime checks, the increase
1448 in code size). */
1450 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1452 stmt = DR_STMT (dr);
1453 stmt_info = vinfo_for_stmt (stmt);
1455 /* For interleaving, only the alignment of the first access
1456 matters. */
1457 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1458 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1459 continue;
1461 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1462 do_peeling = vector_alignment_reachable_p (dr);
1463 if (do_peeling)
1465 if (known_alignment_for_access_p (dr))
1467 unsigned int npeel_tmp;
1468 bool negative = tree_int_cst_compare (DR_STEP (dr),
1469 size_zero_node) < 0;
1471 /* Save info about DR in the hash table. */
1472 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1473 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1474 htab_create (1, vect_peeling_hash,
1475 vect_peeling_hash_eq, free);
1477 vectype = STMT_VINFO_VECTYPE (stmt_info);
1478 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1479 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1480 TREE_TYPE (DR_REF (dr))));
1481 npeel_tmp = (negative
1482 ? (mis - nelements) : (nelements - mis))
1483 & (nelements - 1);
1485 /* For multiple types, it is possible that the bigger type access
1486 will have more than one peeling option. E.g., a loop with two
1487 types: one of size (vector size / 4), and the other one of
size (vector size / 8).  The vectorization factor will be 8.  If both
                 accesses are misaligned by 3, the first one needs one scalar
                 iteration to be aligned, and the second one needs 5.  But the
                 first one will also be aligned by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add the full vector size, as long as we don't
                 exceed the vectorization factor.
                 We do this automatically for the cost model, since we
                 calculate the cost for every peeling option.  */
1498 if (!flag_vect_cost_model)
possible_npeel_number = vf / nelements;
1501 /* Handle the aligned case. We may decide to align some other
1502 access, making DR unaligned. */
1503 if (DR_MISALIGNMENT (dr) == 0)
1505 npeel_tmp = 0;
1506 if (!flag_vect_cost_model)
1507 possible_npeel_number++;
1510 for (j = 0; j < possible_npeel_number; j++)
1512 gcc_assert (npeel_tmp <= vf);
1513 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1514 npeel_tmp += nelements;
1517 all_misalignments_unknown = false;
1518 /* Data-ref that was chosen for the case that all the
1519 misalignments are unknown is not relevant anymore, since we
1520 have a data-ref with known alignment. */
1521 dr0 = NULL;
1523 else
1525 /* If we don't know all the misalignment values, we prefer
1526 peeling for data-ref that has maximum number of data-refs
1527 with the same alignment, unless the target prefers to align
stores over loads.  */
1529 if (all_misalignments_unknown)
1531 if (same_align_drs_max < VEC_length (dr_p,
1532 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1533 || !dr0)
1535 same_align_drs_max = VEC_length (dr_p,
1536 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1537 dr0 = dr;
1540 if (!first_store && DR_IS_WRITE (dr))
1541 first_store = dr;
1544 /* If there are both known and unknown misaligned accesses in the
1545 loop, we choose peeling amount according to the known
1546 accesses. */
1549 if (!supportable_dr_alignment)
1551 dr0 = dr;
1552 if (!first_store && DR_IS_WRITE (dr))
1553 first_store = dr;
1557 else
1559 if (!aligned_access_p (dr))
1561 if (vect_print_dump_info (REPORT_DETAILS))
1562 fprintf (vect_dump, "vector alignment may not be reachable");
1564 break;
1569 vect_versioning_for_alias_required
1570 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1572 /* Temporarily, if versioning for alias is required, we disable peeling
1573 until we support peeling and versioning. Often peeling for alignment
1574 will require peeling for loop-bound, which in turn requires that we
1575 know how to adjust the loop ivs after the loop. */
1576 if (vect_versioning_for_alias_required
1577 || !vect_can_advance_ivs_p (loop_vinfo)
1578 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1579 do_peeling = false;
1581 if (do_peeling && all_misalignments_unknown
1582 && vect_supportable_dr_alignment (dr0, false))
1585 /* Check if the target requires to prefer stores over loads, i.e., if
1586 misaligned stores are more expensive than misaligned loads (taking
1587 drs with same alignment into account). */
1588 if (first_store && DR_IS_READ (dr0))
1590 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1591 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1592 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1593 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1595 vect_get_data_access_cost (dr0, &load_inside_cost,
1596 &load_outside_cost);
1597 vect_get_data_access_cost (first_store, &store_inside_cost,
1598 &store_outside_cost);
1600 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1601 aligning the load DR0). */
1602 load_inside_penalty = store_inside_cost;
1603 load_outside_penalty = store_outside_cost;
1604 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1605 (vinfo_for_stmt (DR_STMT (first_store))),
1606 i, dr);
1607 i++)
1608 if (DR_IS_READ (dr))
1610 load_inside_penalty += load_inside_cost;
1611 load_outside_penalty += load_outside_cost;
1613 else
1615 load_inside_penalty += store_inside_cost;
1616 load_outside_penalty += store_outside_cost;
1619 /* Calculate the penalty for leaving DR0 unaligned (by
1620 aligning the FIRST_STORE). */
1621 store_inside_penalty = load_inside_cost;
1622 store_outside_penalty = load_outside_cost;
1623 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1624 (vinfo_for_stmt (DR_STMT (dr0))),
1625 i, dr);
1626 i++)
1627 if (DR_IS_READ (dr))
1629 store_inside_penalty += load_inside_cost;
1630 store_outside_penalty += load_outside_cost;
1632 else
1634 store_inside_penalty += store_inside_cost;
1635 store_outside_penalty += store_outside_cost;
1638 if (load_inside_penalty > store_inside_penalty
1639 || (load_inside_penalty == store_inside_penalty
1640 && load_outside_penalty > store_outside_penalty))
1641 dr0 = first_store;
1644 /* In case there are only loads with different unknown misalignments, use
1645 peeling only if it may help to align other accesses in the loop. */
1646 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1647 (vinfo_for_stmt (DR_STMT (dr0))))
1648 && vect_supportable_dr_alignment (dr0, false)
1649 != dr_unaligned_supported)
1650 do_peeling = false;
1653 if (do_peeling && !dr0)
1655 /* Peeling is possible, but there is no data access that is not supported
1656 unless aligned. So we try to choose the best possible peeling. */
1658 /* We should get here only if there are drs with known misalignment. */
1659 gcc_assert (!all_misalignments_unknown);
1661 /* Choose the best peeling from the hash table. */
1662 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1663 if (!dr0 || !npeel)
1664 do_peeling = false;
1667 if (do_peeling)
1669 stmt = DR_STMT (dr0);
1670 stmt_info = vinfo_for_stmt (stmt);
1671 vectype = STMT_VINFO_VECTYPE (stmt_info);
1672 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1674 if (known_alignment_for_access_p (dr0))
1676 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1677 size_zero_node) < 0;
1678 if (!npeel)
1680 /* Since it's known at compile time, compute the number of
1681 iterations in the peeled loop (the peeling factor) for use in
1682 updating DR_MISALIGNMENT values. The peeling factor is the
1683 vectorization factor minus the misalignment as an element
1684 count. */
1685 mis = DR_MISALIGNMENT (dr0);
1686 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1687 npeel = ((negative ? mis - nelements : nelements - mis)
1688 & (nelements - 1));
1691 /* For interleaved data access every iteration accesses all the
1692 members of the group, therefore we divide the number of iterations
1693 by the group size. */
1694 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1695 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1696 npeel /= DR_GROUP_SIZE (stmt_info);
1698 if (vect_print_dump_info (REPORT_DETAILS))
1699 fprintf (vect_dump, "Try peeling by %d", npeel);
1702 /* Ensure that all data refs can be vectorized after the peel. */
1703 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1705 int save_misalignment;
1707 if (dr == dr0)
1708 continue;
1710 stmt = DR_STMT (dr);
1711 stmt_info = vinfo_for_stmt (stmt);
1712 /* For interleaving, only the alignment of the first access
1713 matters. */
1714 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1715 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1716 continue;
1718 save_misalignment = DR_MISALIGNMENT (dr);
1719 vect_update_misalignment_for_peel (dr, dr0, npeel);
1720 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1721 SET_DR_MISALIGNMENT (dr, save_misalignment);
1723 if (!supportable_dr_alignment)
1725 do_peeling = false;
1726 break;
1730 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1732 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1733 if (!stat)
1734 do_peeling = false;
1735 else
1736 return stat;
1739 if (do_peeling)
1741 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1742 If the misalignment of DR_i is identical to that of dr0 then set
1743 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1744 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1745 by the peeling factor times the element size of DR_i (MOD the
1746 vectorization factor times the size). Otherwise, the
1747 misalignment of DR_i must be set to unknown. */
1748 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1749 if (dr != dr0)
1750 vect_update_misalignment_for_peel (dr, dr0, npeel);
1752 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1753 if (npeel)
1754 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1755 else
1756 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1757 SET_DR_MISALIGNMENT (dr0, 0);
1758 if (vect_print_dump_info (REPORT_ALIGNMENT))
1759 fprintf (vect_dump, "Alignment of access forced using peeling.");
1761 if (vect_print_dump_info (REPORT_DETAILS))
1762 fprintf (vect_dump, "Peeling for alignment will be applied.");
1764 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1765 gcc_assert (stat);
1766 return stat;
1771 /* (2) Versioning to force alignment. */
1773 /* Try versioning if:
1774 1) flag_tree_vect_loop_version is TRUE
1775 2) optimize loop for speed
1776 3) there is at least one unsupported misaligned data ref with an unknown
1777 misalignment, and
1778 4) all misaligned data refs with a known misalignment are supported, and
1779 5) the number of runtime alignment checks is within reason. */
1781 do_versioning =
1782 flag_tree_vect_loop_version
1783 && optimize_loop_nest_for_speed_p (loop)
1784 && (!loop->inner); /* FORNOW */
1786 if (do_versioning)
1788 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1790 stmt = DR_STMT (dr);
1791 stmt_info = vinfo_for_stmt (stmt);
1793 /* For interleaving, only the alignment of the first access
1794 matters. */
1795 if (aligned_access_p (dr)
1796 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1797 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1798 continue;
1800 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1802 if (!supportable_dr_alignment)
1804 gimple stmt;
1805 int mask;
1806 tree vectype;
1808 if (known_alignment_for_access_p (dr)
1809 || VEC_length (gimple,
1810 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1811 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1813 do_versioning = false;
1814 break;
1817 stmt = DR_STMT (dr);
1818 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1819 gcc_assert (vectype);
1821 /* The rightmost bits of an aligned address must be zeros.
1822 Construct the mask needed for this test. For example,
1823 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1824 mask must be 15 = 0xf. */
1825 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1827 /* FORNOW: use the same mask to test all potentially unaligned
1828 references in the loop. The vectorizer currently supports
1829 a single vector size, see the reference to
1830 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1831 vectorization factor is computed. */
1832 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1833 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1834 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1835 VEC_safe_push (gimple, heap,
1836 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1837 DR_STMT (dr));
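/* The versioned loop is guarded by a runtime test that ORs the
   addresses of the statements collected here and checks the result
   against this mask (see vect_create_cond_for_align_checks).  */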
1841 /* Versioning requires at least one misaligned data reference. */
1842 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1843 do_versioning = false;
1844 else if (!do_versioning)
1845 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1848 if (do_versioning)
1850 VEC(gimple,heap) *may_misalign_stmts
1851 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1852 gimple stmt;
1854 /* It can now be assumed that the data references in the statements
1855 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1856 of the loop being vectorized. */
1857 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1859 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1860 dr = STMT_VINFO_DATA_REF (stmt_info);
1861 SET_DR_MISALIGNMENT (dr, 0);
1862 if (vect_print_dump_info (REPORT_ALIGNMENT))
1863 fprintf (vect_dump, "Alignment of access forced using versioning.");
1866 if (vect_print_dump_info (REPORT_DETAILS))
1867 fprintf (vect_dump, "Versioning for alignment will be applied.");
1869 /* Peeling and versioning can't be done together at this time. */
1870 gcc_assert (! (do_peeling && do_versioning));
1872 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1873 gcc_assert (stat);
1874 return stat;
1877 /* This point is reached if neither peeling nor versioning is being done. */
1878 gcc_assert (! (do_peeling || do_versioning));
1880 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1881 return stat;
1885 /* Function vect_find_same_alignment_drs.
1887 Update group and alignment relations according to the chosen
1888 vectorization factor. */
1890 static void
1891 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1892 loop_vec_info loop_vinfo)
1894 unsigned int i;
1895 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1896 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1897 struct data_reference *dra = DDR_A (ddr);
1898 struct data_reference *drb = DDR_B (ddr);
1899 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1900 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1901 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1902 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1903 lambda_vector dist_v;
1904 unsigned int loop_depth;
1906 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1907 return;
1909 if (dra == drb)
1910 return;
1912 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1913 return;
1915 /* Loop-based vectorization and known data dependence. */
1916 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1917 return;
1919 /* Data-dependence analysis reports a distance vector of zero
1920 for data-references that overlap only in the first iteration
1921 but have steps of different sign (see PR45764).
1922 So as a sanity check require equal DR_STEP. */
1923 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1924 return;
1926 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1927 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1929 int dist = dist_v[loop_depth];
1931 if (vect_print_dump_info (REPORT_DR_DETAILS))
1932 fprintf (vect_dump, "dependence distance = %d.", dist);
1934 /* Same loop iteration. */
1935 if (dist == 0
1936 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1938 /* Two references with distance zero, or with a distance that is a multiple of the vectorization factor (for equal-sized elements), have the same alignment. */
1939 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1940 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1941 if (vect_print_dump_info (REPORT_ALIGNMENT))
1942 fprintf (vect_dump, "accesses have the same alignment.");
1943 if (vect_print_dump_info (REPORT_DR_DETAILS))
1945 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1946 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1947 fprintf (vect_dump, " and ");
1948 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1955 /* Function vect_analyze_data_refs_alignment
1957 Analyze the alignment of the data-references in the loop.
1958 Return FALSE if a data reference is found that cannot be vectorized. */
1960 bool
1961 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1962 bb_vec_info bb_vinfo)
1964 if (vect_print_dump_info (REPORT_DETAILS))
1965 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1967 /* Mark groups of data references with same alignment using
1968 data dependence information. */
1969 if (loop_vinfo)
1971 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1972 struct data_dependence_relation *ddr;
1973 unsigned int i;
1975 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
1976 vect_find_same_alignment_drs (ddr, loop_vinfo);
1979 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1981 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1982 fprintf (vect_dump,
1983 "not vectorized: can't calculate alignment for data ref.");
1984 return false;
1987 return true;
1991 /* Analyze groups of strided accesses: check that DR belongs to a group of
1992 strided accesses of legal size, step, etc. Detect gaps, single element
1993 interleaving, and other special cases. Set strided access info.
1994 Collect groups of strided stores for further use in SLP analysis. */
1996 static bool
1997 vect_analyze_group_access (struct data_reference *dr)
1999 tree step = DR_STEP (dr);
2000 tree scalar_type = TREE_TYPE (DR_REF (dr));
2001 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2002 gimple stmt = DR_STMT (dr);
2003 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2004 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2005 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2006 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2007 HOST_WIDE_INT stride;
2008 bool slp_impossible = false;
2010 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2011 interleaving group (including gaps). */
2012 stride = dr_step / type_size;
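/* For example, a 4-byte element accessed with a 16-byte step gives a
   stride of 4: a group of four elements per scalar iteration, gaps
   included.  */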
2014 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2015 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2017 /* Check whether this DR is part of an interleaving group, and is a single
2018 element of the group that is accessed in the loop. */
2020 /* Gaps are supported only for loads. STEP must be a multiple of the type
2021 size. The size of the group must be a power of 2. */
2022 if (DR_IS_READ (dr)
2023 && (dr_step % type_size) == 0
2024 && stride > 0
2025 && exact_log2 (stride) != -1)
2027 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2028 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2029 if (vect_print_dump_info (REPORT_DR_DETAILS))
2031 fprintf (vect_dump, "Detected single element interleaving ");
2032 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2033 fprintf (vect_dump, " step ");
2034 print_generic_expr (vect_dump, step, TDF_SLIM);
2036 return true;
2039 if (vect_print_dump_info (REPORT_DETAILS))
2041 fprintf (vect_dump, "not consecutive access ");
2042 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2045 if (bb_vinfo)
2047 /* Mark the statement as unvectorizable. */
2048 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2049 return true;
2052 return false;
2055 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2057 /* First stmt in the interleaving chain. Check the chain. */
2058 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2059 struct data_reference *data_ref = dr;
2060 unsigned int count = 1;
2061 tree next_step;
2062 tree prev_init = DR_INIT (data_ref);
2063 gimple prev = stmt;
2064 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2066 while (next)
2068 /* Skip same data-refs. If two or more stmts share a
2069 data-ref (supported only for loads), we vectorize only the first
2070 stmt, and the rest get their vectorized loads from the first
2071 one. */
2072 if (!tree_int_cst_compare (DR_INIT (data_ref),
2073 DR_INIT (STMT_VINFO_DATA_REF (
2074 vinfo_for_stmt (next)))))
2076 if (DR_IS_WRITE (data_ref))
2078 if (vect_print_dump_info (REPORT_DETAILS))
2079 fprintf (vect_dump, "Two store stmts share the same dr.");
2080 return false;
2083 /* Check that there are no load-store dependencies for these loads,
2084 to prevent a load-store-load sequence to the same location. */
2085 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2086 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2088 if (vect_print_dump_info (REPORT_DETAILS))
2089 fprintf (vect_dump,
2090 "READ_WRITE dependence in interleaving.");
2091 return false;
2094 /* For a load, record the stmt whose vectorized load will be reused. */
2095 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2097 prev = next;
2098 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2099 continue;
2101 prev = next;
2103 /* Check that all the accesses have the same STEP. */
2104 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2105 if (tree_int_cst_compare (step, next_step))
2107 if (vect_print_dump_info (REPORT_DETAILS))
2108 fprintf (vect_dump, "not consecutive access in interleaving");
2109 return false;
2112 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2113 /* Check that the distance between two accesses is equal to the type
2114 size. Otherwise, we have gaps. */
2115 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2116 - TREE_INT_CST_LOW (prev_init)) / type_size;
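/* For example, if the group contains loads of a[4*i] and a[4*i+2],
   their DR_INITs differ by two elements, so diff == 2 and a[4*i+1]
   is a gap.  */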
2117 if (diff != 1)
2119 /* FORNOW: SLP of accesses with gaps is not supported. */
2120 slp_impossible = true;
2121 if (DR_IS_WRITE (data_ref))
2123 if (vect_print_dump_info (REPORT_DETAILS))
2124 fprintf (vect_dump, "interleaved store with gaps");
2125 return false;
2128 gaps += diff - 1;
2131 /* Store the gap from the previous member of the group. If there is no
2132 gap in the access, DR_GROUP_GAP is always 1. */
2133 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2135 prev_init = DR_INIT (data_ref);
2136 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2137 /* Count the number of data-refs in the chain. */
2138 count++;
2141 /* COUNT is the number of accesses found; we multiply it by the size of
2142 the type to get COUNT_IN_BYTES. */
2143 count_in_bytes = type_size * count;
2145 /* Check that the size of the interleaving (including gaps) is not
2146 greater than STEP. */
2147 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2149 if (vect_print_dump_info (REPORT_DETAILS))
2151 fprintf (vect_dump, "interleaving size is greater than step for ");
2152 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2154 return false;
2157 /* Check that the size of the interleaving is equal to STEP for stores,
2158 i.e., that there are no gaps. */
2159 if (dr_step && dr_step != count_in_bytes)
2161 if (DR_IS_READ (dr))
2163 slp_impossible = true;
2164 /* There is a gap after the last load in the group. This gap is the
2165 difference between the stride and the number of elements. When
2166 there is no gap, this difference should be 0. */
2167 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
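/* For example, loads of a[4*i] and a[4*i+1] with a step of four
   elements give stride == 4 and count == 2, so a trailing gap of 2
   is recorded.  */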
2169 else
2171 if (vect_print_dump_info (REPORT_DETAILS))
2172 fprintf (vect_dump, "interleaved store with gaps");
2173 return false;
2177 /* Check that STEP is a multiple of type size. */
2178 if (dr_step && (dr_step % type_size) != 0)
2180 if (vect_print_dump_info (REPORT_DETAILS))
2182 fprintf (vect_dump, "step is not a multiple of type size: step ");
2183 print_generic_expr (vect_dump, step, TDF_SLIM);
2184 fprintf (vect_dump, " size ");
2185 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2186 TDF_SLIM);
2188 return false;
2191 if (stride == 0)
2192 stride = count;
2194 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2195 if (vect_print_dump_info (REPORT_DETAILS))
2196 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2198 /* SLP: create an SLP data structure for every interleaving group of
2199 stores for further analysis in vect_analyze_slp. */
2200 if (DR_IS_WRITE (dr) && !slp_impossible)
2202 if (loop_vinfo)
2203 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2204 stmt);
2205 if (bb_vinfo)
2206 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2207 stmt);
2211 return true;
2215 /* Analyze the access pattern of the data-reference DR.
2216 In case of non-consecutive accesses call vect_analyze_group_access() to
2217 analyze groups of strided accesses. */
2219 static bool
2220 vect_analyze_data_ref_access (struct data_reference *dr)
2222 tree step = DR_STEP (dr);
2223 tree scalar_type = TREE_TYPE (DR_REF (dr));
2224 gimple stmt = DR_STMT (dr);
2225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2226 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2227 struct loop *loop = NULL;
2228 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2230 if (loop_vinfo)
2231 loop = LOOP_VINFO_LOOP (loop_vinfo);
2233 if (loop_vinfo && !step)
2235 if (vect_print_dump_info (REPORT_DETAILS))
2236 fprintf (vect_dump, "bad data-ref access in loop");
2237 return false;
2240 /* Don't allow invariant accesses in loops. */
2241 if (loop_vinfo && dr_step == 0)
2242 return false;
2244 if (loop && nested_in_vect_loop_p (loop, stmt))
2246 /* Interleaved accesses are not yet supported within outer-loop
2247 vectorization for references in the inner-loop. */
2248 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2250 /* For the rest of the analysis we use the outer-loop step. */
2251 step = STMT_VINFO_DR_STEP (stmt_info);
2252 dr_step = TREE_INT_CST_LOW (step);
2254 if (dr_step == 0)
2256 if (vect_print_dump_info (REPORT_ALIGNMENT))
2257 fprintf (vect_dump, "zero step in outer loop.");
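/* A read whose address is invariant in the outer loop behaves like a
   loop invariant and can still be vectorized; an invariant store is
   not supported.  */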
2258 if (DR_IS_READ (dr))
2259 return true;
2260 else
2261 return false;
2265 /* Consecutive? I.e., the step equals the element size, or minus the element size for a reversed access. */
2266 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2267 || (dr_step < 0
2268 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2270 /* Mark that it is not interleaving. */
2271 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2272 return true;
2275 if (loop && nested_in_vect_loop_p (loop, stmt))
2277 if (vect_print_dump_info (REPORT_ALIGNMENT))
2278 fprintf (vect_dump, "strided access in outer loop.");
2279 return false;
2282 /* Non-consecutive access - check whether it is part of an interleaving group. */
2283 return vect_analyze_group_access (dr);
2287 /* Function vect_analyze_data_ref_accesses.
2289 Analyze the access pattern of all the data references in the loop.
2291 FORNOW: the only access pattern that is considered vectorizable is a
2292 simple step 1 (consecutive) access.
2294 FORNOW: handle only arrays and pointer accesses. */
2296 bool
2297 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2299 unsigned int i;
2300 VEC (data_reference_p, heap) *datarefs;
2301 struct data_reference *dr;
2303 if (vect_print_dump_info (REPORT_DETAILS))
2304 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2306 if (loop_vinfo)
2307 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2308 else
2309 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2311 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2312 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2313 && !vect_analyze_data_ref_access (dr))
2315 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2316 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2318 if (bb_vinfo)
2320 /* Mark the statement as not vectorizable. */
2321 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2322 continue;
2324 else
2325 return false;
2328 return true;
2331 /* Function vect_prune_runtime_alias_test_list.
2333 Prune a list of ddrs to be tested at run-time by versioning for alias.
2334 Return FALSE if the resulting list of ddrs is longer than allowed by
2335 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2337 bool
2338 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2340 VEC (ddr_p, heap) * ddrs =
2341 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2342 unsigned i, j;
2344 if (vect_print_dump_info (REPORT_DETAILS))
2345 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2347 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2349 bool found;
2350 ddr_p ddr_i;
2352 ddr_i = VEC_index (ddr_p, ddrs, i);
2353 found = false;
2355 for (j = 0; j < i; j++)
2357 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2359 if (vect_vfa_range_equal (ddr_i, ddr_j))
2361 if (vect_print_dump_info (REPORT_DR_DETAILS))
2363 fprintf (vect_dump, "found equal ranges ");
2364 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2365 fprintf (vect_dump, ", ");
2366 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2367 fprintf (vect_dump, " and ");
2368 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2369 fprintf (vect_dump, ", ");
2370 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2372 found = true;
2373 break;
2377 if (found)
2379 VEC_ordered_remove (ddr_p, ddrs, i);
2380 continue;
2382 i++;
2385 if (VEC_length (ddr_p, ddrs) >
2386 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2388 if (vect_print_dump_info (REPORT_DR_DETAILS))
2390 fprintf (vect_dump,
2391 "disable versioning for alias - max number of generated "
2392 "checks exceeded.");
2395 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2397 return false;
2400 return true;
2404 /* Function vect_analyze_data_refs.
2406 Find all the data references in the loop or basic block.
2408 The general structure of the analysis of data refs in the vectorizer is as
2409 follows:
2410 1- vect_analyze_data_refs(loop/bb): call
2411 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2412 in the loop/bb and their dependences.
2413 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2414 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2415 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2419 bool
2420 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2421 bb_vec_info bb_vinfo,
2422 int *min_vf)
2424 struct loop *loop = NULL;
2425 basic_block bb = NULL;
2426 unsigned int i;
2427 VEC (data_reference_p, heap) *datarefs;
2428 struct data_reference *dr;
2429 tree scalar_type;
2430 bool res;
2432 if (vect_print_dump_info (REPORT_DETAILS))
2433 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2435 if (loop_vinfo)
2437 loop = LOOP_VINFO_LOOP (loop_vinfo);
2438 res = compute_data_dependences_for_loop
2439 (loop, true,
2440 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2441 &LOOP_VINFO_DATAREFS (loop_vinfo),
2442 &LOOP_VINFO_DDRS (loop_vinfo));
2444 if (!res)
2446 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2447 fprintf (vect_dump, "not vectorized: loop contains function calls"
2448 " or data references that cannot be analyzed");
2449 return false;
2452 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2454 else
2456 bb = BB_VINFO_BB (bb_vinfo);
2457 res = compute_data_dependences_for_bb (bb, true,
2458 &BB_VINFO_DATAREFS (bb_vinfo),
2459 &BB_VINFO_DDRS (bb_vinfo));
2460 if (!res)
2462 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2463 fprintf (vect_dump, "not vectorized: basic block contains function"
2464 " calls or data references that cannot be analyzed");
2465 return false;
2468 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2471 /* Go through the data-refs, check that the analysis succeeded. Update
2472 pointer from stmt_vec_info struct to DR and vectype. */
2474 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2476 gimple stmt;
2477 stmt_vec_info stmt_info;
2478 tree base, offset, init;
2479 int vf;
2481 if (!dr || !DR_REF (dr))
2483 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2484 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2485 return false;
2488 stmt = DR_STMT (dr);
2489 stmt_info = vinfo_for_stmt (stmt);
2491 /* Check that analysis of the data-ref succeeded. */
2492 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2493 || !DR_STEP (dr))
2495 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2497 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2498 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2501 if (bb_vinfo)
2503 /* Mark the statement as not vectorizable. */
2504 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2505 continue;
2507 else
2508 return false;
2511 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2513 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2514 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2515 "constant");
2516 if (bb_vinfo)
2518 /* Mark the statement as not vectorizable. */
2519 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2520 continue;
2522 else
2523 return false;
2526 base = unshare_expr (DR_BASE_ADDRESS (dr));
2527 offset = unshare_expr (DR_OFFSET (dr));
2528 init = unshare_expr (DR_INIT (dr));
2530 if (stmt_can_throw_internal (stmt))
2532 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2534 fprintf (vect_dump, "not vectorized: statement can throw an "
2535 "exception ");
2536 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2538 return false;
2541 /* Update DR field in stmt_vec_info struct. */
2543 /* If the dataref is in an inner-loop of the loop that is considered
2544 for vectorization, we also want to analyze the access relative to
2545 the outer-loop (DR contains information only relative to the
2546 inner-most enclosing loop). We do that by building a reference to the
2547 first location accessed by the inner-loop, and analyze it relative to
2548 the outer-loop. */
2549 if (loop && nested_in_vect_loop_p (loop, stmt))
2551 tree outer_step, outer_base, outer_init;
2552 HOST_WIDE_INT pbitsize, pbitpos;
2553 tree poffset;
2554 enum machine_mode pmode;
2555 int punsignedp, pvolatilep;
2556 affine_iv base_iv, offset_iv;
2557 tree dinit;
2559 /* Build a reference to the first location accessed by the
2560 inner-loop: *(BASE+INIT). (The first location is actually
2561 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2562 tree inner_base = build_fold_indirect_ref
2563 (fold_build2 (POINTER_PLUS_EXPR,
2564 TREE_TYPE (base), base,
2565 fold_convert (sizetype, init)));
2567 if (vect_print_dump_info (REPORT_DETAILS))
2569 fprintf (vect_dump, "analyze in outer-loop: ");
2570 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2573 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2574 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2575 gcc_assert (outer_base != NULL_TREE);
2577 if (pbitpos % BITS_PER_UNIT != 0)
2579 if (vect_print_dump_info (REPORT_DETAILS))
2580 fprintf (vect_dump, "failed: bit offset alignment.\n");
2581 return false;
2584 outer_base = build_fold_addr_expr (outer_base);
2585 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2586 &base_iv, false))
2588 if (vect_print_dump_info (REPORT_DETAILS))
2589 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2590 return false;
2593 if (offset)
2595 if (poffset)
2596 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2597 poffset);
2598 else
2599 poffset = offset;
2602 if (!poffset)
2604 offset_iv.base = ssize_int (0);
2605 offset_iv.step = ssize_int (0);
2607 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2608 &offset_iv, false))
2610 if (vect_print_dump_info (REPORT_DETAILS))
2611 fprintf (vect_dump, "evolution of offset is not affine.\n");
2612 return false;
2615 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2616 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2617 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2618 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2619 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2621 outer_step = size_binop (PLUS_EXPR,
2622 fold_convert (ssizetype, base_iv.step),
2623 fold_convert (ssizetype, offset_iv.step));
2625 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2626 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2627 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2628 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2629 STMT_VINFO_DR_OFFSET (stmt_info) =
2630 fold_convert (ssizetype, offset_iv.base);
2631 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2632 size_int (highest_pow2_factor (offset_iv.base));
2634 if (vect_print_dump_info (REPORT_DETAILS))
2636 fprintf (vect_dump, "\touter base_address: ");
2637 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2638 fprintf (vect_dump, "\n\touter offset from base address: ");
2639 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2640 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2641 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2642 fprintf (vect_dump, "\n\touter step: ");
2643 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2644 fprintf (vect_dump, "\n\touter aligned to: ");
2645 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2649 if (STMT_VINFO_DATA_REF (stmt_info))
2651 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2653 fprintf (vect_dump,
2654 "not vectorized: more than one data ref in stmt: ");
2655 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2657 return false;
2660 STMT_VINFO_DATA_REF (stmt_info) = dr;
2662 /* Set vectype for STMT. */
2663 scalar_type = TREE_TYPE (DR_REF (dr));
2664 STMT_VINFO_VECTYPE (stmt_info) =
2665 get_vectype_for_scalar_type (scalar_type);
2666 if (!STMT_VINFO_VECTYPE (stmt_info))
2668 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2670 fprintf (vect_dump,
2671 "not vectorized: no vectype for stmt: ");
2672 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2673 fprintf (vect_dump, " scalar_type: ");
2674 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2677 if (bb_vinfo)
2679 /* Mark the statement as not vectorizable. */
2680 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2681 continue;
2683 else
2684 return false;
2687 /* Adjust the minimal vectorization factor according to the
2688 vector type. */
2689 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2690 if (vf > *min_vf)
2691 *min_vf = vf;
2694 return true;
2698 /* Function vect_get_new_vect_var.
2700 Returns a new variable of type TYPE for use by the vectorizer. Its name is
2701 formed by prepending the prefix "vect_", "stmp_" or "vect_p" (depending on
2702 the value of VAR_KIND) to NAME, if NAME is provided; otherwise only the
2703 prefix is used. */
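/* For example, requesting a vect_simple_var with NAME "x" yields a new
   temporary whose name is derived from "vect_x".  */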
2705 tree
2706 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2708 const char *prefix;
2709 tree new_vect_var;
2711 switch (var_kind)
2713 case vect_simple_var:
2714 prefix = "vect_";
2715 break;
2716 case vect_scalar_var:
2717 prefix = "stmp_";
2718 break;
2719 case vect_pointer_var:
2720 prefix = "vect_p";
2721 break;
2722 default:
2723 gcc_unreachable ();
2726 if (name)
2728 char* tmp = concat (prefix, name, NULL);
2729 new_vect_var = create_tmp_var (type, tmp);
2730 free (tmp);
2732 else
2733 new_vect_var = create_tmp_var (type, prefix);
2735 /* Mark vector typed variable as a gimple register variable. */
2736 if (TREE_CODE (type) == VECTOR_TYPE)
2737 DECL_GIMPLE_REG_P (new_vect_var) = true;
2739 return new_vect_var;
2743 /* Function vect_create_addr_base_for_vector_ref.
2745 Create an expression that computes the address of the first memory location
2746 that will be accessed for a data reference.
2748 Input:
2749 STMT: The statement containing the data reference.
2750 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2751 OFFSET: Optional. If supplied, it is added to the initial address.
2752 LOOP: Specify the loop-nest relative to which the address should be computed.
2753 For example, when the dataref is in an inner-loop nested in an
2754 outer-loop that is now being vectorized, LOOP can be either the
2755 outer-loop, or the inner-loop. The first memory location accessed
2756 by the following dataref ('in' points to short):
2758 for (i=0; i<N; i++)
2759 for (j=0; j<M; j++)
2760 s += in[i+j]
2762 is as follows:
2763 if LOOP=i_loop: &in (relative to i_loop)
2764 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2766 Output:
2767 1. Return an SSA_NAME whose value is the address of the memory location of
2768 the first vector of the data reference.
2769 2. If new_stmt_list is not NULL_TREE after return, then the caller must insert
2770 the statement(s) that define the returned SSA_NAME.
2772 FORNOW: We are only handling array accesses with step 1. */
2774 tree
2775 vect_create_addr_base_for_vector_ref (gimple stmt,
2776 gimple_seq *new_stmt_list,
2777 tree offset,
2778 struct loop *loop)
2780 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2781 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2782 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2783 tree base_name;
2784 tree data_ref_base_var;
2785 tree vec_stmt;
2786 tree addr_base, addr_expr;
2787 tree dest;
2788 gimple_seq seq = NULL;
2789 tree base_offset = unshare_expr (DR_OFFSET (dr));
2790 tree init = unshare_expr (DR_INIT (dr));
2791 tree vect_ptr_type;
2792 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2793 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2794 tree base;
2796 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2798 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2800 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2802 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2803 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2804 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2807 if (loop_vinfo)
2808 base_name = build_fold_indirect_ref (data_ref_base);
2809 else
2811 base_offset = ssize_int (0);
2812 init = ssize_int (0);
2813 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2816 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2817 add_referenced_var (data_ref_base_var);
2818 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2819 data_ref_base_var);
2820 gimple_seq_add_seq (new_stmt_list, seq);
2822 /* Create base_offset */
2823 base_offset = size_binop (PLUS_EXPR,
2824 fold_convert (sizetype, base_offset),
2825 fold_convert (sizetype, init));
2826 dest = create_tmp_var (sizetype, "base_off");
2827 add_referenced_var (dest);
2828 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2829 gimple_seq_add_seq (new_stmt_list, seq);
2831 if (offset)
2833 tree tmp = create_tmp_var (sizetype, "offset");
2835 add_referenced_var (tmp);
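/* OFFSET is counted in elements; scale it by the element size (STEP)
   before adding it to the byte offset.  */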
2836 offset = fold_build2 (MULT_EXPR, sizetype,
2837 fold_convert (sizetype, offset), step);
2838 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2839 base_offset, offset);
2840 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2841 gimple_seq_add_seq (new_stmt_list, seq);
2844 /* base + base_offset */
2845 if (loop_vinfo)
2846 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2847 data_ref_base, base_offset);
2848 else
2850 addr_base = build1 (ADDR_EXPR,
2851 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2852 unshare_expr (DR_REF (dr)));
2855 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2856 base = get_base_address (DR_REF (dr));
2857 if (base
2858 && TREE_CODE (base) == MEM_REF)
2859 vect_ptr_type
2860 = build_qualified_type (vect_ptr_type,
2861 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2863 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2864 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2865 get_name (base_name));
2866 add_referenced_var (addr_expr);
2867 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2868 gimple_seq_add_seq (new_stmt_list, seq);
2870 if (DR_PTR_INFO (dr)
2871 && TREE_CODE (vec_stmt) == SSA_NAME)
2873 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2874 if (offset)
2876 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2877 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2881 if (vect_print_dump_info (REPORT_DETAILS))
2883 fprintf (vect_dump, "created ");
2884 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2887 return vec_stmt;
2891 /* Function vect_create_data_ref_ptr.
2893 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2894 location accessed in the loop by STMT, along with the def-use update
2895 chain to appropriately advance the pointer through the loop iterations.
2896 Also set aliasing information for the pointer. This pointer is used by
2897 the callers to this function to create a memory reference expression for
2898 vector load/store access.
2900 Input:
2901 1. STMT: a stmt that references memory. Expected to be of the form
2902 GIMPLE_ASSIGN <name, data-ref> or
2903 GIMPLE_ASSIGN <data-ref, name>.
2904 2. AGGR_TYPE: the type of the reference, which should be either a vector
2905 or an array.
2906 3. AT_LOOP: the loop where the vector memref is to be created.
2907 4. OFFSET (optional): an offset to be added to the initial address accessed
2908 by the data-ref in STMT.
2909 5. BSI: location where the new stmts are to be placed if there is no loop
2910 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2911 pointing to the initial address.
2913 Output:
2914 1. Declare a new ptr to vector_type, and have it point to the base of the
2915 data reference (the initial address accessed by the data reference).
2916 For example, for vector of type V8HI, the following code is generated:
2918 v8hi *ap;
2919 ap = (v8hi *)initial_address;
2921 if OFFSET is not supplied:
2922 initial_address = &a[init];
2923 if OFFSET is supplied:
2924 initial_address = &a[init + OFFSET];
2926 Return the initial_address in INITIAL_ADDRESS.
2928 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2929 update the pointer in each iteration of the loop.
2931 Return the increment stmt that updates the pointer in PTR_INCR.
2933 3. Set INV_P to true if the access pattern of the data reference in the
2934 vectorized loop is invariant. Set it to false otherwise.
2936 4. Return the pointer. */
2938 tree
2939 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
2940 tree offset, tree *initial_address,
2941 gimple_stmt_iterator *gsi, gimple *ptr_incr,
2942 bool only_init, bool *inv_p)
2944 tree base_name;
2945 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2946 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2947 struct loop *loop = NULL;
2948 bool nested_in_vect_loop = false;
2949 struct loop *containing_loop = NULL;
2950 tree aggr_ptr_type;
2951 tree aggr_ptr;
2952 tree new_temp;
2953 gimple vec_stmt;
2954 gimple_seq new_stmt_list = NULL;
2955 edge pe = NULL;
2956 basic_block new_bb;
2957 tree aggr_ptr_init;
2958 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2959 tree aptr;
2960 gimple_stmt_iterator incr_gsi;
2961 bool insert_after;
2962 bool negative;
2963 tree indx_before_incr, indx_after_incr;
2964 gimple incr;
2965 tree step;
2966 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2967 tree base;
2969 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
2970 || TREE_CODE (aggr_type) == VECTOR_TYPE);
2972 if (loop_vinfo)
2974 loop = LOOP_VINFO_LOOP (loop_vinfo);
2975 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2976 containing_loop = (gimple_bb (stmt))->loop_father;
2977 pe = loop_preheader_edge (loop);
2979 else
2981 gcc_assert (bb_vinfo);
2982 only_init = true;
2983 *ptr_incr = NULL;
2986 /* Check the step (evolution) of the load in LOOP, and record
2987 whether it's invariant. */
2988 if (nested_in_vect_loop)
2989 step = STMT_VINFO_DR_STEP (stmt_info);
2990 else
2991 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2993 if (tree_int_cst_compare (step, size_zero_node) == 0)
2994 *inv_p = true;
2995 else
2996 *inv_p = false;
2997 negative = tree_int_cst_compare (step, size_zero_node) < 0;
2999 /* Create an expression for the first address accessed by this load
3000 in LOOP. */
3001 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3003 if (vect_print_dump_info (REPORT_DETAILS))
3005 tree data_ref_base = base_name;
3006 fprintf (vect_dump, "create %s-pointer variable to type: ",
3007 tree_code_name[(int) TREE_CODE (aggr_type)]);
3008 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3009 if (TREE_CODE (data_ref_base) == VAR_DECL
3010 || TREE_CODE (data_ref_base) == ARRAY_REF)
3011 fprintf (vect_dump, " vectorizing an array ref: ");
3012 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3013 fprintf (vect_dump, " vectorizing a record based array ref: ");
3014 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3015 fprintf (vect_dump, " vectorizing a pointer ref: ");
3016 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3019 /* (1) Create the new aggregate-pointer variable. */
3020 aggr_ptr_type = build_pointer_type (aggr_type);
3021 base = get_base_address (DR_REF (dr));
3022 if (base
3023 && TREE_CODE (base) == MEM_REF)
3024 aggr_ptr_type
3025 = build_qualified_type (aggr_ptr_type,
3026 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3027 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3028 get_name (base_name));
3030 /* Vector and array types inherit the alias set of their component
3031 type by default so we need to use a ref-all pointer if the data
3032 reference does not conflict with the created aggregated data
3033 reference because it is not addressable. */
3034 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3035 get_alias_set (DR_REF (dr))))
3037 aggr_ptr_type
3038 = build_pointer_type_for_mode (aggr_type,
3039 TYPE_MODE (aggr_ptr_type), true);
3040 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3041 get_name (base_name));
3044 /* Likewise for any of the data references in the stmt group. */
3045 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3047 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3050 tree lhs = gimple_assign_lhs (orig_stmt);
3051 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3052 get_alias_set (lhs)))
3054 aggr_ptr_type
3055 = build_pointer_type_for_mode (aggr_type,
3056 TYPE_MODE (aggr_ptr_type), true);
3057 aggr_ptr
3058 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3059 get_name (base_name));
3060 break;
3063 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3065 while (orig_stmt);
3068 add_referenced_var (aggr_ptr);
3070 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3071 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3072 def-use update cycles for the pointer: one relative to the outer-loop
3073 (LOOP), which is what steps (3) and (4) below do. The other is relative
3074 to the inner-loop (which is the inner-most loop containing the dataref),
3075 and this is done by step (5) below.
3077 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3078 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3079 redundant. Steps (3),(4) create the following:
3081 vp0 = &base_addr;
3082 LOOP: vp1 = phi(vp0,vp2)
3085 vp2 = vp1 + step
3086 goto LOOP
3088 If there is an inner-loop nested in loop, then step (5) will also be
3089 applied, and an additional update in the inner-loop will be created:
3091 vp0 = &base_addr;
3092 LOOP: vp1 = phi(vp0,vp2)
3094 inner: vp3 = phi(vp1,vp4)
3095 vp4 = vp3 + inner_step
3096 if () goto inner
3098 vp2 = vp1 + step
3099 if () goto LOOP */
3101 /* (2) Calculate the initial address of the aggregate-pointer, and set
3102 the aggregate-pointer to point to it before the loop. */
3104 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3106 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3107 offset, loop);
3108 if (new_stmt_list)
3110 if (pe)
3112 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3113 gcc_assert (!new_bb);
3115 else
3116 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3119 *initial_address = new_temp;
3121 /* Create: p = (aggr_type *) initial_base */
3122 if (TREE_CODE (new_temp) != SSA_NAME
3123 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3125 vec_stmt = gimple_build_assign (aggr_ptr,
3126 fold_convert (aggr_ptr_type, new_temp));
3127 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3128 /* Copy the points-to information if it exists. */
3129 if (DR_PTR_INFO (dr))
3130 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3131 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3132 if (pe)
3134 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3135 gcc_assert (!new_bb);
3137 else
3138 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3140 else
3141 aggr_ptr_init = new_temp;
3143 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3144 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3145 inner-loop nested in LOOP (during outer-loop vectorization). */
3147 /* No update in loop is required. */
3148 if (only_init && (!loop_vinfo || at_loop == loop))
3149 aptr = aggr_ptr_init;
3150 else
3152 /* The step of the aggregate pointer is the type size. */
3153 tree step = TYPE_SIZE_UNIT (aggr_type);
3154 /* One exception to the above is when the scalar step of the load in
3155 LOOP is zero. In this case the step here is also zero. */
3156 if (*inv_p)
3157 step = size_zero_node;
3158 else if (negative)
3159 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3161 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3163 create_iv (aggr_ptr_init,
3164 fold_convert (aggr_ptr_type, step),
3165 aggr_ptr, loop, &incr_gsi, insert_after,
3166 &indx_before_incr, &indx_after_incr);
3167 incr = gsi_stmt (incr_gsi);
3168 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3170 /* Copy the points-to information if it exists. */
3171 if (DR_PTR_INFO (dr))
3173 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3174 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3176 if (ptr_incr)
3177 *ptr_incr = incr;
3179 aptr = indx_before_incr;
3182 if (!nested_in_vect_loop || only_init)
3183 return aptr;
3186 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3187 nested in LOOP, if it exists. */
3189 gcc_assert (nested_in_vect_loop);
3190 if (!only_init)
3192 standard_iv_increment_position (containing_loop, &incr_gsi,
3193 &insert_after);
3194 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3195 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3196 &indx_after_incr);
3197 incr = gsi_stmt (incr_gsi);
3198 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3200 /* Copy the points-to information if it exists. */
3201 if (DR_PTR_INFO (dr))
3203 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3204 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3206 if (ptr_incr)
3207 *ptr_incr = incr;
3209 return indx_before_incr;
3211 else
3212 gcc_unreachable ();
3216 /* Function bump_vector_ptr
3218 Increment a pointer (to a vector type) by vector-size. If requested,
3219 i.e. if PTR-INCR is given, then also connect the new increment stmt
3220 to the existing def-use update-chain of the pointer, by modifying
3221 the PTR_INCR as illustrated below:
3223 The pointer def-use update-chain before this function:
3224 DATAREF_PTR = phi (p_0, p_2)
3225 ....
3226 PTR_INCR: p_2 = DATAREF_PTR + step
3228 The pointer def-use update-chain after this function:
3229 DATAREF_PTR = phi (p_0, p_2)
3230 ....
3231 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3232 ....
3233 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3235 Input:
3236 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3237 in the loop.
3238 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3239 the loop. The increment amount across iterations is expected
3240 to be vector_size.
3241 BSI - location where the new update stmt is to be placed.
3242 STMT - the original scalar memory-access stmt that is being vectorized.
3243 BUMP - optional. The offset by which to bump the pointer. If not given,
3244 the offset is assumed to be vector_size.
3246 Output: Return NEW_DATAREF_PTR as illustrated above.
3250 tree
3251 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3252 gimple stmt, tree bump)
3254 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3255 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3256 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3257 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3258 tree update = TYPE_SIZE_UNIT (vectype);
3259 gimple incr_stmt;
3260 ssa_op_iter iter;
3261 use_operand_p use_p;
3262 tree new_dataref_ptr;
3264 if (bump)
3265 update = bump;
3267 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3268 dataref_ptr, update);
3269 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3270 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3271 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3273 /* Copy the points-to information if it exists. */
3274 if (DR_PTR_INFO (dr))
3276 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3277 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3278 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3281 if (!ptr_incr)
3282 return new_dataref_ptr;
3284 /* Update the vector-pointer's cross-iteration increment. */
3285 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3287 tree use = USE_FROM_PTR (use_p);
3289 if (use == dataref_ptr)
3290 SET_USE (use_p, new_dataref_ptr);
3291 else
3292 gcc_assert (tree_int_cst_compare (use, update) == 0);
3295 return new_dataref_ptr;
3299 /* Function vect_create_destination_var.
3301 Create a new temporary of type VECTYPE. */
3303 tree
3304 vect_create_destination_var (tree scalar_dest, tree vectype)
3306 tree vec_dest;
3307 const char *new_name;
3308 tree type;
3309 enum vect_var_kind kind;
3311 kind = vectype ? vect_simple_var : vect_scalar_var;
3312 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3314 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3316 new_name = get_name (scalar_dest);
3317 if (!new_name)
3318 new_name = "var_";
3319 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3320 add_referenced_var (vec_dest);
3322 return vec_dest;
3325 /* Function vect_strided_store_supported.
3327 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3328 and FALSE otherwise. */
3330 bool
3331 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3333 optab interleave_high_optab, interleave_low_optab;
3334 enum machine_mode mode;
3336 mode = TYPE_MODE (vectype);
3338 /* vect_permute_store_chain requires the group size to be a power of two. */
3339 if (exact_log2 (count) == -1)
3341 if (vect_print_dump_info (REPORT_DETAILS))
3342 fprintf (vect_dump, "the size of the group of strided accesses"
3343 " is not a power of 2");
3344 return false;
3347 /* Check that the operation is supported. */
3348 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3349 vectype, optab_default);
3350 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3351 vectype, optab_default);
3352 if (!interleave_high_optab || !interleave_low_optab)
3354 if (vect_print_dump_info (REPORT_DETAILS))
3355 fprintf (vect_dump, "no optab for interleave.");
3356 return false;
3359 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3360 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3362 if (vect_print_dump_info (REPORT_DETAILS))
3363 fprintf (vect_dump, "interleave op not supported by target.");
3364 return false;
3367 return true;
3371 /* Function vect_permute_store_chain.
3373 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3374 a power of 2, generate interleave_high/low stmts to reorder the data
3375 correctly for the stores. Return the final references for stores in
3376 RESULT_CHAIN.
3378 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3379 The input is 4 vectors each containing 8 elements. We assign a number to
3380 each element, the input sequence is:
3382 1st vec: 0 1 2 3 4 5 6 7
3383 2nd vec: 8 9 10 11 12 13 14 15
3384 3rd vec: 16 17 18 19 20 21 22 23
3385 4th vec: 24 25 26 27 28 29 30 31
3387 The output sequence should be:
3389 1st vec: 0 8 16 24 1 9 17 25
3390 2nd vec: 2 10 18 26 3 11 19 27
3391 3rd vec: 4 12 20 28 5 13 21 29
3392 4th vec: 6 14 22 30 7 15 23 31
3394 i.e., we interleave the contents of the four vectors in their order.
3396 We use interleave_high/low instructions to create such output. The input of
3397 each interleave_high/low operation is two vectors:
3398 1st vec 2nd vec
3399 0 1 2 3 4 5 6 7
3400 the even elements of the result vector are obtained left-to-right from the
3401 high/low elements of the first vector. The odd elements of the result are
3402 obtained left-to-right from the high/low elements of the second vector.
3403 The output of interleave_high will be: 0 4 1 5
3404 and of interleave_low: 2 6 3 7
3407 The permutation is done in log LENGTH stages. In each stage interleave_high
3408 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3409 where the first argument is taken from the first half of DR_CHAIN and the
3410 second argument from its second half.
3411 In our example,
3413 I1: interleave_high (1st vec, 3rd vec)
3414 I2: interleave_low (1st vec, 3rd vec)
3415 I3: interleave_high (2nd vec, 4th vec)
3416 I4: interleave_low (2nd vec, 4th vec)
3418 The output for the first stage is:
3420 I1: 0 16 1 17 2 18 3 19
3421 I2: 4 20 5 21 6 22 7 23
3422 I3: 8 24 9 25 10 26 11 27
3423 I4: 12 28 13 29 14 30 15 31
3425 The output of the second stage, i.e. the final result is:
3427 I1: 0 8 16 24 1 9 17 25
3428 I2: 2 10 18 26 3 11 19 27
3429 I3: 4 12 20 28 5 13 21 29
3430 I4: 6 14 22 30 7 15 23 31. */
3432 void
3433 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3434 unsigned int length,
3435 gimple stmt,
3436 gimple_stmt_iterator *gsi,
3437 VEC(tree,heap) **result_chain)
3439 tree perm_dest, vect1, vect2, high, low;
3440 gimple perm_stmt;
3441 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3442 int i;
3443 unsigned int j;
3444 enum tree_code high_code, low_code;
3446 gcc_assert (vect_strided_store_supported (vectype, length));
3448 *result_chain = VEC_copy (tree, heap, dr_chain);
3450 for (i = 0; i < exact_log2 (length); i++)
3452 for (j = 0; j < length/2; j++)
3454 vect1 = VEC_index (tree, dr_chain, j);
3455 vect2 = VEC_index (tree, dr_chain, j+length/2);
3457 /* Create interleaving stmt:
3458 in the case of big endian:
3459 high = interleave_high (vect1, vect2)
3460 and in the case of little endian:
3461 high = interleave_low (vect1, vect2). */
3462 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3463 DECL_GIMPLE_REG_P (perm_dest) = 1;
3464 add_referenced_var (perm_dest);
3465 if (BYTES_BIG_ENDIAN)
3467 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3468 low_code = VEC_INTERLEAVE_LOW_EXPR;
3470 else
3472 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3473 high_code = VEC_INTERLEAVE_LOW_EXPR;
3475 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3476 vect1, vect2);
3477 high = make_ssa_name (perm_dest, perm_stmt);
3478 gimple_assign_set_lhs (perm_stmt, high);
3479 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3480 VEC_replace (tree, *result_chain, 2*j, high);
3482 /* Create interleaving stmt:
3483 in the case of big endian:
3484 low = interleave_low (vect1, vect2)
3485 and in the case of little endian:
3486 low = interleave_high (vect1, vect2). */
3487 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3488 DECL_GIMPLE_REG_P (perm_dest) = 1;
3489 add_referenced_var (perm_dest);
3490 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3491 vect1, vect2);
3492 low = make_ssa_name (perm_dest, perm_stmt);
3493 gimple_assign_set_lhs (perm_stmt, low);
3494 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3495 VEC_replace (tree, *result_chain, 2*j+1, low);
3497 dr_chain = VEC_copy (tree, heap, *result_chain);
3501 /* Function vect_setup_realignment
3503 This function is called when vectorizing an unaligned load using
3504 the dr_explicit_realign[_optimized] scheme.
3505 This function generates the following code at the loop prolog:
3507 p = initial_addr;
3508 x msq_init = *(floor(p)); # prolog load
3509 realignment_token = call target_builtin;
3510 loop:
3511 x msq = phi (msq_init, ---)
3513 The stmts marked with x are generated only for the case of
3514 dr_explicit_realign_optimized.
3516 The code above sets up a new (vector) pointer, pointing to the first
3517 location accessed by STMT, and a "floor-aligned" load using that pointer.
3518 It also generates code to compute the "realignment-token" (if the relevant
3519 target hook was defined), and creates a phi-node at the loop-header bb
3520 whose arguments are the result of the prolog-load (created by this
3521 function) and the result of a load that takes place in the loop (to be
3522 created by the caller to this function).
3524 For the case of dr_explicit_realign_optimized:
3525 The caller to this function uses the phi-result (msq) to create the
3526 realignment code inside the loop, and sets up the missing phi argument,
3527 as follows:
3528 loop:
3529 msq = phi (msq_init, lsq)
3530 lsq = *(floor(p')); # load in loop
3531 result = realign_load (msq, lsq, realignment_token);
3533 For the case of dr_explicit_realign:
3534 loop:
3535 msq = *(floor(p)); # load in loop
3536 p' = p + (VS-1);
3537 lsq = *(floor(p')); # load in loop
3538 result = realign_load (msq, lsq, realignment_token);
3540 Input:
3541 STMT - (scalar) load stmt to be vectorized. This load accesses
3542 a memory location that may be unaligned.
3543 BSI - place where new code is to be inserted.
3544 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3545 is used.
3547 Output:
3548 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3549 target hook, if defined.
3550 Return value - the result of the loop-header phi node. */
3552 tree
3553 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3554 tree *realignment_token,
3555 enum dr_alignment_support alignment_support_scheme,
3556 tree init_addr,
3557 struct loop **at_loop)
3559 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3560 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3561 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3562 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3563 struct loop *loop = NULL;
3564 edge pe = NULL;
3565 tree scalar_dest = gimple_assign_lhs (stmt);
3566 tree vec_dest;
3567 gimple inc;
3568 tree ptr;
3569 tree data_ref;
3570 gimple new_stmt;
3571 basic_block new_bb;
3572 tree msq_init = NULL_TREE;
3573 tree new_temp;
3574 gimple phi_stmt;
3575 tree msq = NULL_TREE;
3576 gimple_seq stmts = NULL;
3577 bool inv_p;
3578 bool compute_in_loop = false;
3579 bool nested_in_vect_loop = false;
3580 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3581 struct loop *loop_for_initial_load = NULL;
3583 if (loop_vinfo)
3585 loop = LOOP_VINFO_LOOP (loop_vinfo);
3586 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3589 gcc_assert (alignment_support_scheme == dr_explicit_realign
3590 || alignment_support_scheme == dr_explicit_realign_optimized);
3592 /* We need to generate three things:
3593 1. the misalignment computation
3594 2. the extra vector load (for the optimized realignment scheme).
3595 3. the phi node for the two vectors from which the realignment is
3596 done (for the optimized realignment scheme). */
3598 /* 1. Determine where to generate the misalignment computation.
3600 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3601 calculation will be generated by this function, outside the loop (in the
3602 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3603 caller, inside the loop.
3605 Background: If the misalignment remains fixed throughout the iterations of
3606 the loop, then both realignment schemes are applicable, and also the
3607 misalignment computation can be done outside LOOP. This is because we are
3608 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3609 are a multiple of VS (the Vector Size), and therefore the misalignment in
3610 different vectorized LOOP iterations is always the same.
3611 The problem arises only if the memory access is in an inner-loop nested
3612 inside LOOP, which is now being vectorized using outer-loop vectorization.
3613 This is the only case when the misalignment of the memory access may not
3614 remain fixed throughout the iterations of the inner-loop (as explained in
3615 detail in vect_supportable_dr_alignment). In this case, not only is the
3616 optimized realignment scheme not applicable, but also the misalignment
3617 computation (and generation of the realignment token that is passed to
3618 REALIGN_LOAD) has to be done inside the loop.
3620 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3621 or not, which in turn determines if the misalignment is computed inside
3622 the inner-loop, or outside LOOP. */
3624 if (init_addr != NULL_TREE || !loop_vinfo)
3626 compute_in_loop = true;
3627 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3631 /* 2. Determine where to generate the extra vector load.
3633 For the optimized realignment scheme, instead of generating two vector
3634 loads in each iteration, we generate a single extra vector load in the
3635 preheader of the loop, and in each iteration reuse the result of the
3636 vector load from the previous iteration. In case the memory access is in
3637 an inner-loop nested inside LOOP, which is now being vectorized using
3638 outer-loop vectorization, we need to determine whether this initial vector
3639 load should be generated at the preheader of the inner-loop, or can be
3640 generated at the preheader of LOOP. If the memory access has no evolution
3641 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3642 to be generated inside LOOP (in the preheader of the inner-loop). */
3644 if (nested_in_vect_loop)
3646 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3647 bool invariant_in_outerloop =
3648 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3649 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3651 else
3652 loop_for_initial_load = loop;
3653 if (at_loop)
3654 *at_loop = loop_for_initial_load;
3656 if (loop_for_initial_load)
3657 pe = loop_preheader_edge (loop_for_initial_load);
3659 /* 3. For the case of the optimized realignment, create the first vector
3660 load at the loop preheader. */
3662 if (alignment_support_scheme == dr_explicit_realign_optimized)
3664 /* Create msq_init = *(floor(p1)) in the loop preheader */
3666 gcc_assert (!compute_in_loop);
3667 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3668 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
3669 NULL_TREE, &init_addr, NULL, &inc,
3670 true, &inv_p);
3671 new_stmt = gimple_build_assign_with_ops
3672 (BIT_AND_EXPR, NULL_TREE, ptr,
3673 build_int_cst (TREE_TYPE (ptr),
3674 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3675 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3676 gimple_assign_set_lhs (new_stmt, new_temp);
3677 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3678 gcc_assert (!new_bb);
3679 data_ref
3680 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3681 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3682 new_stmt = gimple_build_assign (vec_dest, data_ref);
3683 new_temp = make_ssa_name (vec_dest, new_stmt);
3684 gimple_assign_set_lhs (new_stmt, new_temp);
3685 mark_symbols_for_renaming (new_stmt);
3686 if (pe)
3688 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3689 gcc_assert (!new_bb);
3691 else
3692 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3694 msq_init = gimple_assign_lhs (new_stmt);
3697 /* 4. Create realignment token using a target builtin, if available.
3698 It is done either inside the containing loop, or before LOOP (as
3699 determined above). */
3701 if (targetm.vectorize.builtin_mask_for_load)
3703 tree builtin_decl;
3705 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3706 if (!init_addr)
3708 /* Generate the INIT_ADDR computation outside LOOP. */
3709 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3710 NULL_TREE, loop);
3711 if (loop)
3713 pe = loop_preheader_edge (loop);
3714 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3715 gcc_assert (!new_bb);
3717 else
3718 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3721 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3722 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3723 vec_dest =
3724 vect_create_destination_var (scalar_dest,
3725 gimple_call_return_type (new_stmt));
3726 new_temp = make_ssa_name (vec_dest, new_stmt);
3727 gimple_call_set_lhs (new_stmt, new_temp);
3729 if (compute_in_loop)
3730 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3731 else
3733 /* Generate the misalignment computation outside LOOP. */
3734 pe = loop_preheader_edge (loop);
3735 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3736 gcc_assert (!new_bb);
3739 *realignment_token = gimple_call_lhs (new_stmt);
3741 /* The result of the CALL_EXPR to this builtin is determined from
3742 the value of the parameter and no global variables are touched
3743 which makes the builtin a "const" function. Requiring the
3744 builtin to have the "const" attribute makes it unnecessary
3745 to call mark_call_clobbered. */
3746 gcc_assert (TREE_READONLY (builtin_decl));
3749 if (alignment_support_scheme == dr_explicit_realign)
3750 return msq;
3752 gcc_assert (!compute_in_loop);
3753 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3756 /* 5. Create msq = phi <msq_init, lsq> in loop */
3758 pe = loop_preheader_edge (containing_loop);
3759 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3760 msq = make_ssa_name (vec_dest, NULL);
3761 phi_stmt = create_phi_node (msq, containing_loop->header);
3762 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3763 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3765 return msq;
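/* Illustrative sketch, not part of the vectorizer: a byte-level model of the
   explicit realignment scheme set up above.  MSQ holds the aligned vector at
   or below the unaligned address, LSQ the next aligned vector, and OFF is
   the misalignment in bytes; REALIGN_LOAD then recovers the unaligned vector
   from the two.  The 16-byte vector size is an assumption for the example.  */

#define MODEL_VECSIZE 16

static void
model_realign_load (const unsigned char *msq, const unsigned char *lsq,
                    int off, unsigned char *result)
{
  int j;

  /* result = realign_load (msq, lsq, token): take the tail of MSQ starting
     at the misalignment offset, then the head of LSQ.  */
  for (j = 0; j < MODEL_VECSIZE; j++)
    result[j] = (off + j < MODEL_VECSIZE) ? msq[off + j]
                                          : lsq[off + j - MODEL_VECSIZE];
}

/* In the optimized scheme, MSQ for the first iteration is the prolog load
   (msq_init) created above, and each iteration's LSQ becomes the next
   iteration's MSQ through the loop-header phi.  */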
3769 /* Function vect_strided_load_supported.
3771 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3772 and FALSE otherwise. */
3774 bool
3775 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
3777 optab perm_even_optab, perm_odd_optab;
3778 enum machine_mode mode;
3780 mode = TYPE_MODE (vectype);
3782 /* vect_permute_load_chain requires the group size to be a power of two. */
3783 if (exact_log2 (count) == -1)
3785 if (vect_print_dump_info (REPORT_DETAILS))
3786 fprintf (vect_dump, "the size of the group of strided accesses"
3787 " is not a power of 2");
3788 return false;
3791 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3792 optab_default);
3793 if (!perm_even_optab)
3795 if (vect_print_dump_info (REPORT_DETAILS))
3796 fprintf (vect_dump, "no optab for perm_even.");
3797 return false;
3800 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3802 if (vect_print_dump_info (REPORT_DETAILS))
3803 fprintf (vect_dump, "perm_even op not supported by target.");
3804 return false;
3807 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3808 optab_default);
3809 if (!perm_odd_optab)
3811 if (vect_print_dump_info (REPORT_DETAILS))
3812 fprintf (vect_dump, "no optab for perm_odd.");
3813 return false;
3816 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3818 if (vect_print_dump_info (REPORT_DETAILS))
3819 fprintf (vect_dump, "perm_odd op not supported by target.");
3820 return false;
3822 return true;
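/* Illustrative sketch, not part of the vectorizer: the power-of-two test
   that exact_log2 performs above, written out directly.  A power of two has
   exactly one bit set, so group sizes of 2, 4, 8, ... pass while e.g. 3 or
   6 are rejected.  */

static int
model_group_size_is_power_of_two (unsigned int count)
{
  return count != 0 && (count & (count - 1)) == 0;
}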
3826 /* Function vect_permute_load_chain.
3828 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3829 a power of 2, generate extract_even/odd stmts to reorder the input data
3830 correctly. Return the final references for loads in RESULT_CHAIN.
3832 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3833 The input is 4 vectors each containing 8 elements. We assign a number to each
3834 element, the input sequence is:
3836 1st vec: 0 1 2 3 4 5 6 7
3837 2nd vec: 8 9 10 11 12 13 14 15
3838 3rd vec: 16 17 18 19 20 21 22 23
3839 4th vec: 24 25 26 27 28 29 30 31
3841 The output sequence should be:
3843 1st vec: 0 4 8 12 16 20 24 28
3844 2nd vec: 1 5 9 13 17 21 25 29
3845 3rd vec: 2 6 10 14 18 22 26 30
3846 4th vec: 3 7 11 15 19 23 27 31
3848 i.e., the first output vector should contain the first elements of each
3849 interleaving group, etc.
3851 We use extract_even/odd instructions to create such output. The input of
3852 each extract_even/odd operation is two vectors
3853 1st vec 2nd vec
3854 0 1 2 3 4 5 6 7
3856 and the output is the vector of extracted even/odd elements. The output of
3857 extract_even will be: 0 2 4 6
3858 and of extract_odd: 1 3 5 7
3861 The permutation is done in log LENGTH stages. In each stage extract_even
3862 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3863 their order. In our example,
3865 E1: extract_even (1st vec, 2nd vec)
3866 E2: extract_odd (1st vec, 2nd vec)
3867 E3: extract_even (3rd vec, 4th vec)
3868 E4: extract_odd (3rd vec, 4th vec)
3870 The output for the first stage will be:
3872 E1: 0 2 4 6 8 10 12 14
3873 E2: 1 3 5 7 9 11 13 15
3874 E3: 16 18 20 22 24 26 28 30
3875 E4: 17 19 21 23 25 27 29 31
3877 In order to proceed and create the correct sequence for the next stage (or
3878 for the correct output, if the second stage is the last one, as in our
3879 example), we first put the output of extract_even operation and then the
3880 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3881 The input for the second stage is:
3883 1st vec (E1): 0 2 4 6 8 10 12 14
3884 2nd vec (E3): 16 18 20 22 24 26 28 30
3885 3rd vec (E2): 1 3 5 7 9 11 13 15
3886 4th vec (E4): 17 19 21 23 25 27 29 31
3888 The output of the second stage:
3890 E1: 0 4 8 12 16 20 24 28
3891 E2: 2 6 10 14 18 22 26 30
3892 E3: 1 5 9 13 17 21 25 29
3893 E4: 3 7 11 15 19 23 27 31
3895 And RESULT_CHAIN after reordering:
3897 1st vec (E1): 0 4 8 12 16 20 24 28
3898 2nd vec (E3): 1 5 9 13 17 21 25 29
3899 3rd vec (E2): 2 6 10 14 18 22 26 30
3900 4th vec (E4): 3 7 11 15 19 23 27 31. */
3902 static void
3903 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3904 unsigned int length,
3905 gimple stmt,
3906 gimple_stmt_iterator *gsi,
3907 VEC(tree,heap) **result_chain)
3909 tree perm_dest, data_ref, first_vect, second_vect;
3910 gimple perm_stmt;
3911 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3912 int i;
3913 unsigned int j;
3915 gcc_assert (vect_strided_load_supported (vectype, length));
3917 *result_chain = VEC_copy (tree, heap, dr_chain);
3918 for (i = 0; i < exact_log2 (length); i++)
3920 for (j = 0; j < length; j +=2)
3922 first_vect = VEC_index (tree, dr_chain, j);
3923 second_vect = VEC_index (tree, dr_chain, j+1);
3925 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3926 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3927 DECL_GIMPLE_REG_P (perm_dest) = 1;
3928 add_referenced_var (perm_dest);
3930 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3931 perm_dest, first_vect,
3932 second_vect);
3934 data_ref = make_ssa_name (perm_dest, perm_stmt);
3935 gimple_assign_set_lhs (perm_stmt, data_ref);
3936 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3937 mark_symbols_for_renaming (perm_stmt);
3939 VEC_replace (tree, *result_chain, j/2, data_ref);
3941 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3942 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3943 DECL_GIMPLE_REG_P (perm_dest) = 1;
3944 add_referenced_var (perm_dest);
3946 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3947 perm_dest, first_vect,
3948 second_vect);
3949 data_ref = make_ssa_name (perm_dest, perm_stmt);
3950 gimple_assign_set_lhs (perm_stmt, data_ref);
3951 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3952 mark_symbols_for_renaming (perm_stmt);
3954 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3956 dr_chain = VEC_copy (tree, heap, *result_chain);
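/* Illustrative sketch, not part of the vectorizer: a scalar model of the
   extract_even/extract_odd permutation performed above, using plain arrays
   of ints.  The element count per vector (8) and the bound of 16 vectors
   are assumptions made only for this example; LENGTH must be a power of
   two, as checked above.  */

#define MODEL_NELT 8

static void
model_extract_even_odd (const int *v1, const int *v2, int *even, int *odd)
{
  int k;

  for (k = 0; k < MODEL_NELT / 2; k++)
    {
      even[k] = v1[2 * k];
      even[MODEL_NELT / 2 + k] = v2[2 * k];
      odd[k] = v1[2 * k + 1];
      odd[MODEL_NELT / 2 + k] = v2[2 * k + 1];
    }
}

static void
model_permute_load_chain (int chain[][MODEL_NELT], int length)
{
  int result[16][MODEL_NELT];  /* assumes LENGTH <= 16 */
  int i, j, stages = 0;

  for (i = length; i > 1; i >>= 1)
    stages++;

  for (i = 0; i < stages; i++)
    {
      for (j = 0; j < length; j += 2)
	{
	  /* The even extraction goes to the first half of the result chain,
	     the odd extraction to the second half, as in the loop above.  */
	  model_extract_even_odd (chain[j], chain[j + 1],
				  result[j / 2], result[j / 2 + length / 2]);
	}
      __builtin_memcpy (chain, result, length * MODEL_NELT * sizeof (int));
    }
}

/* Running this model on the 4 x 8 example from the comment above yields the
   documented RESULT_CHAIN: 0 4 8 12 16 20 24 28, 1 5 9 13 ..., etc.  */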
3961 /* Function vect_transform_strided_load.
3963 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3964 to perform their permutation and ascribe the resulting vectorized statements to
3965 the scalar statements.
3968 void
3969 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3970 gimple_stmt_iterator *gsi)
3972 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3973 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3974 gimple next_stmt, new_stmt;
3975 VEC(tree,heap) *result_chain = NULL;
3976 unsigned int i, gap_count;
3977 tree tmp_data_ref;
3979 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3980 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3981 vectors, that are ready for vector computation. */
3982 result_chain = VEC_alloc (tree, heap, size);
3983 /* Permute. */
3984 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
3986 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3987 Since we scan the chain starting from its first node, their order
3988 corresponds to the order of data-refs in RESULT_CHAIN. */
3989 next_stmt = first_stmt;
3990 gap_count = 1;
3991 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
3993 if (!next_stmt)
3994 break;
3996 /* Skip the gaps. Loads created for the gaps will be removed by the dead
3997 code elimination pass later. No need to check for the first stmt in
3998 the group, since it always exists.
3999 DR_GROUP_GAP is the number of steps in elements from the previous
4000 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4001 correspond to the gaps. */
4002 if (next_stmt != first_stmt
4003 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4005 gap_count++;
4006 continue;
4009 while (next_stmt)
4011 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4012 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4013 copies, and we put the new vector statement in the first available
4014 RELATED_STMT. */
4015 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4016 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4017 else
4019 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4021 gimple prev_stmt =
4022 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4023 gimple rel_stmt =
4024 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4025 while (rel_stmt)
4027 prev_stmt = rel_stmt;
4028 rel_stmt =
4029 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4032 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4033 new_stmt;
4037 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4038 gap_count = 1;
4039 /* If NEXT_STMT accesses the same DR as the previous statement,
4040 put the same TMP_DATA_REF as its vectorized statement; otherwise
4041 get the next data-ref from RESULT_CHAIN. */
4042 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4043 break;
4047 VEC_free (tree, heap, result_chain);
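/* Illustrative example, not part of the vectorizer: a scalar loop whose
   loads form an interleaving group with a gap.  The array name and group
   size are assumptions made only for this example.  The element at offset 2
   of each group is never read, so the permuted vector produced for it is
   skipped above when assigning vectorized statements and is later removed
   by dead code elimination.  */

static int
model_strided_load_with_gap (const int *a, int n)
{
  int i, s = 0;

  for (i = 0; i < n; i++)
    /* Group of size 4: accesses at offsets 0, 1 and 3; offset 2 is a gap.  */
    s += a[4 * i] + a[4 * i + 1] + a[4 * i + 3];
  return s;
}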
4050 /* Function vect_can_force_dr_alignment_p.
4052 Returns whether the alignment of a DECL can be forced to be aligned
4053 on ALIGNMENT bit boundary. */
4055 bool
4056 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4058 if (TREE_CODE (decl) != VAR_DECL)
4059 return false;
4061 if (DECL_EXTERNAL (decl))
4062 return false;
4064 if (TREE_ASM_WRITTEN (decl))
4065 return false;
4067 if (TREE_STATIC (decl))
4068 return (alignment <= MAX_OFILE_ALIGNMENT);
4069 else
4070 return (alignment <= MAX_STACK_ALIGNMENT);
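/* Illustrative example, not part of the vectorizer: decls for which the
   check above can and cannot succeed.  The names are hypothetical.  A
   definition we emit ourselves can have its alignment raised (bounded by
   MAX_OFILE_ALIGNMENT for static storage, MAX_STACK_ALIGNMENT for locals),
   whereas an external definition cannot, since its alignment is fixed
   elsewhere.  */

static int forceable_array[64];  /* TREE_STATIC: alignment may be raised.  */
extern int external_array[64];   /* DECL_EXTERNAL: alignment cannot be forced.  */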
4074 /* Return whether the data reference DR is supported with respect to its
4075 alignment.
4076 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4077 if it is aligned, i.e., check if it is possible to vectorize it with different
4078 alignment. */
4080 enum dr_alignment_support
4081 vect_supportable_dr_alignment (struct data_reference *dr,
4082 bool check_aligned_accesses)
4084 gimple stmt = DR_STMT (dr);
4085 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4086 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4087 enum machine_mode mode = TYPE_MODE (vectype);
4088 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4089 struct loop *vect_loop = NULL;
4090 bool nested_in_vect_loop = false;
4092 if (aligned_access_p (dr) && !check_aligned_accesses)
4093 return dr_aligned;
4095 if (loop_vinfo)
4097 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4098 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4101 /* Possibly unaligned access. */
4103 /* We can choose between using the implicit realignment scheme (generating
4104 a misaligned_move stmt) and the explicit realignment scheme (generating
4105 aligned loads with a REALIGN_LOAD). There are two variants to the
4106 explicit realignment scheme: optimized, and unoptimized.
4107 We can optimize the realignment only if the step between consecutive
4108 vector loads is equal to the vector size. Since the vector memory
4109 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4110 is guaranteed that the misalignment amount remains the same throughout the
4111 execution of the vectorized loop. Therefore, we can create the
4112 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4113 at the loop preheader.
4115 However, in the case of outer-loop vectorization, when vectorizing a
4116 memory access in the inner-loop nested within the LOOP that is now being
4117 vectorized, while it is guaranteed that the misalignment of the
4118 vectorized memory access will remain the same in different outer-loop
4119 iterations, it is *not* guaranteed that it will remain the same throughout
4120 the execution of the inner-loop. This is because the inner-loop advances
4121 with the original scalar step (and not in steps of VS). If the inner-loop
4122 step happens to be a multiple of VS, then the misalignment remains fixed
4123 and we can use the optimized realignment scheme. For example:
4125 for (i=0; i<N; i++)
4126 for (j=0; j<M; j++)
4127 s += a[i+j];
4129 When vectorizing the i-loop in the above example, the step between
4130 consecutive vector loads is 1, and so the misalignment does not remain
4131 fixed across the execution of the inner-loop, and the realignment cannot
4132 be optimized (as illustrated in the following pseudo vectorized loop):
4134 for (i=0; i<N; i+=4)
4135 for (j=0; j<M; j++){
4136 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4137 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4138 // (assuming that we start from an aligned address).
4141 We therefore have to use the unoptimized realignment scheme:
4143 for (i=0; i<N; i+=4)
4144 for (j=k; j<M; j+=4)
4145 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4146 // that the misalignment of the initial address is
4147 // 0).
4149 The loop can then be vectorized as follows:
4151 for (k=0; k<4; k++){
4152 rt = get_realignment_token (&vp[k]);
4153 for (i=0; i<N; i+=4){
4154 v1 = vp[i+k];
4155 for (j=k; j<M; j+=4){
4156 v2 = vp[i+j+VS-1];
4157 va = REALIGN_LOAD <v1,v2,rt>;
4158 vs += va;
4159 v1 = v2;
4162 } */
4164 if (DR_IS_READ (dr))
4166 bool is_packed = false;
4167 tree type = (TREE_TYPE (DR_REF (dr)));
4169 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4170 && (!targetm.vectorize.builtin_mask_for_load
4171 || targetm.vectorize.builtin_mask_for_load ()))
4173 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4174 if ((nested_in_vect_loop
4175 && (TREE_INT_CST_LOW (DR_STEP (dr))
4176 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4177 || !loop_vinfo)
4178 return dr_explicit_realign;
4179 else
4180 return dr_explicit_realign_optimized;
4182 if (!known_alignment_for_access_p (dr))
4184 tree ba = DR_BASE_OBJECT (dr);
4186 if (ba)
4187 is_packed = contains_packed_reference (ba);
4190 if (targetm.vectorize.
4191 support_vector_misalignment (mode, type,
4192 DR_MISALIGNMENT (dr), is_packed))
4193 /* Can't software pipeline the loads, but can at least do them. */
4194 return dr_unaligned_supported;
4196 else
4198 bool is_packed = false;
4199 tree type = (TREE_TYPE (DR_REF (dr)));
4201 if (!known_alignment_for_access_p (dr))
4203 tree ba = DR_BASE_OBJECT (dr);
4205 if (ba)
4206 is_packed = contains_packed_reference (ba);
4209 if (targetm.vectorize.
4210 support_vector_misalignment (mode, type,
4211 DR_MISALIGNMENT (dr), is_packed))
4212 return dr_unaligned_supported;
4215 /* Unsupported. */
4216 return dr_unaligned_unsupported;
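/* Illustrative sketch, not part of the vectorizer: the misalignment
   arithmetic behind the inner-loop example in the comment above.  With a
   vector of VS elements, the misalignment (in elements) of &vp[i+j] is
   (i + j) % VS; it stays fixed across inner-loop iterations only when the
   inner-loop step is a multiple of VS.  VS = 4 and the trip counts are
   assumptions made only for this example.  */

static int
model_misalignment_is_fixed (int inner_step)
{
  const int vs = 4;
  int i, j, first = -1;

  for (i = 0; i < 2 * vs; i += vs)           /* outer (vectorized) loop */
    for (j = 0; j < 2 * vs; j += inner_step) /* inner loop */
      {
	int misalign = (i + j) % vs;
	if (first < 0)
	  first = misalign;
	else if (misalign != first)
	  return 0;  /* misalignment varies: only dr_explicit_realign */
      }
  return 1;  /* misalignment fixed: dr_explicit_realign_optimized is usable */
}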