[official-gcc/alias-decl.git] / gcc / tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "diagnostic-core.h"
40 #include "toplev.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
43 #include "expr.h"
44 #include "optabs.h"
46 /* Return the smallest scalar part of STMT.
47 This is used to determine the vectype of the stmt. We generally set the
48 vectype according to the type of the result (lhs). For stmts whose
49 result-type is different than the type of the arguments (e.g., demotion,
50 promotion), vectype will be reset appropriately (later). Note that we have
51 to visit the smallest datatype in this function, because that determines the
52 VF. If the smallest datatype in the loop is present only as the rhs of a
53 promotion operation - we'd miss it.
54 Such a case, where a variable of this datatype does not appear in the lhs
55 anywhere in the loop, can only occur if it's an invariant: e.g.:
56 'int_x = (int) short_inv', which we'd expect to have been optimized away by
57 invariant motion. However, we cannot rely on invariant motion to always take
58 invariants out of the loop, and so in the case of promotion we also have to
59 check the rhs.
60 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
61 types. */
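/* Illustrative sketch (hypothetical statement, not from the original sources):
   for 'int_y = (int) short_x' the result type is 'int' (4 bytes) while the
   rhs type is 'short' (2 bytes), so this function would return 'short' with
   *LHS_SIZE_UNIT == 4 and *RHS_SIZE_UNIT == 2, and the smaller type is what
   determines the vectorization factor.  */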
63 tree
64 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
65 HOST_WIDE_INT *rhs_size_unit)
67 tree scalar_type = gimple_expr_type (stmt);
68 HOST_WIDE_INT lhs, rhs;
70 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
72 if (is_gimple_assign (stmt)
73 && (gimple_assign_cast_p (stmt)
74 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
75 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
77 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
79 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
80 if (rhs < lhs)
81 scalar_type = rhs_type;
84 *lhs_size_unit = lhs;
85 *rhs_size_unit = rhs;
86 return scalar_type;
90 /* Find the place of the data-ref in STMT in the interleaving chain that starts
91 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
93 int
94 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
96 gimple next_stmt = first_stmt;
97 int result = 0;
99 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
100 return -1;
102 while (next_stmt && next_stmt != stmt)
104 result++;
105 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
108 if (next_stmt)
109 return result;
110 else
111 return -1;
115 /* Function vect_insert_into_interleaving_chain.
117 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
119 static void
120 vect_insert_into_interleaving_chain (struct data_reference *dra,
121 struct data_reference *drb)
123 gimple prev, next;
124 tree next_init;
125 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
126 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
128 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
129 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
130 while (next)
132 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
133 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
135 /* Insert here. */
136 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
137 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
138 return;
140 prev = next;
141 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
144 /* We got to the end of the list. Insert here. */
145 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
146 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
150 /* Function vect_update_interleaving_chain.
152    For two data-refs DRA and DRB that are a part of a chain of interleaved data
153 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
155 There are four possible cases:
156 1. New stmts - both DRA and DRB are not a part of any chain:
157 FIRST_DR = DRB
158 NEXT_DR (DRB) = DRA
159 2. DRB is a part of a chain and DRA is not:
160 no need to update FIRST_DR
161 no need to insert DRB
162 insert DRA according to init
163 3. DRA is a part of a chain and DRB is not:
164 if (init of FIRST_DR > init of DRB)
165 FIRST_DR = DRB
166 NEXT(FIRST_DR) = previous FIRST_DR
167 else
168 insert DRB according to its init
169 4. both DRA and DRB are in some interleaving chains:
170 choose the chain with the smallest init of FIRST_DR
171 insert the nodes of the second chain into the first one. */
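/* Hedged illustration of case 4 (hypothetical inits, not from the sources):
   if DRA's chain has inits {0, 8} and DRB's chain has inits {4, 12}, the
   chain starting at init 0 is kept and the other chain's nodes are inserted
   in init order, giving one chain {0, 4, 8, 12} whose members all point to
   the same FIRST_DR.  */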
173 static void
174 vect_update_interleaving_chain (struct data_reference *drb,
175 struct data_reference *dra)
177 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
178 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
179 tree next_init, init_dra_chain, init_drb_chain;
180 gimple first_a, first_b;
181 tree node_init;
182 gimple node, prev, next, first_stmt;
184 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
185 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
187 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
188 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
189 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
190 return;
193 /* 2. DRB is a part of a chain and DRA is not. */
194 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
196 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
197 /* Insert DRA into the chain of DRB. */
198 vect_insert_into_interleaving_chain (dra, drb);
199 return;
202 /* 3. DRA is a part of a chain and DRB is not. */
203 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
205 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
206 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
207 old_first_stmt)));
208 gimple tmp;
210 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
212 /* DRB's init is smaller than the init of the stmt previously marked
213 as the first stmt of the interleaving chain of DRA. Therefore, we
214 update FIRST_STMT and put DRB in the head of the list. */
215 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
216 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
218 /* Update all the stmts in the list to point to the new FIRST_STMT. */
219 tmp = old_first_stmt;
220 while (tmp)
222 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
223 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
226 else
228 /* Insert DRB in the list of DRA. */
229 vect_insert_into_interleaving_chain (drb, dra);
230 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
232 return;
235 /* 4. both DRA and DRB are in some interleaving chains. */
236 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
237 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
238 if (first_a == first_b)
239 return;
240 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
241 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
243 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
245 /* Insert the nodes of DRA chain into the DRB chain.
246 After inserting a node, continue from this node of the DRB chain (don't
247          start from the beginning).  */
248 node = DR_GROUP_FIRST_DR (stmtinfo_a);
249 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
250 first_stmt = first_b;
252 else
254 /* Insert the nodes of DRB chain into the DRA chain.
255 After inserting a node, continue from this node of the DRA chain (don't
256          start from the beginning).  */
257 node = DR_GROUP_FIRST_DR (stmtinfo_b);
258 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
259 first_stmt = first_a;
262 while (node)
264 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
265 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
266 while (next)
268 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
269 if (tree_int_cst_compare (next_init, node_init) > 0)
271 /* Insert here. */
272 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
273 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
274 prev = node;
275 break;
277 prev = next;
278 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
280 if (!next)
282 /* We got to the end of the list. Insert here. */
283 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
284 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
285 prev = node;
287 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
288 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
293 /* Function vect_equal_offsets.
295 Check if OFFSET1 and OFFSET2 are identical expressions. */
297 static bool
298 vect_equal_offsets (tree offset1, tree offset2)
300 bool res;
302 STRIP_NOPS (offset1);
303 STRIP_NOPS (offset2);
305 if (offset1 == offset2)
306 return true;
308 if (TREE_CODE (offset1) != TREE_CODE (offset2)
309 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
310 return false;
312 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
313 TREE_OPERAND (offset2, 0));
315 if (!res || !BINARY_CLASS_P (offset1))
316 return res;
318 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
319 TREE_OPERAND (offset2, 1));
321 return res;
325 /* Function vect_check_interleaving.
327 Check if DRA and DRB are a part of interleaving. In case they are, insert
328 DRA and DRB in an interleaving chain. */
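/* Minimal sketch of what is detected below (hypothetical accesses, not from
   the sources): for a[2*i] and a[2*i+1] with a 4-byte element type, both refs
   share base and offset, both steps are 8, and the difference of their inits
   (4) is a multiple of the type size and not larger than the step, so the two
   refs are recorded as one interleaving chain.  */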
330 static bool
331 vect_check_interleaving (struct data_reference *dra,
332 struct data_reference *drb)
334 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
336   /* Check that the data-refs have the same first location (except init) and
337      that they are both either stores or loads (not one load and one store).  */
338 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
339 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
340 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
341 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
342 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
343 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
344 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
345 || DR_IS_READ (dra) != DR_IS_READ (drb))
346 return false;
348 /* Check:
349 1. data-refs are of the same type
350 2. their steps are equal
351 3. the step (if greater than zero) is greater than the difference between
352 data-refs' inits. */
353 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
354 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
356 if (type_size_a != type_size_b
357 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
358 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
359 TREE_TYPE (DR_REF (drb))))
360 return false;
362 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
363 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
364 step = TREE_INT_CST_LOW (DR_STEP (dra));
366 if (init_a > init_b)
368 /* If init_a == init_b + the size of the type * k, we have an interleaving,
369 and DRB is accessed before DRA. */
370 diff_mod_size = (init_a - init_b) % type_size_a;
372 if (step && (init_a - init_b) > step)
373 return false;
375 if (diff_mod_size == 0)
377 vect_update_interleaving_chain (drb, dra);
378 if (vect_print_dump_info (REPORT_DR_DETAILS))
380 fprintf (vect_dump, "Detected interleaving ");
381 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
382 fprintf (vect_dump, " and ");
383 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
385 return true;
388 else
390 /* If init_b == init_a + the size of the type * k, we have an
391 interleaving, and DRA is accessed before DRB. */
392 diff_mod_size = (init_b - init_a) % type_size_a;
394 if (step && (init_b - init_a) > step)
395 return false;
397 if (diff_mod_size == 0)
399 vect_update_interleaving_chain (dra, drb);
400 if (vect_print_dump_info (REPORT_DR_DETAILS))
402 fprintf (vect_dump, "Detected interleaving ");
403 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
404 fprintf (vect_dump, " and ");
405 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
407 return true;
411 return false;
414 /* Check if the data references pointed to by DR_I and DR_J are the same or
415    belong to the same interleaving group.  Return FALSE if the drs are
416 different, otherwise return TRUE. */
418 static bool
419 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
421 gimple stmt_i = DR_STMT (dr_i);
422 gimple stmt_j = DR_STMT (dr_j);
424 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
425 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
426 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
427 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
428 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
429 return true;
430 else
431 return false;
434 /* If address ranges represented by DDR_I and DDR_J are equal,
435 return TRUE, otherwise return FALSE. */
437 static bool
438 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
440 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
441 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
442 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
443 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
444 return true;
445 else
446 return false;
449 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
450 tested at run-time. Return TRUE if DDR was successfully inserted.
451 Return false if versioning is not supported. */
453 static bool
454 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
456 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
458 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
459 return false;
461 if (vect_print_dump_info (REPORT_DR_DETAILS))
463 fprintf (vect_dump, "mark for run-time aliasing test between ");
464 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
465 fprintf (vect_dump, " and ");
466 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
469 if (optimize_loop_nest_for_size_p (loop))
471 if (vect_print_dump_info (REPORT_DR_DETAILS))
472 fprintf (vect_dump, "versioning not supported when optimizing for size.");
473 return false;
476 /* FORNOW: We don't support versioning with outer-loop vectorization. */
477 if (loop->inner)
479 if (vect_print_dump_info (REPORT_DR_DETAILS))
480 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
481 return false;
484 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
485 return true;
489 /* Function vect_analyze_data_ref_dependence.
491 Return TRUE if there (might) exist a dependence between a memory-reference
492    DRA and a memory-reference DRB.  When versioning for alias can check the
493    dependence at run-time instead, return FALSE.  Adjust *MAX_VF according to
494 the data dependence. */
496 static bool
497 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
498 loop_vec_info loop_vinfo, int *max_vf)
500 unsigned int i;
501 struct loop *loop = NULL;
502 struct data_reference *dra = DDR_A (ddr);
503 struct data_reference *drb = DDR_B (ddr);
504 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
505 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
506 lambda_vector dist_v;
507 unsigned int loop_depth;
509 /* Don't bother to analyze statements marked as unvectorizable. */
510 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
511 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
512 return false;
514 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
516 /* Independent data accesses. */
517 vect_check_interleaving (dra, drb);
518 return false;
521 if (loop_vinfo)
522 loop = LOOP_VINFO_LOOP (loop_vinfo);
524 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
525 return false;
527 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
529 if (loop_vinfo)
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
533 fprintf (vect_dump, "versioning for alias required: "
534 "can't determine dependence between ");
535 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
536 fprintf (vect_dump, " and ");
537 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
540 /* Add to list of ddrs that need to be tested at run-time. */
541 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
544       /* When vectorizing a basic block, an unknown dependence can still mean
545          a strided access.  */
546 if (vect_check_interleaving (dra, drb))
547 return false;
549 if (vect_print_dump_info (REPORT_DR_DETAILS))
551 fprintf (vect_dump, "can't determine dependence between ");
552 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
553 fprintf (vect_dump, " and ");
554 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
557 /* Mark the statements as unvectorizable. */
558 STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false;
559 STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false;
561 return false;
564 /* Versioning for alias is not yet supported for basic block SLP, and
565      dependence distance is not applicable, hence, in case of a known data
566 dependence, basic block vectorization is impossible for now. */
567 if (!loop_vinfo)
569 if (dra != drb && vect_check_interleaving (dra, drb))
570 return false;
572 if (vect_print_dump_info (REPORT_DR_DETAILS))
574 fprintf (vect_dump, "determined dependence between ");
575 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
576 fprintf (vect_dump, " and ");
577 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
580 return true;
583 /* Loop-based vectorization and known data dependence. */
584 if (DDR_NUM_DIST_VECTS (ddr) == 0)
586 if (vect_print_dump_info (REPORT_DR_DETAILS))
588 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
589 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
590 fprintf (vect_dump, " and ");
591 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
593 /* Add to list of ddrs that need to be tested at run-time. */
594 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
597 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
598 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
600 int dist = dist_v[loop_depth];
602 if (vect_print_dump_info (REPORT_DR_DETAILS))
603 fprintf (vect_dump, "dependence distance = %d.", dist);
605 if (dist == 0)
607 if (vect_print_dump_info (REPORT_DR_DETAILS))
609 fprintf (vect_dump, "dependence distance == 0 between ");
610 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
611 fprintf (vect_dump, " and ");
612 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
615           /* For interleaving, mark that there is a read-write dependency if
616              necessary.  We checked earlier that one of the data-refs is a store.  */
617 if (DR_IS_READ (dra))
618 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
619 else
621 if (DR_IS_READ (drb))
622 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
625 continue;
628 if (dist > 0 && DDR_REVERSED_P (ddr))
630 /* If DDR_REVERSED_P the order of the data-refs in DDR was
631 reversed (to make distance vector positive), and the actual
632 distance is negative. */
633 if (vect_print_dump_info (REPORT_DR_DETAILS))
634 fprintf (vect_dump, "dependence distance negative.");
635 continue;
638 if (abs (dist) >= 2
639 && abs (dist) < *max_vf)
641 /* The dependence distance requires reduction of the maximal
642 vectorization factor. */
643 *max_vf = abs (dist);
644 if (vect_print_dump_info (REPORT_DR_DETAILS))
645 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
646 *max_vf);
649 if (abs (dist) >= *max_vf)
651           /* In this case the dependence distance does not create a dependence,
652              as far as vectorization is concerned.  */
653 if (vect_print_dump_info (REPORT_DR_DETAILS))
654 fprintf (vect_dump, "dependence distance >= VF.");
655 continue;
658 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
660 fprintf (vect_dump, "not vectorized, possible dependence "
661 "between data-refs ");
662 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
663 fprintf (vect_dump, " and ");
664 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
667 return true;
670 return false;
673 /* Function vect_analyze_data_ref_dependences.
675 Examine all the data references in the loop, and make sure there do not
676 exist any data dependences between them. Set *MAX_VF according to
677 the maximum vectorization factor the data dependences allow. */
679 bool
680 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
681 bb_vec_info bb_vinfo, int *max_vf)
683 unsigned int i;
684 VEC (ddr_p, heap) *ddrs = NULL;
685 struct data_dependence_relation *ddr;
687 if (vect_print_dump_info (REPORT_DETAILS))
688 fprintf (vect_dump, "=== vect_analyze_dependences ===");
690 if (loop_vinfo)
691 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
692 else
693 ddrs = BB_VINFO_DDRS (bb_vinfo);
695 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
696 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
697 return false;
699 return true;
703 /* Function vect_compute_data_ref_alignment
705 Compute the misalignment of the data reference DR.
707 Output:
708 1. If during the misalignment computation it is found that the data reference
709 cannot be vectorized then false is returned.
710 2. DR_MISALIGNMENT (DR) is defined.
712 FOR NOW: No analysis is actually performed. Misalignment is calculated
713 only for trivial cases. TODO. */
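/* Rough illustration (hypothetical numbers, not from the sources): for a
   V4SI vectype the required alignment is 16 bytes; with a sufficiently
   aligned base and DR_INIT == 20, the misalignment recorded below would be
   20 mod 16 == 4 bytes.  */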
715 static bool
716 vect_compute_data_ref_alignment (struct data_reference *dr)
718 gimple stmt = DR_STMT (dr);
719 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
721 struct loop *loop = NULL;
722 tree ref = DR_REF (dr);
723 tree vectype;
724 tree base, base_addr;
725 bool base_aligned;
726 tree misalign;
727 tree aligned_to, alignment;
729 if (vect_print_dump_info (REPORT_DETAILS))
730 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
732 if (loop_vinfo)
733 loop = LOOP_VINFO_LOOP (loop_vinfo);
735 /* Initialize misalignment to unknown. */
736 SET_DR_MISALIGNMENT (dr, -1);
738 misalign = DR_INIT (dr);
739 aligned_to = DR_ALIGNED_TO (dr);
740 base_addr = DR_BASE_ADDRESS (dr);
741 vectype = STMT_VINFO_VECTYPE (stmt_info);
743 /* In case the dataref is in an inner-loop of the loop that is being
744 vectorized (LOOP), we use the base and misalignment information
745 relative to the outer-loop (LOOP). This is ok only if the misalignment
746 stays the same throughout the execution of the inner-loop, which is why
747      we have to check that the stride of the dataref in the inner-loop is a
748      multiple of the vector size.  */
749 if (loop && nested_in_vect_loop_p (loop, stmt))
751 tree step = DR_STEP (dr);
752 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
754 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
756 if (vect_print_dump_info (REPORT_ALIGNMENT))
757 fprintf (vect_dump, "inner step divides the vector-size.");
758 misalign = STMT_VINFO_DR_INIT (stmt_info);
759 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
760 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
762 else
764 if (vect_print_dump_info (REPORT_ALIGNMENT))
765 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
766 misalign = NULL_TREE;
770 base = build_fold_indirect_ref (base_addr);
771 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
773 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
774 || !misalign)
776 if (vect_print_dump_info (REPORT_ALIGNMENT))
778 fprintf (vect_dump, "Unknown alignment for access: ");
779 print_generic_expr (vect_dump, base, TDF_SLIM);
781 return true;
784 if ((DECL_P (base)
785 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
786 alignment) >= 0)
787 || (TREE_CODE (base_addr) == SSA_NAME
788 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
789 TREE_TYPE (base_addr)))),
790 alignment) >= 0))
791 base_aligned = true;
792 else
793 base_aligned = false;
795 if (!base_aligned)
797 /* Do not change the alignment of global variables if
798 flag_section_anchors is enabled. */
799 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
800 || (TREE_STATIC (base) && flag_section_anchors))
802 if (vect_print_dump_info (REPORT_DETAILS))
804 fprintf (vect_dump, "can't force alignment of ref: ");
805 print_generic_expr (vect_dump, ref, TDF_SLIM);
807 return true;
810 /* Force the alignment of the decl.
811 NOTE: This is the only change to the code we make during
812 the analysis phase, before deciding to vectorize the loop. */
813 if (vect_print_dump_info (REPORT_DETAILS))
815 fprintf (vect_dump, "force alignment of ");
816 print_generic_expr (vect_dump, ref, TDF_SLIM);
819 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
820 DECL_USER_ALIGN (base) = 1;
823 /* At this point we assume that the base is aligned. */
824 gcc_assert (base_aligned
825 || (TREE_CODE (base) == VAR_DECL
826 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
828 /* Modulo alignment. */
829 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
831 if (!host_integerp (misalign, 1))
833 /* Negative or overflowed misalignment value. */
834 if (vect_print_dump_info (REPORT_DETAILS))
835 fprintf (vect_dump, "unexpected misalign value");
836 return false;
839 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
841 if (vect_print_dump_info (REPORT_DETAILS))
843 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
844 print_generic_expr (vect_dump, ref, TDF_SLIM);
847 return true;
851 /* Function vect_compute_data_refs_alignment
853 Compute the misalignment of data references in the loop.
854 Return FALSE if a data reference is found that cannot be vectorized. */
856 static bool
857 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
858 bb_vec_info bb_vinfo)
860 VEC (data_reference_p, heap) *datarefs;
861 struct data_reference *dr;
862 unsigned int i;
864 if (loop_vinfo)
865 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
866 else
867 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
869 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
870 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
871 && !vect_compute_data_ref_alignment (dr))
873 if (bb_vinfo)
875 /* Mark unsupported statement as unvectorizable. */
876 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
877 continue;
879 else
880 return false;
883 return true;
887 /* Function vect_update_misalignment_for_peel
889 DR - the data reference whose misalignment is to be adjusted.
890 DR_PEEL - the data reference whose misalignment is being made
891 zero in the vector loop by the peel.
892 NPEEL - the number of iterations in the peel loop if the misalignment
893 of DR_PEEL is known at compile time. */
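/* Sketch of the known-alignment case handled below (hypothetical numbers):
   with DR_MISALIGNMENT (dr) == 4, an element size of 4 bytes and NPEEL == 3,
   the updated misalignment is (4 + 3*4) mod vector-size, i.e. 0 for a
   16-byte vector.  */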
895 static void
896 vect_update_misalignment_for_peel (struct data_reference *dr,
897 struct data_reference *dr_peel, int npeel)
899 unsigned int i;
900 VEC(dr_p,heap) *same_align_drs;
901 struct data_reference *current_dr;
902 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
903 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
904 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
905 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
907 /* For interleaved data accesses the step in the loop must be multiplied by
908 the size of the interleaving group. */
909 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
910 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
911 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
912 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
914 /* It can be assumed that the data refs with the same alignment as dr_peel
915 are aligned in the vector loop. */
916 same_align_drs
917 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
918 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
920 if (current_dr != dr)
921 continue;
922 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
923 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
924 SET_DR_MISALIGNMENT (dr, 0);
925 return;
928 if (known_alignment_for_access_p (dr)
929 && known_alignment_for_access_p (dr_peel))
931 int misal = DR_MISALIGNMENT (dr);
932 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
933 misal += npeel * dr_size;
934 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
935 SET_DR_MISALIGNMENT (dr, misal);
936 return;
939 if (vect_print_dump_info (REPORT_DETAILS))
940 fprintf (vect_dump, "Setting misalignment to -1.");
941 SET_DR_MISALIGNMENT (dr, -1);
945 /* Function vect_verify_datarefs_alignment
947 Return TRUE if all data references in the loop can be
948 handled with respect to alignment. */
950 bool
951 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
953 VEC (data_reference_p, heap) *datarefs;
954 struct data_reference *dr;
955 enum dr_alignment_support supportable_dr_alignment;
956 unsigned int i;
958 if (loop_vinfo)
959 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
960 else
961 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
963 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
965 gimple stmt = DR_STMT (dr);
966 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968 /* For interleaving, only the alignment of the first access matters.
969 Skip statements marked as not vectorizable. */
970 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
971 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
972 || !STMT_VINFO_VECTORIZABLE (stmt_info))
973 continue;
975 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
976 if (!supportable_dr_alignment)
978 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
980 if (DR_IS_READ (dr))
981 fprintf (vect_dump,
982 "not vectorized: unsupported unaligned load.");
983 else
984 fprintf (vect_dump,
985 "not vectorized: unsupported unaligned store.");
987 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
989 return false;
991 if (supportable_dr_alignment != dr_aligned
992 && vect_print_dump_info (REPORT_ALIGNMENT))
993 fprintf (vect_dump, "Vectorizing an unaligned access.");
995 return true;
999 /* Function vector_alignment_reachable_p
1001 Return true if vector alignment for DR is reachable by peeling
1002 a few loop iterations. Return false otherwise. */
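/* Hedged illustration (hypothetical numbers): with 4 elements per vector and
   a known misalignment of 3 elements, peeling one iteration reaches
   alignment; but for an interleaving group of size 2, (4 - 3) % 2 != 0, so
   alignment would not be considered reachable by peeling.  */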
1004 static bool
1005 vector_alignment_reachable_p (struct data_reference *dr)
1007 gimple stmt = DR_STMT (dr);
1008 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1009 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1011 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1013       /* For interleaved access we peel only if the number of iterations in
1014          the prolog loop ({VF - misalignment}) is a multiple of the
1015          number of interleaved accesses.  */
1016 int elem_size, mis_in_elements;
1017 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1019 /* FORNOW: handle only known alignment. */
1020 if (!known_alignment_for_access_p (dr))
1021 return false;
1023 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1024 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1026 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1027 return false;
1030   /* If the misalignment is known at compile time then allow peeling
1031      only if natural alignment is reachable through peeling.  */
1032 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1034 HOST_WIDE_INT elmsize =
1035 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1036 if (vect_print_dump_info (REPORT_DETAILS))
1038 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1039 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1041 if (DR_MISALIGNMENT (dr) % elmsize)
1043 if (vect_print_dump_info (REPORT_DETAILS))
1044 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1045 return false;
1049 if (!known_alignment_for_access_p (dr))
1051 tree type = (TREE_TYPE (DR_REF (dr)));
1052 tree ba = DR_BASE_OBJECT (dr);
1053 bool is_packed = false;
1055 if (ba)
1056 is_packed = contains_packed_reference (ba);
1058 if (vect_print_dump_info (REPORT_DETAILS))
1059 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1060 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1061 return true;
1062 else
1063 return false;
1066 return true;
1070 /* Calculate the cost of the memory access represented by DR. */
1072 static void
1073 vect_get_data_access_cost (struct data_reference *dr,
1074 unsigned int *inside_cost,
1075 unsigned int *outside_cost)
1077 gimple stmt = DR_STMT (dr);
1078 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1079 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1080 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1081 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1082 int ncopies = vf / nunits;
1083 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1085 if (!supportable_dr_alignment)
1086 *inside_cost = VECT_MAX_COST;
1087 else
1089 if (DR_IS_READ (dr))
1090 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1091 else
1092 vect_get_store_cost (dr, ncopies, inside_cost);
1095 if (vect_print_dump_info (REPORT_COST))
1096 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1097 "outside_cost = %d.", *inside_cost, *outside_cost);
1101 static hashval_t
1102 vect_peeling_hash (const void *elem)
1104 const struct _vect_peel_info *peel_info;
1106 peel_info = (const struct _vect_peel_info *) elem;
1107 return (hashval_t) peel_info->npeel;
1111 static int
1112 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1114 const struct _vect_peel_info *a, *b;
1116 a = (const struct _vect_peel_info *) elem1;
1117 b = (const struct _vect_peel_info *) elem2;
1118 return (a->npeel == b->npeel);
1122 /* Insert DR into peeling hash table with NPEEL as key. */
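/* Descriptive note on the code below (wording is ours, based on the code):
   each distinct NPEEL value gets one hash entry whose COUNT records how many
   data refs would become aligned by peeling NPEEL iterations; when the cost
   model is disabled, refs that are unsupportable when unaligned add
   VECT_MAX_COST to the count so that this peeling is strongly preferred.  */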
1124 static void
1125 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1126 int npeel)
1128 struct _vect_peel_info elem, *slot;
1129 void **new_slot;
1130 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1132 elem.npeel = npeel;
1133 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1134 &elem);
1135 if (slot)
1136 slot->count++;
1137 else
1139 slot = XNEW (struct _vect_peel_info);
1140 slot->npeel = npeel;
1141 slot->dr = dr;
1142 slot->count = 1;
1143 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1144 INSERT);
1145 *new_slot = slot;
1148 if (!supportable_dr_alignment && !flag_vect_cost_model)
1149 slot->count += VECT_MAX_COST;
1153 /* Traverse the peeling hash table to find the peeling option that aligns the
1154    maximum number of data accesses.  */
1156 static int
1157 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1159 vect_peel_info elem = (vect_peel_info) *slot;
1160 vect_peel_extended_info max = (vect_peel_extended_info) data;
1162 if (elem->count > max->peel_info.count)
1164 max->peel_info.npeel = elem->npeel;
1165 max->peel_info.count = elem->count;
1166 max->peel_info.dr = elem->dr;
1169 return 1;
1173 /* Traverse the peeling hash table and calculate the cost of each peeling
1174    option.  Find the one with the lowest cost.  */
1176 static int
1177 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1179 vect_peel_info elem = (vect_peel_info) *slot;
1180 vect_peel_extended_info min = (vect_peel_extended_info) data;
1181 int save_misalignment, dummy;
1182 unsigned int inside_cost = 0, outside_cost = 0, i;
1183 gimple stmt = DR_STMT (elem->dr);
1184 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1185 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1186 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1187 struct data_reference *dr;
1189 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1191 stmt = DR_STMT (dr);
1192 stmt_info = vinfo_for_stmt (stmt);
1193 /* For interleaving, only the alignment of the first access
1194 matters. */
1195 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1196 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1197 continue;
1199 save_misalignment = DR_MISALIGNMENT (dr);
1200 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1201 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1202 SET_DR_MISALIGNMENT (dr, save_misalignment);
1205 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1206 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1208 if (inside_cost < min->inside_cost
1209 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1211 min->inside_cost = inside_cost;
1212 min->outside_cost = outside_cost;
1213 min->peel_info.dr = elem->dr;
1214 min->peel_info.npeel = elem->npeel;
1217 return 1;
1221 /* Choose best peeling option by traversing peeling hash table and either
1222 choosing an option with the lowest cost (if cost model is enabled) or the
1223 option that aligns as many accesses as possible. */
1225 static struct data_reference *
1226 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1227 unsigned int *npeel)
1229 struct _vect_peel_extended_info res;
1231 res.peel_info.dr = NULL;
1233 if (flag_vect_cost_model)
1235 res.inside_cost = INT_MAX;
1236 res.outside_cost = INT_MAX;
1237 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1238 vect_peeling_hash_get_lowest_cost, &res);
1240 else
1242 res.peel_info.count = 0;
1243 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1244 vect_peeling_hash_get_most_frequent, &res);
1247 *npeel = res.peel_info.npeel;
1248 return res.peel_info.dr;
1252 /* Function vect_enhance_data_refs_alignment
1254 This pass will use loop versioning and loop peeling in order to enhance
1255 the alignment of data references in the loop.
1257 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1258 original loop is to be vectorized; Any other loops that are created by
1259 the transformations performed in this pass - are not supposed to be
1260 vectorized. This restriction will be relaxed.
1262 This pass will require a cost model to guide it whether to apply peeling
1263 or versioning or a combination of the two. For example, the scheme that
1264    Intel uses when given a loop with several memory accesses is as follows:
1265    choose one memory access ('p') whose alignment you want to force by doing
1266 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1267 other accesses are not necessarily aligned, or (2) use loop versioning to
1268 generate one loop in which all accesses are aligned, and another loop in
1269 which only 'p' is necessarily aligned.
1271 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1272 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1273 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1275 Devising a cost model is the most critical aspect of this work. It will
1276 guide us on which access to peel for, whether to use loop versioning, how
1277 many versions to create, etc. The cost model will probably consist of
1278 generic considerations as well as target specific considerations (on
1279 powerpc for example, misaligned stores are more painful than misaligned
1280 loads).
1282 Here are the general steps involved in alignment enhancements:
1284 -- original loop, before alignment analysis:
1285 for (i=0; i<N; i++){
1286 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1287 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1290 -- After vect_compute_data_refs_alignment:
1291 for (i=0; i<N; i++){
1292 x = q[i]; # DR_MISALIGNMENT(q) = 3
1293 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1296 -- Possibility 1: we do loop versioning:
1297 if (p is aligned) {
1298 for (i=0; i<N; i++){ # loop 1A
1299 x = q[i]; # DR_MISALIGNMENT(q) = 3
1300 p[i] = y; # DR_MISALIGNMENT(p) = 0
1303 else {
1304 for (i=0; i<N; i++){ # loop 1B
1305 x = q[i]; # DR_MISALIGNMENT(q) = 3
1306 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1310 -- Possibility 2: we do loop peeling:
1311 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1312 x = q[i];
1313 p[i] = y;
1315 for (i = 3; i < N; i++){ # loop 2A
1316 x = q[i]; # DR_MISALIGNMENT(q) = 0
1317 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1320 -- Possibility 3: combination of loop peeling and versioning:
1321 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1322 x = q[i];
1323 p[i] = y;
1325 if (p is aligned) {
1326 for (i = 3; i<N; i++){ # loop 3A
1327 x = q[i]; # DR_MISALIGNMENT(q) = 0
1328 p[i] = y; # DR_MISALIGNMENT(p) = 0
1331 else {
1332 for (i = 3; i<N; i++){ # loop 3B
1333 x = q[i]; # DR_MISALIGNMENT(q) = 0
1334 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1338 These loops are later passed to loop_transform to be vectorized. The
1339 vectorizer will use the alignment information to guide the transformation
1340 (whether to generate regular loads/stores, or with special handling for
1341 misalignment). */
1343 bool
1344 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1346 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1347 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1348 enum dr_alignment_support supportable_dr_alignment;
1349 struct data_reference *dr0 = NULL, *first_store = NULL;
1350 struct data_reference *dr;
1351 unsigned int i, j;
1352 bool do_peeling = false;
1353 bool do_versioning = false;
1354 bool stat;
1355 gimple stmt;
1356 stmt_vec_info stmt_info;
1357 int vect_versioning_for_alias_required;
1358 unsigned int npeel = 0;
1359 bool all_misalignments_unknown = true;
1360 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1361 unsigned possible_npeel_number = 1;
1362 tree vectype;
1363 unsigned int nelements, mis, same_align_drs_max = 0;
1365 if (vect_print_dump_info (REPORT_DETAILS))
1366 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1368 /* While cost model enhancements are expected in the future, the high level
1369 view of the code at this time is as follows:
1371 A) If there is a misaligned access then see if peeling to align
1372 this access can make all data references satisfy
1373 vect_supportable_dr_alignment. If so, update data structures
1374 as needed and return true.
1376 B) If peeling wasn't possible and there is a data reference with an
1377 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1378 then see if loop versioning checks can be used to make all data
1379 references satisfy vect_supportable_dr_alignment. If so, update
1380 data structures as needed and return true.
1382 C) If neither peeling nor versioning were successful then return false if
1383 any data reference does not satisfy vect_supportable_dr_alignment.
1385 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1387 Note, Possibility 3 above (which is peeling and versioning together) is not
1388 being done at this time. */
1390 /* (1) Peeling to force alignment. */
1392 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1393 Considerations:
1394 + How many accesses will become aligned due to the peeling
1395 - How many accesses will become unaligned due to the peeling,
1396 and the cost of misaligned accesses.
1397 - The cost of peeling (the extra runtime checks, the increase
1398 in code size). */
1400 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1402 stmt = DR_STMT (dr);
1403 stmt_info = vinfo_for_stmt (stmt);
1405 /* For interleaving, only the alignment of the first access
1406 matters. */
1407 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1408 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1409 continue;
1411 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1412 do_peeling = vector_alignment_reachable_p (dr);
1413 if (do_peeling)
1415 if (known_alignment_for_access_p (dr))
1417 unsigned int npeel_tmp;
1419 /* Save info about DR in the hash table. */
1420 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1421 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1422 htab_create (1, vect_peeling_hash,
1423 vect_peeling_hash_eq, free);
1425 vectype = STMT_VINFO_VECTYPE (stmt_info);
1426 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1427 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1428 TREE_TYPE (DR_REF (dr))));
1429 npeel_tmp = (nelements - mis) % vf;
1431 /* For multiple types, it is possible that the bigger type access
1432 will have more than one peeling option. E.g., a loop with two
1433 types: one of size (vector size / 4), and the other one of
1434              size (vector size / 8).  The vectorization factor will be 8.  If both
1435              accesses are misaligned by 3, the first one needs one scalar
1436              iteration to be aligned, and the second one needs 5.  But the
1437              first one will also be aligned by peeling 5 scalar
1438              iterations, and in that case both accesses will be aligned.
1439              Hence, except for the immediate peeling amount, we also want
1440              to try to add a full vector size, as long as we don't exceed the
1441              vectorization factor.
1442              We do this automatically for the cost model, since we calculate the cost
1443              for every peeling option.  */
1444 if (!flag_vect_cost_model)
1445             possible_npeel_number = vf / nelements;
1447 /* Handle the aligned case. We may decide to align some other
1448 access, making DR unaligned. */
1449 if (DR_MISALIGNMENT (dr) == 0)
1451 npeel_tmp = 0;
1452 if (!flag_vect_cost_model)
1453 possible_npeel_number++;
1456 for (j = 0; j < possible_npeel_number; j++)
1458 gcc_assert (npeel_tmp <= vf);
1459 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1460 npeel_tmp += nelements;
1463 all_misalignments_unknown = false;
1464 /* Data-ref that was chosen for the case that all the
1465 misalignments are unknown is not relevant anymore, since we
1466 have a data-ref with known alignment. */
1467 dr0 = NULL;
1469 else
1471 /* If we don't know all the misalignment values, we prefer
1472              peeling for the data-ref that has the maximum number of data-refs
1473              with the same alignment, unless the target prefers to align
1474              stores over loads.  */
1475 if (all_misalignments_unknown)
1477 if (same_align_drs_max < VEC_length (dr_p,
1478 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1479 || !dr0)
1481 same_align_drs_max = VEC_length (dr_p,
1482 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1483 dr0 = dr;
1486 if (!first_store && !DR_IS_READ (dr))
1487 first_store = dr;
1490 /* If there are both known and unknown misaligned accesses in the
1491 loop, we choose peeling amount according to the known
1492 accesses. */
1495 if (!supportable_dr_alignment)
1497 dr0 = dr;
1498 if (!first_store && !DR_IS_READ (dr))
1499 first_store = dr;
1503 else
1505 if (!aligned_access_p (dr))
1507 if (vect_print_dump_info (REPORT_DETAILS))
1508 fprintf (vect_dump, "vector alignment may not be reachable");
1510 break;
1515 vect_versioning_for_alias_required
1516 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1518 /* Temporarily, if versioning for alias is required, we disable peeling
1519 until we support peeling and versioning. Often peeling for alignment
1520 will require peeling for loop-bound, which in turn requires that we
1521 know how to adjust the loop ivs after the loop. */
1522 if (vect_versioning_for_alias_required
1523 || !vect_can_advance_ivs_p (loop_vinfo)
1524 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1525 do_peeling = false;
1527 if (do_peeling && all_misalignments_unknown
1528 && vect_supportable_dr_alignment (dr0, false))
1531 /* Check if the target requires to prefer stores over loads, i.e., if
1532 misaligned stores are more expensive than misaligned loads (taking
1533 drs with same alignment into account). */
1534 if (first_store && DR_IS_READ (dr0))
1536 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1537 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1538 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1539 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1541 vect_get_data_access_cost (dr0, &load_inside_cost,
1542 &load_outside_cost);
1543 vect_get_data_access_cost (first_store, &store_inside_cost,
1544 &store_outside_cost);
1546 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1547 aligning the load DR0). */
1548 load_inside_penalty = store_inside_cost;
1549 load_outside_penalty = store_outside_cost;
1550 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1551 (vinfo_for_stmt (DR_STMT (first_store))),
1552 i, dr);
1553 i++)
1554 if (DR_IS_READ (dr))
1556 load_inside_penalty += load_inside_cost;
1557 load_outside_penalty += load_outside_cost;
1559 else
1561 load_inside_penalty += store_inside_cost;
1562 load_outside_penalty += store_outside_cost;
1565 /* Calculate the penalty for leaving DR0 unaligned (by
1566 aligning the FIRST_STORE). */
1567 store_inside_penalty = load_inside_cost;
1568 store_outside_penalty = load_outside_cost;
1569 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1570 (vinfo_for_stmt (DR_STMT (dr0))),
1571 i, dr);
1572 i++)
1573 if (DR_IS_READ (dr))
1575 store_inside_penalty += load_inside_cost;
1576 store_outside_penalty += load_outside_cost;
1578 else
1580 store_inside_penalty += store_inside_cost;
1581 store_outside_penalty += store_outside_cost;
1584 if (load_inside_penalty > store_inside_penalty
1585 || (load_inside_penalty == store_inside_penalty
1586 && load_outside_penalty > store_outside_penalty))
1587 dr0 = first_store;
1590 /* In case there are only loads with different unknown misalignments, use
1591 peeling only if it may help to align other accesses in the loop. */
1592 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1593 (vinfo_for_stmt (DR_STMT (dr0))))
1594 && vect_supportable_dr_alignment (dr0, false)
1595 != dr_unaligned_supported)
1596 do_peeling = false;
1599 if (do_peeling && !dr0)
1601       /* Peeling is possible, but there is no data access that is supported
1602          only when aligned.  So we try to choose the best possible peeling.  */
1604 /* We should get here only if there are drs with known misalignment. */
1605 gcc_assert (!all_misalignments_unknown);
1607 /* Choose the best peeling from the hash table. */
1608 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1609 if (!dr0 || !npeel)
1610 do_peeling = false;
1613 if (do_peeling)
1615 stmt = DR_STMT (dr0);
1616 stmt_info = vinfo_for_stmt (stmt);
1617 vectype = STMT_VINFO_VECTYPE (stmt_info);
1618 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1620 if (known_alignment_for_access_p (dr0))
1622 if (!npeel)
1624 /* Since it's known at compile time, compute the number of
1625 iterations in the peeled loop (the peeling factor) for use in
1626 updating DR_MISALIGNMENT values. The peeling factor is the
1627 vectorization factor minus the misalignment as an element
1628 count. */
1629 mis = DR_MISALIGNMENT (dr0);
1630 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1631 npeel = nelements - mis;
1634 /* For interleaved data access every iteration accesses all the
1635 members of the group, therefore we divide the number of iterations
1636 by the group size. */
1637 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1638 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1639 npeel /= DR_GROUP_SIZE (stmt_info);
1641 if (vect_print_dump_info (REPORT_DETAILS))
1642 fprintf (vect_dump, "Try peeling by %d", npeel);
1645 /* Ensure that all data refs can be vectorized after the peel. */
1646 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1648 int save_misalignment;
1650 if (dr == dr0)
1651 continue;
1653 stmt = DR_STMT (dr);
1654 stmt_info = vinfo_for_stmt (stmt);
1655 /* For interleaving, only the alignment of the first access
1656 matters. */
1657 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1658 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1659 continue;
1661 save_misalignment = DR_MISALIGNMENT (dr);
1662 vect_update_misalignment_for_peel (dr, dr0, npeel);
1663 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1664 SET_DR_MISALIGNMENT (dr, save_misalignment);
1666 if (!supportable_dr_alignment)
1668 do_peeling = false;
1669 break;
1673 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1675 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1676 if (!stat)
1677 do_peeling = false;
1678 else
1679 return stat;
1682 if (do_peeling)
1684 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1685 If the misalignment of DR_i is identical to that of dr0 then set
1686 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1687 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1688 by the peeling factor times the element size of DR_i (MOD the
1689 vectorization factor times the size). Otherwise, the
1690 misalignment of DR_i must be set to unknown. */
1691 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1692 if (dr != dr0)
1693 vect_update_misalignment_for_peel (dr, dr0, npeel);
1695 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1696 if (npeel)
1697 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1698 else
1699 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1700 SET_DR_MISALIGNMENT (dr0, 0);
1701 if (vect_print_dump_info (REPORT_ALIGNMENT))
1702 fprintf (vect_dump, "Alignment of access forced using peeling.");
1704 if (vect_print_dump_info (REPORT_DETAILS))
1705 fprintf (vect_dump, "Peeling for alignment will be applied.");
1707 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1708 gcc_assert (stat);
1709 return stat;
1714 /* (2) Versioning to force alignment. */
1716 /* Try versioning if:
1717 1) flag_tree_vect_loop_version is TRUE
1718 2) optimize loop for speed
1719 3) there is at least one unsupported misaligned data ref with an unknown
1720 misalignment, and
1721 4) all misaligned data refs with a known misalignment are supported, and
1722 5) the number of runtime alignment checks is within reason. */
1724 do_versioning =
1725 flag_tree_vect_loop_version
1726 && optimize_loop_nest_for_speed_p (loop)
1727 && (!loop->inner); /* FORNOW */
1729 if (do_versioning)
1731 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1733 stmt = DR_STMT (dr);
1734 stmt_info = vinfo_for_stmt (stmt);
1736 /* For interleaving, only the alignment of the first access
1737 matters. */
1738 if (aligned_access_p (dr)
1739 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1740 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1741 continue;
1743 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1745 if (!supportable_dr_alignment)
1747 gimple stmt;
1748 int mask;
1749 tree vectype;
1751 if (known_alignment_for_access_p (dr)
1752 || VEC_length (gimple,
1753 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1754 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1756 do_versioning = false;
1757 break;
1760 stmt = DR_STMT (dr);
1761 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1762 gcc_assert (vectype);
1764 /* The rightmost bits of an aligned address must be zeros.
1765 Construct the mask needed for this test. For example,
1766 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1767 mask must be 15 = 0xf. */
1768 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1770 /* FORNOW: use the same mask to test all potentially unaligned
1771 references in the loop. The vectorizer currently supports
1772 a single vector size, see the reference to
1773 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1774 vectorization factor is computed. */
1775 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1776 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1777 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1778 VEC_safe_push (gimple, heap,
1779 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1780 DR_STMT (dr));
1784 /* Versioning requires at least one misaligned data reference. */
1785 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1786 do_versioning = false;
1787 else if (!do_versioning)
1788 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1791 if (do_versioning)
1793 VEC(gimple,heap) *may_misalign_stmts
1794 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1795 gimple stmt;
1797 /* It can now be assumed that the data references in the statements
1798 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1799 of the loop being vectorized. */
1800 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1802 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1803 dr = STMT_VINFO_DATA_REF (stmt_info);
1804 SET_DR_MISALIGNMENT (dr, 0);
1805 if (vect_print_dump_info (REPORT_ALIGNMENT))
1806 fprintf (vect_dump, "Alignment of access forced using versioning.");
1809 if (vect_print_dump_info (REPORT_DETAILS))
1810 fprintf (vect_dump, "Versioning for alignment will be applied.");
1812 /* Peeling and versioning can't be done together at this time. */
1813 gcc_assert (! (do_peeling && do_versioning));
1815 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1816 gcc_assert (stat);
1817 return stat;
1820 /* This point is reached if neither peeling nor versioning is being done. */
1821 gcc_assert (! (do_peeling || do_versioning));
1823 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1824 return stat;
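
/* A minimal sketch of the run-time test that the versioning path above
   prepares; the helper name and its parameters are hypothetical.  It
   assumes a single vector size for the whole loop, as the FORNOW comment
   above notes.  */

static inline bool
sketch_vector_aligned_p (unsigned HOST_WIDE_INT addr, unsigned int vector_size)
{
  /* The rightmost log2 (vector_size) bits of an aligned address are zero;
     e.g. the mask is 0xf for a 16-byte V4SI vector.  */
  unsigned HOST_WIDE_INT mask = vector_size - 1;

  return (addr & mask) == 0;
}

/* The versioned loop is guarded by this test applied to the address of every
   statement recorded in LOOP_VINFO_MAY_MISALIGN_STMTS.  */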
1828 /* Function vect_find_same_alignment_drs.
1830 Update group and alignment relations according to the chosen
1831 vectorization factor. */
1833 static void
1834 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1835 loop_vec_info loop_vinfo)
1837 unsigned int i;
1838 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1839 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1840 struct data_reference *dra = DDR_A (ddr);
1841 struct data_reference *drb = DDR_B (ddr);
1842 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1843 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1844 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1845 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1846 lambda_vector dist_v;
1847 unsigned int loop_depth;
1849 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1850 return;
1852 if (dra == drb)
1853 return;
1855 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1856 return;
1858 /* Loop-based vectorization and known data dependence. */
1859 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1860 return;
1862 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1863 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1865 int dist = dist_v[loop_depth];
1867 if (vect_print_dump_info (REPORT_DR_DETAILS))
1868 fprintf (vect_dump, "dependence distance = %d.", dist);
1870 /* Same loop iteration. */
1871 if (dist == 0
1872 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1874 /* References at distance zero, or at a multiple of VF with equal sizes, have the same alignment. */
1875 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1876 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1877 if (vect_print_dump_info (REPORT_ALIGNMENT))
1878 fprintf (vect_dump, "accesses have the same alignment.");
1879 if (vect_print_dump_info (REPORT_DR_DETAILS))
1881 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1882 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1883 fprintf (vect_dump, " and ");
1884 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
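
/* A worked instance of the condition above, assuming a vector of four
   4-byte ints (VF == 4, 16-byte vectors): accesses a[i] and a[i+8] have
   dependence distance 8, 8 % 4 == 0 and the element sizes match, so their
   addresses always differ by 8 * 4 == 32 bytes, a multiple of the vector
   size, and therefore they have the same misalignment.  The helper below
   merely restates the test; its name is hypothetical.  */

static inline bool
sketch_same_alignment_p (int dist, int vectorization_factor,
                         int size_a, int size_b)
{
  return dist == 0
         || (dist % vectorization_factor == 0 && size_a == size_b);
}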
1891 /* Function vect_analyze_data_refs_alignment
1893 Analyze the alignment of the data-references in the loop.
1894 Return FALSE if a data reference is found that cannot be vectorized. */
1896 bool
1897 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1898 bb_vec_info bb_vinfo)
1900 if (vect_print_dump_info (REPORT_DETAILS))
1901 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1903 /* Mark groups of data references with same alignment using
1904 data dependence information. */
1905 if (loop_vinfo)
1907 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1908 struct data_dependence_relation *ddr;
1909 unsigned int i;
1911 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1912 vect_find_same_alignment_drs (ddr, loop_vinfo);
1915 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1917 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1918 fprintf (vect_dump,
1919 "not vectorized: can't calculate alignment for data ref.");
1920 return false;
1923 return true;
1927 /* Analyze groups of strided accesses: check that DR belongs to a group of
1928 strided accesses of legal size, step, etc. Detect gaps, single element
1929 interleaving, and other special cases. Set strided access info.
1930 Collect groups of strided stores for further use in SLP analysis. */
1932 static bool
1933 vect_analyze_group_access (struct data_reference *dr)
1935 tree step = DR_STEP (dr);
1936 tree scalar_type = TREE_TYPE (DR_REF (dr));
1937 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1938 gimple stmt = DR_STMT (dr);
1939 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1940 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1941 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1942 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1943 HOST_WIDE_INT stride;
1944 bool slp_impossible = false;
1946 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1947 interleaving group (including gaps). */
1948 stride = dr_step / type_size;
1950 /* A non-consecutive access is possible only if it is part of an interleaving group. */
1951 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1953 /* Check if this DR is part of an interleaving group, and is a single
1954 element of the group that is accessed in the loop. */
1956 /* Gaps are supported only for loads. STEP must be a multiple of the type
1957 size. The size of the group must be a power of 2. */
1958 if (DR_IS_READ (dr)
1959 && (dr_step % type_size) == 0
1960 && stride > 0
1961 && exact_log2 (stride) != -1)
1963 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1964 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1965 if (vect_print_dump_info (REPORT_DR_DETAILS))
1967 fprintf (vect_dump, "Detected single element interleaving ");
1968 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1969 fprintf (vect_dump, " step ");
1970 print_generic_expr (vect_dump, step, TDF_SLIM);
1972 return true;
1975 if (vect_print_dump_info (REPORT_DETAILS))
1977 fprintf (vect_dump, "not consecutive access ");
1978 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1981 if (bb_vinfo)
1983 /* Mark the statement as unvectorizable. */
1984 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1985 return true;
1988 return false;
1991 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1993 /* First stmt in the interleaving chain. Check the chain. */
1994 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1995 struct data_reference *data_ref = dr;
1996 unsigned int count = 1;
1997 tree next_step;
1998 tree prev_init = DR_INIT (data_ref);
1999 gimple prev = stmt;
2000 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2002 while (next)
2004 /* Skip same data-refs. If two or more stmts share a data-ref
2005 (supported only for loads), we vectorize only the first stmt, and
2006 the rest get their vectorized loads from the first one. */
2007 if (!tree_int_cst_compare (DR_INIT (data_ref),
2008 DR_INIT (STMT_VINFO_DATA_REF (
2009 vinfo_for_stmt (next)))))
2011 if (!DR_IS_READ (data_ref))
2013 if (vect_print_dump_info (REPORT_DETAILS))
2014 fprintf (vect_dump, "Two store stmts share the same dr.");
2015 return false;
2018 /* Check that there are no load-store dependencies for these loads,
2019 to prevent a load-store-load sequence to the same location. */
2020 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2021 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2023 if (vect_print_dump_info (REPORT_DETAILS))
2024 fprintf (vect_dump,
2025 "READ_WRITE dependence in interleaving.");
2026 return false;
2029 /* For loads, reuse the data-ref load of the earlier stmt. */
2030 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2032 prev = next;
2033 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2034 continue;
2036 prev = next;
2038 /* Check that all the accesses have the same STEP. */
2039 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2040 if (tree_int_cst_compare (step, next_step))
2042 if (vect_print_dump_info (REPORT_DETAILS))
2043 fprintf (vect_dump, "not consecutive access in interleaving");
2044 return false;
2047 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2048 /* Check that the distance between two accesses is equal to the type
2049 size. Otherwise, we have gaps. */
2050 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2051 - TREE_INT_CST_LOW (prev_init)) / type_size;
2052 if (diff != 1)
2054 /* FORNOW: SLP of accesses with gaps is not supported. */
2055 slp_impossible = true;
2056 if (!DR_IS_READ (data_ref))
2058 if (vect_print_dump_info (REPORT_DETAILS))
2059 fprintf (vect_dump, "interleaved store with gaps");
2060 return false;
2063 gaps += diff - 1;
2066 /* Store the gap from the previous member of the group. If there is no
2067 gap in the access, DR_GROUP_GAP is always 1. */
2068 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2070 prev_init = DR_INIT (data_ref);
2071 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2072 /* Count the number of data-refs in the chain. */
2073 count++;
2076 /* COUNT is the number of accesses found; we multiply it by the size of
2077 the type to get COUNT_IN_BYTES. */
2078 count_in_bytes = type_size * count;
2080 /* Check that the size of the interleaving (including gaps) is not
2081 greater than STEP. */
2082 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2084 if (vect_print_dump_info (REPORT_DETAILS))
2086 fprintf (vect_dump, "interleaving size is greater than step for ");
2087 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2089 return false;
2092 /* Check that the size of the interleaving is equal to STEP for stores,
2093 i.e., that there are no gaps. */
2094 if (dr_step && dr_step != count_in_bytes)
2096 if (DR_IS_READ (dr))
2098 slp_impossible = true;
2099 /* There is a gap after the last load in the group. This gap is the
2100 difference between the stride and the number of elements. When
2101 there is no gap, this difference should be 0. */
2102 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2104 else
2106 if (vect_print_dump_info (REPORT_DETAILS))
2107 fprintf (vect_dump, "interleaved store with gaps");
2108 return false;
2112 /* Check that STEP is a multiple of type size. */
2113 if (dr_step && (dr_step % type_size) != 0)
2115 if (vect_print_dump_info (REPORT_DETAILS))
2117 fprintf (vect_dump, "step is not a multiple of type size: step ");
2118 print_generic_expr (vect_dump, step, TDF_SLIM);
2119 fprintf (vect_dump, " size ");
2120 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2121 TDF_SLIM);
2123 return false;
2126 /* FORNOW: we handle only interleaving that is a power of 2.
2127 We don't fail here if it may still be possible to vectorize the
2128 group using SLP. If not, the size of the group will be checked in
2129 vect_analyze_operations, and the vectorization will fail. */
2130 if (exact_log2 (stride) == -1)
2132 if (vect_print_dump_info (REPORT_DETAILS))
2133 fprintf (vect_dump, "interleaving is not a power of 2");
2135 if (slp_impossible)
2136 return false;
2139 if (stride == 0)
2140 stride = count;
2142 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2143 if (vect_print_dump_info (REPORT_DETAILS))
2144 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2146 /* SLP: create an SLP data structure for every interleaving group of
2147 stores for further analysis in vect_analyse_slp. */
2148 if (!DR_IS_READ (dr) && !slp_impossible)
2150 if (loop_vinfo)
2151 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2152 stmt);
2153 if (bb_vinfo)
2154 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2155 stmt);
2159 return true;
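
/* A small numeric sketch of the group bookkeeping above, using hypothetical
   inputs: 2-byte elements, DR_STEP == 8 (so STRIDE == 4), and three group
   members whose DR_INITs are 0, 2 and 6.  The member-to-member diffs are
   1 and 2 elements, i.e. one gap; COUNT == 3 and COUNT_IN_BYTES == 6, and
   6 + 1 * 2 == 8 == DR_STEP, so the group is accepted for loads but would
   be rejected as an "interleaved store with gaps".  The helper below only
   restates the gap computation on plain integers; its name is hypothetical.  */

static HOST_WIDE_INT
sketch_count_group_gaps (const HOST_WIDE_INT *inits, int count,
                         HOST_WIDE_INT type_size)
{
  HOST_WIDE_INT gaps = 0;
  int i;

  /* Each diff larger than one element contributes (diff - 1) gap slots.  */
  for (i = 1; i < count; i++)
    gaps += (inits[i] - inits[i - 1]) / type_size - 1;

  return gaps;
}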
2163 /* Analyze the access pattern of the data-reference DR.
2164 In case of non-consecutive accesses call vect_analyze_group_access() to
2165 analyze groups of strided accesses. */
2167 static bool
2168 vect_analyze_data_ref_access (struct data_reference *dr)
2170 tree step = DR_STEP (dr);
2171 tree scalar_type = TREE_TYPE (DR_REF (dr));
2172 gimple stmt = DR_STMT (dr);
2173 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2174 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2175 struct loop *loop = NULL;
2176 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2178 if (loop_vinfo)
2179 loop = LOOP_VINFO_LOOP (loop_vinfo);
2181 if (loop_vinfo && !step)
2183 if (vect_print_dump_info (REPORT_DETAILS))
2184 fprintf (vect_dump, "bad data-ref access in loop");
2185 return false;
2188 /* Don't allow invariant accesses in loops. */
2189 if (loop_vinfo && dr_step == 0)
2190 return false;
2192 if (loop && nested_in_vect_loop_p (loop, stmt))
2194 /* Interleaved accesses are not yet supported within outer-loop
2195 vectorization for references in the inner-loop. */
2196 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2198 /* For the rest of the analysis we use the outer-loop step. */
2199 step = STMT_VINFO_DR_STEP (stmt_info);
2200 dr_step = TREE_INT_CST_LOW (step);
2202 if (dr_step == 0)
2204 if (vect_print_dump_info (REPORT_ALIGNMENT))
2205 fprintf (vect_dump, "zero step in outer loop.");
2206 if (DR_IS_READ (dr))
2207 return true;
2208 else
2209 return false;
2213 /* Consecutive? */
2214 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2216 /* Mark that it is not interleaving. */
2217 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2218 return true;
2221 if (loop && nested_in_vect_loop_p (loop, stmt))
2223 if (vect_print_dump_info (REPORT_ALIGNMENT))
2224 fprintf (vect_dump, "strided access in outer loop.");
2225 return false;
2228 /* Not a consecutive access - check if it is part of an interleaving group. */
2229 return vect_analyze_group_access (dr);
2233 /* Function vect_analyze_data_ref_accesses.
2235 Analyze the access pattern of all the data references in the loop.
2237 FORNOW: the only access pattern that is considered vectorizable is a
2238 simple step 1 (consecutive) access.
2240 FORNOW: handle only arrays and pointer accesses. */
2242 bool
2243 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2245 unsigned int i;
2246 VEC (data_reference_p, heap) *datarefs;
2247 struct data_reference *dr;
2249 if (vect_print_dump_info (REPORT_DETAILS))
2250 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2252 if (loop_vinfo)
2253 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2254 else
2255 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2257 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2258 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2259 && !vect_analyze_data_ref_access (dr))
2261 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2262 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2264 if (bb_vinfo)
2266 /* Mark the statement as not vectorizable. */
2267 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2268 continue;
2270 else
2271 return false;
2274 return true;
2277 /* Function vect_prune_runtime_alias_test_list.
2279 Prune a list of ddrs to be tested at run-time by versioning for alias.
2280 Return FALSE if the resulting list of ddrs is longer than allowed by
2281 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2283 bool
2284 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2286 VEC (ddr_p, heap) * ddrs =
2287 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2288 unsigned i, j;
2290 if (vect_print_dump_info (REPORT_DETAILS))
2291 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2293 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2295 bool found;
2296 ddr_p ddr_i;
2298 ddr_i = VEC_index (ddr_p, ddrs, i);
2299 found = false;
2301 for (j = 0; j < i; j++)
2303 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2305 if (vect_vfa_range_equal (ddr_i, ddr_j))
2307 if (vect_print_dump_info (REPORT_DR_DETAILS))
2309 fprintf (vect_dump, "found equal ranges ");
2310 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2311 fprintf (vect_dump, ", ");
2312 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2313 fprintf (vect_dump, " and ");
2314 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2315 fprintf (vect_dump, ", ");
2316 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2318 found = true;
2319 break;
2323 if (found)
2325 VEC_ordered_remove (ddr_p, ddrs, i);
2326 continue;
2328 i++;
2331 if (VEC_length (ddr_p, ddrs) >
2332 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2334 if (vect_print_dump_info (REPORT_DR_DETAILS))
2336 fprintf (vect_dump,
2337 "disable versioning for alias - max number of generated "
2338 "checks exceeded.");
2341 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2343 return false;
2346 return true;
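
/* A plain-integer sketch of the pruning policy above; the element values
   stand in for vect_vfa_range_equal equivalence classes and the helper
   name is hypothetical.  Duplicates are dropped first; if more checks
   remain than the PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS limit allows,
   all checks are dropped and versioning for alias is given up.  */

static int
sketch_prune_alias_checks (int *range_class, int n, int max_checks)
{
  int i, j, kept = 0;

  for (i = 0; i < n; i++)
    {
      bool found = false;

      /* Keep only the first member of every equal-range group.  */
      for (j = 0; j < kept; j++)
        if (range_class[j] == range_class[i])
          {
            found = true;
            break;
          }

      if (!found)
        range_class[kept++] = range_class[i];
    }

  /* Too many checks left: give up on versioning altogether.  */
  return kept <= max_checks ? kept : 0;
}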
2350 /* Function vect_analyze_data_refs.
2352 Find all the data references in the loop or basic block.
2354 The general structure of the analysis of data refs in the vectorizer is as
2355 follows:
2356 1- vect_analyze_data_refs(loop/bb): call
2357 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2358 in the loop/bb and their dependences.
2359 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2360 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2361 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2365 bool
2366 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2367 bb_vec_info bb_vinfo,
2368 int *min_vf)
2370 struct loop *loop = NULL;
2371 basic_block bb = NULL;
2372 unsigned int i;
2373 VEC (data_reference_p, heap) *datarefs;
2374 struct data_reference *dr;
2375 tree scalar_type;
2376 bool res;
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2381 if (loop_vinfo)
2383 loop = LOOP_VINFO_LOOP (loop_vinfo);
2384 res = compute_data_dependences_for_loop
2385 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2386 &LOOP_VINFO_DDRS (loop_vinfo));
2388 if (!res)
2390 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2391 fprintf (vect_dump, "not vectorized: loop contains function calls"
2392 " or data references that cannot be analyzed");
2393 return false;
2396 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2398 else
2400 bb = BB_VINFO_BB (bb_vinfo);
2401 res = compute_data_dependences_for_bb (bb, true,
2402 &BB_VINFO_DATAREFS (bb_vinfo),
2403 &BB_VINFO_DDRS (bb_vinfo));
2404 if (!res)
2406 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2407 fprintf (vect_dump, "not vectorized: basic block contains function"
2408 " calls or data references that cannot be analyzed");
2409 return false;
2412 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2415 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2416 from stmt_vec_info struct to DR and vectype. */
2418 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2420 gimple stmt;
2421 stmt_vec_info stmt_info;
2422 tree base, offset, init;
2423 int vf;
2425 if (!dr || !DR_REF (dr))
2427 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2428 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2429 return false;
2432 stmt = DR_STMT (dr);
2433 stmt_info = vinfo_for_stmt (stmt);
2435 /* Check that analysis of the data-ref succeeded. */
2436 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2437 || !DR_STEP (dr))
2439 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2441 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2442 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2445 if (bb_vinfo)
2447 /* Mark the statement as not vectorizable. */
2448 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2449 continue;
2451 else
2452 return false;
2455 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2457 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2458 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2459 "constant");
2460 if (bb_vinfo)
2462 /* Mark the statement as not vectorizable. */
2463 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2464 continue;
2466 else
2467 return false;
2470 base = unshare_expr (DR_BASE_ADDRESS (dr));
2471 offset = unshare_expr (DR_OFFSET (dr));
2472 init = unshare_expr (DR_INIT (dr));
2474 /* Update DR field in stmt_vec_info struct. */
2476 /* If the dataref is in an inner-loop of the loop that is considered
2477 for vectorization, we also want to analyze the access relative to
2478 the outer-loop (DR contains information only relative to the
2479 inner-most enclosing loop). We do that by building a reference to the
2480 first location accessed by the inner-loop, and analyze it relative to
2481 the outer-loop. */
2482 if (loop && nested_in_vect_loop_p (loop, stmt))
2484 tree outer_step, outer_base, outer_init;
2485 HOST_WIDE_INT pbitsize, pbitpos;
2486 tree poffset;
2487 enum machine_mode pmode;
2488 int punsignedp, pvolatilep;
2489 affine_iv base_iv, offset_iv;
2490 tree dinit;
2492 /* Build a reference to the first location accessed by the
2493 inner-loop: *(BASE+INIT). (The first location is actually
2494 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2495 tree inner_base = build_fold_indirect_ref
2496 (fold_build2 (POINTER_PLUS_EXPR,
2497 TREE_TYPE (base), base,
2498 fold_convert (sizetype, init)));
2500 if (vect_print_dump_info (REPORT_DETAILS))
2502 fprintf (vect_dump, "analyze in outer-loop: ");
2503 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2506 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2507 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2508 gcc_assert (outer_base != NULL_TREE);
2510 if (pbitpos % BITS_PER_UNIT != 0)
2512 if (vect_print_dump_info (REPORT_DETAILS))
2513 fprintf (vect_dump, "failed: bit offset alignment.\n");
2514 return false;
2517 outer_base = build_fold_addr_expr (outer_base);
2518 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2519 &base_iv, false))
2521 if (vect_print_dump_info (REPORT_DETAILS))
2522 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2523 return false;
2526 if (offset)
2528 if (poffset)
2529 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2530 poffset);
2531 else
2532 poffset = offset;
2535 if (!poffset)
2537 offset_iv.base = ssize_int (0);
2538 offset_iv.step = ssize_int (0);
2540 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2541 &offset_iv, false))
2543 if (vect_print_dump_info (REPORT_DETAILS))
2544 fprintf (vect_dump, "evolution of offset is not affine.\n");
2545 return false;
2548 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2549 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2550 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2551 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2552 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2554 outer_step = size_binop (PLUS_EXPR,
2555 fold_convert (ssizetype, base_iv.step),
2556 fold_convert (ssizetype, offset_iv.step));
2558 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2559 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2560 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2561 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2562 STMT_VINFO_DR_OFFSET (stmt_info) =
2563 fold_convert (ssizetype, offset_iv.base);
2564 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2565 size_int (highest_pow2_factor (offset_iv.base));
2567 if (vect_print_dump_info (REPORT_DETAILS))
2569 fprintf (vect_dump, "\touter base_address: ");
2570 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2571 fprintf (vect_dump, "\n\touter offset from base address: ");
2572 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2573 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2574 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2575 fprintf (vect_dump, "\n\touter step: ");
2576 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2577 fprintf (vect_dump, "\n\touter aligned to: ");
2578 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2582 if (STMT_VINFO_DATA_REF (stmt_info))
2584 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2586 fprintf (vect_dump,
2587 "not vectorized: more than one data ref in stmt: ");
2588 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2590 return false;
2593 STMT_VINFO_DATA_REF (stmt_info) = dr;
2595 /* Set vectype for STMT. */
2596 scalar_type = TREE_TYPE (DR_REF (dr));
2597 STMT_VINFO_VECTYPE (stmt_info) =
2598 get_vectype_for_scalar_type (scalar_type);
2599 if (!STMT_VINFO_VECTYPE (stmt_info))
2601 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2603 fprintf (vect_dump,
2604 "not vectorized: no vectype for stmt: ");
2605 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2606 fprintf (vect_dump, " scalar_type: ");
2607 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2610 if (bb_vinfo)
2612 /* Mark the statement as not vectorizable. */
2613 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2614 continue;
2616 else
2617 return false;
2620 /* Adjust the minimal vectorization factor according to the
2621 vector type. */
2622 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2623 if (vf > *min_vf)
2624 *min_vf = vf;
2627 return true;
2631 /* Function vect_get_new_vect_var.
2633 Returns a new variable. The current naming scheme prepends the prefix
2634 "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND) to NAME
2635 when NAME is provided, and uses the prefix alone as the variable name
2636 otherwise. */
2638 tree
2639 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2641 const char *prefix;
2642 tree new_vect_var;
2644 switch (var_kind)
2646 case vect_simple_var:
2647 prefix = "vect_";
2648 break;
2649 case vect_scalar_var:
2650 prefix = "stmp_";
2651 break;
2652 case vect_pointer_var:
2653 prefix = "vect_p";
2654 break;
2655 default:
2656 gcc_unreachable ();
2659 if (name)
2661 char* tmp = concat (prefix, name, NULL);
2662 new_vect_var = create_tmp_var (type, tmp);
2663 free (tmp);
2665 else
2666 new_vect_var = create_tmp_var (type, prefix);
2668 /* Mark vector typed variable as a gimple register variable. */
2669 if (TREE_CODE (type) == VECTOR_TYPE)
2670 DECL_GIMPLE_REG_P (new_vect_var) = true;
2672 return new_vect_var;
2676 /* Function vect_create_addr_base_for_vector_ref.
2678 Create an expression that computes the address of the first memory location
2679 that will be accessed for a data reference.
2681 Input:
2682 STMT: The statement containing the data reference.
2683 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2684 OFFSET: Optional. If supplied, it is added to the initial address.
2685 LOOP: Specify relative to which loop-nest the address should be computed.
2686 For example, when the dataref is in an inner-loop nested in an
2687 outer-loop that is now being vectorized, LOOP can be either the
2688 outer-loop, or the inner-loop. The first memory location accessed
2689 by the following dataref ('in' points to short):
2691 for (i=0; i<N; i++)
2692 for (j=0; j<M; j++)
2693 s += in[i+j]
2695 is as follows:
2696 if LOOP=i_loop: &in (relative to i_loop)
2697 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2699 Output:
2700 1. Return an SSA_NAME whose value is the address of the memory location of
2701 the first vector of the data reference.
2702 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2703 these statement(s) which define the returned SSA_NAME.
2705 FORNOW: We are only handling array accesses with step 1. */
2707 tree
2708 vect_create_addr_base_for_vector_ref (gimple stmt,
2709 gimple_seq *new_stmt_list,
2710 tree offset,
2711 struct loop *loop)
2713 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2714 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2715 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2716 tree base_name;
2717 tree data_ref_base_var;
2718 tree vec_stmt;
2719 tree addr_base, addr_expr;
2720 tree dest;
2721 gimple_seq seq = NULL;
2722 tree base_offset = unshare_expr (DR_OFFSET (dr));
2723 tree init = unshare_expr (DR_INIT (dr));
2724 tree vect_ptr_type;
2725 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2726 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2727 tree base;
2729 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2731 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2733 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2735 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2736 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2737 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2740 if (loop_vinfo)
2741 base_name = build_fold_indirect_ref (data_ref_base);
2742 else
2744 base_offset = ssize_int (0);
2745 init = ssize_int (0);
2746 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2749 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2750 add_referenced_var (data_ref_base_var);
2751 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2752 data_ref_base_var);
2753 gimple_seq_add_seq (new_stmt_list, seq);
2755 /* Create base_offset */
2756 base_offset = size_binop (PLUS_EXPR,
2757 fold_convert (sizetype, base_offset),
2758 fold_convert (sizetype, init));
2759 dest = create_tmp_var (sizetype, "base_off");
2760 add_referenced_var (dest);
2761 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2762 gimple_seq_add_seq (new_stmt_list, seq);
2764 if (offset)
2766 tree tmp = create_tmp_var (sizetype, "offset");
2768 add_referenced_var (tmp);
2769 offset = fold_build2 (MULT_EXPR, sizetype,
2770 fold_convert (sizetype, offset), step);
2771 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2772 base_offset, offset);
2773 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2774 gimple_seq_add_seq (new_stmt_list, seq);
2777 /* base + base_offset */
2778 if (loop_vinfo)
2779 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2780 data_ref_base, base_offset);
2781 else
2783 addr_base = build1 (ADDR_EXPR,
2784 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2785 unshare_expr (DR_REF (dr)));
2788 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2789 base = get_base_address (DR_REF (dr));
2790 if (base
2791 && TREE_CODE (base) == MEM_REF)
2792 vect_ptr_type
2793 = build_qualified_type (vect_ptr_type,
2794 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2796 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2797 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2798 get_name (base_name));
2799 add_referenced_var (addr_expr);
2800 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2801 gimple_seq_add_seq (new_stmt_list, seq);
2803 if (DR_PTR_INFO (dr)
2804 && TREE_CODE (vec_stmt) == SSA_NAME)
2805 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2807 if (vect_print_dump_info (REPORT_DETAILS))
2809 fprintf (vect_dump, "created ");
2810 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2813 return vec_stmt;
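
/* A numeric sketch of the address expression built above, reusing the
   in[i+j] example from the function comment ('in' points to 2-byte shorts)
   and treating addresses as plain integers.  Relative to the j-loop the
   base is &in and DR_OFFSET + DR_INIT contributes i*2, so with no extra
   OFFSET the result is &in + i*2; an OFFSET of, say, 3 elements is first
   scaled by the element size, giving &in + i*2 + 3*2.  The helper name and
   its parameters are hypothetical.  */

static inline unsigned HOST_WIDE_INT
sketch_addr_base (unsigned HOST_WIDE_INT base, unsigned HOST_WIDE_INT dr_offset,
                  unsigned HOST_WIDE_INT dr_init, unsigned HOST_WIDE_INT offset,
                  unsigned HOST_WIDE_INT elem_size)
{
  /* base + (DR_OFFSET + DR_INIT) + OFFSET scaled to bytes.  */
  return base + (dr_offset + dr_init) + offset * elem_size;
}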
2817 /* Function vect_create_data_ref_ptr.
2819 Create a new pointer to vector type (vp), that points to the first location
2820 accessed in the loop by STMT, along with the def-use update chain to
2821 appropriately advance the pointer through the loop iterations. Also set
2822 aliasing information for the pointer. This vector pointer is used by the
2823 callers to this function to create a memory reference expression for vector
2824 load/store access.
2826 Input:
2827 1. STMT: a stmt that references memory. Expected to be of the form
2828 GIMPLE_ASSIGN <name, data-ref> or
2829 GIMPLE_ASSIGN <data-ref, name>.
2830 2. AT_LOOP: the loop where the vector memref is to be created.
2831 3. OFFSET (optional): an offset to be added to the initial address accessed
2832 by the data-ref in STMT.
2833 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2834 pointing to the initial address.
2835 5. TYPE: if not NULL indicates the required type of the data-ref.
2837 Output:
2838 1. Declare a new ptr to vector_type, and have it point to the base of the
2839 data reference (the initial address accessed by the data reference).
2840 For example, for vector of type V8HI, the following code is generated:
2842 v8hi *vp;
2843 vp = (v8hi *)initial_address;
2845 if OFFSET is not supplied:
2846 initial_address = &a[init];
2847 if OFFSET is supplied:
2848 initial_address = &a[init + OFFSET];
2850 Return the initial_address in INITIAL_ADDRESS.
2852 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2853 update the pointer in each iteration of the loop.
2855 Return the increment stmt that updates the pointer in PTR_INCR.
2857 3. Set INV_P to true if the access pattern of the data reference in the
2858 vectorized loop is invariant. Set it to false otherwise.
2860 4. Return the pointer. */
2862 tree
2863 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2864 tree offset, tree *initial_address, gimple *ptr_incr,
2865 bool only_init, bool *inv_p)
2867 tree base_name;
2868 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2869 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2870 struct loop *loop = NULL;
2871 bool nested_in_vect_loop = false;
2872 struct loop *containing_loop = NULL;
2873 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2874 tree vect_ptr_type;
2875 tree vect_ptr;
2876 tree new_temp;
2877 gimple vec_stmt;
2878 gimple_seq new_stmt_list = NULL;
2879 edge pe = NULL;
2880 basic_block new_bb;
2881 tree vect_ptr_init;
2882 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2883 tree vptr;
2884 gimple_stmt_iterator incr_gsi;
2885 bool insert_after;
2886 tree indx_before_incr, indx_after_incr;
2887 gimple incr;
2888 tree step;
2889 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2890 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2891 tree base;
2893 if (loop_vinfo)
2895 loop = LOOP_VINFO_LOOP (loop_vinfo);
2896 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2897 containing_loop = (gimple_bb (stmt))->loop_father;
2898 pe = loop_preheader_edge (loop);
2900 else
2902 gcc_assert (bb_vinfo);
2903 only_init = true;
2904 *ptr_incr = NULL;
2907 /* Check the step (evolution) of the load in LOOP, and record
2908 whether it's invariant. */
2909 if (nested_in_vect_loop)
2910 step = STMT_VINFO_DR_STEP (stmt_info);
2911 else
2912 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2914 if (tree_int_cst_compare (step, size_zero_node) == 0)
2915 *inv_p = true;
2916 else
2917 *inv_p = false;
2919 /* Create an expression for the first address accessed by this load
2920 in LOOP. */
2921 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2923 if (vect_print_dump_info (REPORT_DETAILS))
2925 tree data_ref_base = base_name;
2926 fprintf (vect_dump, "create vector-pointer variable to type: ");
2927 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2928 if (TREE_CODE (data_ref_base) == VAR_DECL
2929 || TREE_CODE (data_ref_base) == ARRAY_REF)
2930 fprintf (vect_dump, " vectorizing an array ref: ");
2931 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2932 fprintf (vect_dump, " vectorizing a record based array ref: ");
2933 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2934 fprintf (vect_dump, " vectorizing a pointer ref: ");
2935 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2938 /** (1) Create the new vector-pointer variable: **/
2939 vect_ptr_type = build_pointer_type (vectype);
2940 base = get_base_address (DR_REF (dr));
2941 if (base
2942 && TREE_CODE (base) == MEM_REF)
2943 vect_ptr_type
2944 = build_qualified_type (vect_ptr_type,
2945 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2946 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2947 get_name (base_name));
2949 /* Vector types inherit the alias set of their component type by default so
2950 we need to use a ref-all pointer if the data reference does not conflict
2951 with the created vector data reference because it is not addressable. */
2952 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2953 get_alias_set (DR_REF (dr))))
2955 vect_ptr_type
2956 = build_pointer_type_for_mode (vectype,
2957 TYPE_MODE (vect_ptr_type), true);
2958 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2959 get_name (base_name));
2962 /* Likewise for any of the data references in the stmt group. */
2963 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2965 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2968 tree lhs = gimple_assign_lhs (orig_stmt);
2969 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2970 get_alias_set (lhs)))
2972 vect_ptr_type
2973 = build_pointer_type_for_mode (vectype,
2974 TYPE_MODE (vect_ptr_type), true);
2975 vect_ptr
2976 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2977 get_name (base_name));
2978 break;
2981 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2983 while (orig_stmt);
2986 add_referenced_var (vect_ptr);
2988 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2989 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2990 def-use update cycles for the pointer: One relative to the outer-loop
2991 (LOOP), which is what steps (3) and (4) below do. The other is relative
2992 to the inner-loop (which is the inner-most loop containing the dataref),
2993 and this is done by step (5) below.
2995 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2996 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2997 redundant. Steps (3),(4) create the following:
2999 vp0 = &base_addr;
3000 LOOP: vp1 = phi(vp0,vp2)
3003 vp2 = vp1 + step
3004 goto LOOP
3006 If there is an inner-loop nested in loop, then step (5) will also be
3007 applied, and an additional update in the inner-loop will be created:
3009 vp0 = &base_addr;
3010 LOOP: vp1 = phi(vp0,vp2)
3012 inner: vp3 = phi(vp1,vp4)
3013 vp4 = vp3 + inner_step
3014 if () goto inner
3016 vp2 = vp1 + step
3017 if () goto LOOP */
3019 /** (3) Calculate the initial address of the vector-pointer, and set
3020 the vector-pointer to point to it before the loop: **/
3022 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3024 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3025 offset, loop);
3026 if (new_stmt_list)
3028 if (pe)
3030 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3031 gcc_assert (!new_bb);
3033 else
3034 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3037 *initial_address = new_temp;
3039 /* Create: p = (vectype *) initial_base */
3040 if (TREE_CODE (new_temp) != SSA_NAME
3041 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3043 vec_stmt = gimple_build_assign (vect_ptr,
3044 fold_convert (vect_ptr_type, new_temp));
3045 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3046 /* Copy the points-to information if it exists. */
3047 if (DR_PTR_INFO (dr))
3048 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3049 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3050 if (pe)
3052 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3053 gcc_assert (!new_bb);
3055 else
3056 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3058 else
3059 vect_ptr_init = new_temp;
3061 /** (4) Handle the updating of the vector-pointer inside the loop.
3062 This is needed when ONLY_INIT is false, and also when AT_LOOP
3063 is the inner-loop nested in LOOP (during outer-loop vectorization).
3066 /* No update in loop is required. */
3067 if (only_init && (!loop_vinfo || at_loop == loop))
3068 vptr = vect_ptr_init;
3069 else
3071 /* The step of the vector pointer is the Vector Size. */
3072 tree step = TYPE_SIZE_UNIT (vectype);
3073 /* One exception to the above is when the scalar step of the load in
3074 LOOP is zero. In this case the step here is also zero. */
3075 if (*inv_p)
3076 step = size_zero_node;
3078 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3080 create_iv (vect_ptr_init,
3081 fold_convert (vect_ptr_type, step),
3082 vect_ptr, loop, &incr_gsi, insert_after,
3083 &indx_before_incr, &indx_after_incr);
3084 incr = gsi_stmt (incr_gsi);
3085 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3087 /* Copy the points-to information if it exists. */
3088 if (DR_PTR_INFO (dr))
3090 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3091 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3093 if (ptr_incr)
3094 *ptr_incr = incr;
3096 vptr = indx_before_incr;
3099 if (!nested_in_vect_loop || only_init)
3100 return vptr;
3103 /** (5) Handle the updating of the vector-pointer inside the inner-loop
3104 nested in LOOP, if it exists: **/
3106 gcc_assert (nested_in_vect_loop);
3107 if (!only_init)
3109 standard_iv_increment_position (containing_loop, &incr_gsi,
3110 &insert_after);
3111 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3112 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3113 &indx_after_incr);
3114 incr = gsi_stmt (incr_gsi);
3115 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3117 /* Copy the points-to information if it exists. */
3118 if (DR_PTR_INFO (dr))
3120 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3121 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3123 if (ptr_incr)
3124 *ptr_incr = incr;
3126 return indx_before_incr;
3128 else
3129 gcc_unreachable ();
3133 /* Function bump_vector_ptr
3135 Increment a pointer (to a vector type) by vector-size. If requested,
3136 i.e. if PTR-INCR is given, then also connect the new increment stmt
3137 to the existing def-use update-chain of the pointer, by modifying
3138 the PTR_INCR as illustrated below:
3140 The pointer def-use update-chain before this function:
3141 DATAREF_PTR = phi (p_0, p_2)
3142 ....
3143 PTR_INCR: p_2 = DATAREF_PTR + step
3145 The pointer def-use update-chain after this function:
3146 DATAREF_PTR = phi (p_0, p_2)
3147 ....
3148 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3149 ....
3150 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3152 Input:
3153 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3154 in the loop.
3155 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3156 the loop. The increment amount across iterations is expected
3157 to be vector_size.
3158 BSI - location where the new update stmt is to be placed.
3159 STMT - the original scalar memory-access stmt that is being vectorized.
3160 BUMP - optional. The offset by which to bump the pointer. If not given,
3161 the offset is assumed to be vector_size.
3163 Output: Return NEW_DATAREF_PTR as illustrated above.
3167 tree
3168 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3169 gimple stmt, tree bump)
3171 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3172 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3173 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3174 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3175 tree update = TYPE_SIZE_UNIT (vectype);
3176 gimple incr_stmt;
3177 ssa_op_iter iter;
3178 use_operand_p use_p;
3179 tree new_dataref_ptr;
3181 if (bump)
3182 update = bump;
3184 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3185 dataref_ptr, update);
3186 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3187 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3188 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3190 /* Copy the points-to information if it exists. */
3191 if (DR_PTR_INFO (dr))
3192 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3194 if (!ptr_incr)
3195 return new_dataref_ptr;
3197 /* Update the vector-pointer's cross-iteration increment. */
3198 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3200 tree use = USE_FROM_PTR (use_p);
3202 if (use == dataref_ptr)
3203 SET_USE (use_p, new_dataref_ptr);
3204 else
3205 gcc_assert (tree_int_cst_compare (use, update) == 0);
3208 return new_dataref_ptr;
3212 /* Function vect_create_destination_var.
3214 Create a new temporary of type VECTYPE. */
3216 tree
3217 vect_create_destination_var (tree scalar_dest, tree vectype)
3219 tree vec_dest;
3220 const char *new_name;
3221 tree type;
3222 enum vect_var_kind kind;
3224 kind = vectype ? vect_simple_var : vect_scalar_var;
3225 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3227 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3229 new_name = get_name (scalar_dest);
3230 if (!new_name)
3231 new_name = "var_";
3232 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3233 add_referenced_var (vec_dest);
3235 return vec_dest;
3238 /* Function vect_strided_store_supported.
3240 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3241 and FALSE otherwise. */
3243 bool
3244 vect_strided_store_supported (tree vectype)
3246 optab interleave_high_optab, interleave_low_optab;
3247 enum machine_mode mode;
3249 mode = TYPE_MODE (vectype);
3251 /* Check that the operation is supported. */
3252 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3253 vectype, optab_default);
3254 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3255 vectype, optab_default);
3256 if (!interleave_high_optab || !interleave_low_optab)
3258 if (vect_print_dump_info (REPORT_DETAILS))
3259 fprintf (vect_dump, "no optab for interleave.");
3260 return false;
3263 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3264 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3266 if (vect_print_dump_info (REPORT_DETAILS))
3267 fprintf (vect_dump, "interleave op not supported by target.");
3268 return false;
3271 return true;
3275 /* Function vect_permute_store_chain.
3277 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3278 a power of 2, generate interleave_high/low stmts to reorder the data
3279 correctly for the stores. Return the final references for stores in
3280 RESULT_CHAIN.
3282 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3283 The input is 4 vectors each containing 8 elements. We assign a number to each
3284 element; the input sequence is:
3286 1st vec: 0 1 2 3 4 5 6 7
3287 2nd vec: 8 9 10 11 12 13 14 15
3288 3rd vec: 16 17 18 19 20 21 22 23
3289 4th vec: 24 25 26 27 28 29 30 31
3291 The output sequence should be:
3293 1st vec: 0 8 16 24 1 9 17 25
3294 2nd vec: 2 10 18 26 3 11 19 27
3295 3rd vec: 4 12 20 28 5 13 21 29
3296 4th vec: 6 14 22 30 7 15 23 31
3298 i.e., we interleave the contents of the four vectors in their order.
3300 We use interleave_high/low instructions to create such output. The input of
3301 each interleave_high/low operation is two vectors:
3302 1st vec 2nd vec
3303 0 1 2 3 4 5 6 7
3304 the even elements of the result vector are obtained left-to-right from the
3305 high/low elements of the first vector. The odd elements of the result are
3306 obtained left-to-right from the high/low elements of the second vector.
3307 The output of interleave_high will be: 0 4 1 5
3308 and of interleave_low: 2 6 3 7
3311 The permutation is done in log2 (LENGTH) stages. In each stage interleave_high
3312 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3313 where the first argument is taken from the first half of DR_CHAIN and the
3314 second argument from its second half.
3315 In our example,
3317 I1: interleave_high (1st vec, 3rd vec)
3318 I2: interleave_low (1st vec, 3rd vec)
3319 I3: interleave_high (2nd vec, 4th vec)
3320 I4: interleave_low (2nd vec, 4th vec)
3322 The output for the first stage is:
3324 I1: 0 16 1 17 2 18 3 19
3325 I2: 4 20 5 21 6 22 7 23
3326 I3: 8 24 9 25 10 26 11 27
3327 I4: 12 28 13 29 14 30 15 31
3329 The output of the second stage, i.e. the final result is:
3331 I1: 0 8 16 24 1 9 17 25
3332 I2: 2 10 18 26 3 11 19 27
3333 I3: 4 12 20 28 5 13 21 29
3334 I4: 6 14 22 30 7 15 23 31. */
3336 bool
3337 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3338 unsigned int length,
3339 gimple stmt,
3340 gimple_stmt_iterator *gsi,
3341 VEC(tree,heap) **result_chain)
3343 tree perm_dest, vect1, vect2, high, low;
3344 gimple perm_stmt;
3345 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3346 int i;
3347 unsigned int j;
3348 enum tree_code high_code, low_code;
3350 /* Check that the operation is supported. */
3351 if (!vect_strided_store_supported (vectype))
3352 return false;
3354 *result_chain = VEC_copy (tree, heap, dr_chain);
3356 for (i = 0; i < exact_log2 (length); i++)
3358 for (j = 0; j < length/2; j++)
3360 vect1 = VEC_index (tree, dr_chain, j);
3361 vect2 = VEC_index (tree, dr_chain, j+length/2);
3363 /* Create interleaving stmt:
3364 in the case of big endian:
3365 high = interleave_high (vect1, vect2)
3366 and in the case of little endian:
3367 high = interleave_low (vect1, vect2). */
3368 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3369 DECL_GIMPLE_REG_P (perm_dest) = 1;
3370 add_referenced_var (perm_dest);
3371 if (BYTES_BIG_ENDIAN)
3373 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3374 low_code = VEC_INTERLEAVE_LOW_EXPR;
3376 else
3378 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3379 high_code = VEC_INTERLEAVE_LOW_EXPR;
3381 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3382 vect1, vect2);
3383 high = make_ssa_name (perm_dest, perm_stmt);
3384 gimple_assign_set_lhs (perm_stmt, high);
3385 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3386 VEC_replace (tree, *result_chain, 2*j, high);
3388 /* Create interleaving stmt:
3389 in the case of big endian:
3390 low = interleave_low (vect1, vect2)
3391 and in the case of little endian:
3392 low = interleave_high (vect1, vect2). */
3393 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3394 DECL_GIMPLE_REG_P (perm_dest) = 1;
3395 add_referenced_var (perm_dest);
3396 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3397 vect1, vect2);
3398 low = make_ssa_name (perm_dest, perm_stmt);
3399 gimple_assign_set_lhs (perm_stmt, low);
3400 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3401 VEC_replace (tree, *result_chain, 2*j+1, low);
3403 dr_chain = VEC_copy (tree, heap, *result_chain);
3405 return true;
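
/* A plain-C model of the permutation above, small enough to check the
   4-vector/8-element example in the function comment by hand.  The int
   element type, the fixed vector length and all names below are
   assumptions made only for this sketch; in the vectorizer the
   interleaving is performed by the target's VEC_INTERLEAVE_HIGH/LOW
   operations on whole vectors, with the high/low roles swapped on
   little-endian targets.  */

#define SKETCH_NELT 8   /* Elements per vector in the example.  */

static void
sketch_interleave (const int *a, const int *b, int *out_high, int *out_low)
{
  int k;

  /* out_high interleaves the first halves of A and B, out_low the
     second halves, matching the 0 4 1 5 / 2 6 3 7 example above.  */
  for (k = 0; k < SKETCH_NELT / 2; k++)
    {
      out_high[2 * k]     = a[k];
      out_high[2 * k + 1] = b[k];
      out_low[2 * k]      = a[SKETCH_NELT / 2 + k];
      out_low[2 * k + 1]  = b[SKETCH_NELT / 2 + k];
    }
}

static void
sketch_permute_store_chain (int chain[][SKETCH_NELT], int length)
{
  int i, j, k;
  int result[8][SKETCH_NELT];   /* Enough for LENGTH <= 8.  */

  /* log2 (LENGTH) stages; each stage pairs vector J with vector
     J + LENGTH/2 and writes the high/low results to 2*J and 2*J+1.  */
  for (i = 0; (1 << i) < length; i++)
    {
      for (j = 0; j < length / 2; j++)
        sketch_interleave (chain[j], chain[j + length / 2],
                           result[2 * j], result[2 * j + 1]);

      for (j = 0; j < length; j++)
        for (k = 0; k < SKETCH_NELT; k++)
          chain[j][k] = result[j][k];
    }
}

/* Feeding the four vectors { 0..7, 8..15, 16..23, 24..31 } through
   sketch_permute_store_chain with LENGTH == 4 reproduces the final result
   shown in the comment:  0 8 16 24 1 9 17 25 | 2 10 18 26 3 11 19 27 |
   4 12 20 28 5 13 21 29 | 6 14 22 30 7 15 23 31.  */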
3408 /* Function vect_setup_realignment
3410 This function is called when vectorizing an unaligned load using
3411 the dr_explicit_realign[_optimized] scheme.
3412 This function generates the following code at the loop prolog:
3414 p = initial_addr;
3415 x msq_init = *(floor(p)); # prolog load
3416 realignment_token = call target_builtin;
3417 loop:
3418 x msq = phi (msq_init, ---)
3420 The stmts marked with x are generated only for the case of
3421 dr_explicit_realign_optimized.
3423 The code above sets up a new (vector) pointer, pointing to the first
3424 location accessed by STMT, and a "floor-aligned" load using that pointer.
3425 It also generates code to compute the "realignment-token" (if the relevant
3426 target hook was defined), and creates a phi-node at the loop-header bb
3427 whose arguments are the result of the prolog-load (created by this
3428 function) and the result of a load that takes place in the loop (to be
3429 created by the caller to this function).
3431 For the case of dr_explicit_realign_optimized:
3432 The caller to this function uses the phi-result (msq) to create the
3433 realignment code inside the loop, and sets up the missing phi argument,
3434 as follows:
3435 loop:
3436 msq = phi (msq_init, lsq)
3437 lsq = *(floor(p')); # load in loop
3438 result = realign_load (msq, lsq, realignment_token);
3440 For the case of dr_explicit_realign:
3441 loop:
3442 msq = *(floor(p)); # load in loop
3443 p' = p + (VS-1);
3444 lsq = *(floor(p')); # load in loop
3445 result = realign_load (msq, lsq, realignment_token);
3447 Input:
3448 STMT - (scalar) load stmt to be vectorized. This load accesses
3449 a memory location that may be unaligned.
3450 BSI - place where new code is to be inserted.
3451 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3452 is used.
3454 Output:
3455 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3456 target hook, if defined.
3457 Return value - the result of the loop-header phi node. */
3459 tree
3460 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3461 tree *realignment_token,
3462 enum dr_alignment_support alignment_support_scheme,
3463 tree init_addr,
3464 struct loop **at_loop)
3466 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3467 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3468 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3469 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3470 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3471 edge pe;
3472 tree scalar_dest = gimple_assign_lhs (stmt);
3473 tree vec_dest;
3474 gimple inc;
3475 tree ptr;
3476 tree data_ref;
3477 gimple new_stmt;
3478 basic_block new_bb;
3479 tree msq_init = NULL_TREE;
3480 tree new_temp;
3481 gimple phi_stmt;
3482 tree msq = NULL_TREE;
3483 gimple_seq stmts = NULL;
3484 bool inv_p;
3485 bool compute_in_loop = false;
3486 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3487 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3488 struct loop *loop_for_initial_load;
3490 gcc_assert (alignment_support_scheme == dr_explicit_realign
3491 || alignment_support_scheme == dr_explicit_realign_optimized);
3493 /* We need to generate three things:
3494 1. the misalignment computation
3495 2. the extra vector load (for the optimized realignment scheme).
3496 3. the phi node for the two vectors from which the realignment is
3497 done (for the optimized realignment scheme).
3500 /* 1. Determine where to generate the misalignment computation.
3502 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3503 calculation will be generated by this function, outside the loop (in the
3504 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3505 caller, inside the loop.
3507 Background: If the misalignment remains fixed throughout the iterations of
3508 the loop, then both realignment schemes are applicable, and also the
3509 misalignment computation can be done outside LOOP. This is because we are
3510 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3511 are a multiple of VS (the Vector Size), and therefore the misalignment in
3512 different vectorized LOOP iterations is always the same.
3513 The problem arises only if the memory access is in an inner-loop nested
3514 inside LOOP, which is now being vectorized using outer-loop vectorization.
3515 This is the only case when the misalignment of the memory access may not
3516 remain fixed throughout the iterations of the inner-loop (as explained in
3517 detail in vect_supportable_dr_alignment). In this case, not only is the
3518 optimized realignment scheme not applicable, but also the misalignment
3519 computation (and generation of the realignment token that is passed to
3520 REALIGN_LOAD) have to be done inside the loop.
3522 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3523 or not, which in turn determines if the misalignment is computed inside
3524 the inner-loop, or outside LOOP. */
3526 if (init_addr != NULL_TREE)
3528 compute_in_loop = true;
3529 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3533 /* 2. Determine where to generate the extra vector load.
3535 For the optimized realignment scheme, instead of generating two vector
3536 loads in each iteration, we generate a single extra vector load in the
3537 preheader of the loop, and in each iteration reuse the result of the
3538 vector load from the previous iteration. In case the memory access is in
3539 an inner-loop nested inside LOOP, which is now being vectorized using
3540 outer-loop vectorization, we need to determine whether this initial vector
3541 load should be generated at the preheader of the inner-loop, or can be
3542 generated at the preheader of LOOP. If the memory access has no evolution
3543 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3544 to be generated inside LOOP (in the preheader of the inner-loop). */
3546 if (nested_in_vect_loop)
3548 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3549 bool invariant_in_outerloop =
3550 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3551 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3553 else
3554 loop_for_initial_load = loop;
3555 if (at_loop)
3556 *at_loop = loop_for_initial_load;
3558 /* 3. For the case of the optimized realignment, create the first vector
3559 load at the loop preheader. */
3561 if (alignment_support_scheme == dr_explicit_realign_optimized)
3563 /* Create msq_init = *(floor(p1)) in the loop preheader */
3565 gcc_assert (!compute_in_loop);
3566 pe = loop_preheader_edge (loop_for_initial_load);
3567 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3568 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3569 &init_addr, &inc, true, &inv_p);
3570 new_stmt = gimple_build_assign_with_ops
3571 (BIT_AND_EXPR, NULL_TREE, ptr,
3572 build_int_cst (TREE_TYPE (ptr),
3573 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3574 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3575 gimple_assign_set_lhs (new_stmt, new_temp);
3576 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3577 gcc_assert (!new_bb);
3578 data_ref
3579 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3580 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3581 new_stmt = gimple_build_assign (vec_dest, data_ref);
3582 new_temp = make_ssa_name (vec_dest, new_stmt);
3583 gimple_assign_set_lhs (new_stmt, new_temp);
3584 mark_symbols_for_renaming (new_stmt);
3585 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3586 gcc_assert (!new_bb);
3587 msq_init = gimple_assign_lhs (new_stmt);
3590 /* 4. Create realignment token using a target builtin, if available.
3591 It is done either inside the containing loop, or before LOOP (as
3592 determined above). */
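/* Conceptually (a sketch, not the exact target semantics), the token encodes
   the byte misalignment of INIT_ADDR so that the later REALIGN_LOAD can pick
   the right elements out of the two aligned vectors:

     token = mask_for_load (init_addr);   // roughly a function of init_addr % VS
     ...
     va = REALIGN_LOAD <v1, v2, token>;

   What the builtin actually returns is target-defined.  */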
3594 if (targetm.vectorize.builtin_mask_for_load)
3596 tree builtin_decl;
3598 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3599 if (compute_in_loop)
3600 gcc_assert (init_addr); /* already computed by the caller. */
3601 else
3603 /* Generate the INIT_ADDR computation outside LOOP. */
3604 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3605 NULL_TREE, loop);
3606 pe = loop_preheader_edge (loop);
3607 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3608 gcc_assert (!new_bb);
3611 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3612 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3613 vec_dest =
3614 vect_create_destination_var (scalar_dest,
3615 gimple_call_return_type (new_stmt));
3616 new_temp = make_ssa_name (vec_dest, new_stmt);
3617 gimple_call_set_lhs (new_stmt, new_temp);
3619 if (compute_in_loop)
3620 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3621 else
3623 /* Generate the misalignment computation outside LOOP. */
3624 pe = loop_preheader_edge (loop);
3625 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3626 gcc_assert (!new_bb);
3629 *realignment_token = gimple_call_lhs (new_stmt);
3631 /* The result of the CALL_EXPR to this builtin is determined from
3632 the value of the parameter, and no global variables are touched,
3633 which makes the builtin a "const" function. Requiring the
3634 builtin to have the "const" attribute makes it unnecessary
3635 to call mark_call_clobbered. */
3636 gcc_assert (TREE_READONLY (builtin_decl));
3639 if (alignment_support_scheme == dr_explicit_realign)
3640 return msq;
3642 gcc_assert (!compute_in_loop);
3643 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3646 /* 5. Create msq = phi <msq_init, lsq> in loop */
3648 pe = loop_preheader_edge (containing_loop);
3649 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3650 msq = make_ssa_name (vec_dest, NULL);
3651 phi_stmt = create_phi_node (msq, containing_loop->header);
3652 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3653 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3655 return msq;
3659 /* Function vect_strided_load_supported.
3661 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3662 and FALSE otherwise. */
3664 bool
3665 vect_strided_load_supported (tree vectype)
3667 optab perm_even_optab, perm_odd_optab;
3668 enum machine_mode mode;
3670 mode = TYPE_MODE (vectype);
3672 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3673 optab_default);
3674 if (!perm_even_optab)
3676 if (vect_print_dump_info (REPORT_DETAILS))
3677 fprintf (vect_dump, "no optab for perm_even.");
3678 return false;
3681 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3683 if (vect_print_dump_info (REPORT_DETAILS))
3684 fprintf (vect_dump, "perm_even op not supported by target.");
3685 return false;
3688 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3689 optab_default);
3690 if (!perm_odd_optab)
3692 if (vect_print_dump_info (REPORT_DETAILS))
3693 fprintf (vect_dump, "no optab for perm_odd.");
3694 return false;
3697 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3699 if (vect_print_dump_info (REPORT_DETAILS))
3700 fprintf (vect_dump, "perm_odd op not supported by target.");
3701 return false;
3703 return true;
3707 /* Function vect_permute_load_chain.
3709 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3710 a power of 2, generate extract_even/odd stmts to reorder the input data
3711 correctly. Return the final references for loads in RESULT_CHAIN.
3713 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3714 The input is 4 vectors each containing 8 elements. We assign a number to each
3715 element; the input sequence is:
3717 1st vec: 0 1 2 3 4 5 6 7
3718 2nd vec: 8 9 10 11 12 13 14 15
3719 3rd vec: 16 17 18 19 20 21 22 23
3720 4th vec: 24 25 26 27 28 29 30 31
3722 The output sequence should be:
3724 1st vec: 0 4 8 12 16 20 24 28
3725 2nd vec: 1 5 9 13 17 21 25 29
3726 3rd vec: 2 6 10 14 18 22 26 30
3727 4th vec: 3 7 11 15 19 23 27 31
3729 i.e., the first output vector should contain the first elements of each
3730 interleaving group, etc.
3732 We use extract_even/odd instructions to create such output. The input of each
3733 extract_even/odd operation is two vectors
3734 1st vec 2nd vec
3735 0 1 2 3 4 5 6 7
3737 and the output is the vector of extracted even/odd elements. The output of
3738 extract_even will be: 0 2 4 6
3739 and of extract_odd: 1 3 5 7
3742 The permutation is done in log2 (LENGTH) stages. In each stage extract_even and
3743 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3744 order. In our example,
3746 E1: extract_even (1st vec, 2nd vec)
3747 E2: extract_odd (1st vec, 2nd vec)
3748 E3: extract_even (3rd vec, 4th vec)
3749 E4: extract_odd (3rd vec, 4th vec)
3751 The output for the first stage will be:
3753 E1: 0 2 4 6 8 10 12 14
3754 E2: 1 3 5 7 9 11 13 15
3755 E3: 16 18 20 22 24 26 28 30
3756 E4: 17 19 21 23 25 27 29 31
3758 In order to proceed and create the correct sequence for the next stage (or
3759 for the correct output, if the second stage is the last one, as in our
3760 example), we first put the output of the extract_even operation and then the
3761 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3762 The input for the second stage is:
3764 1st vec (E1): 0 2 4 6 8 10 12 14
3765 2nd vec (E3): 16 18 20 22 24 26 28 30
3766 3rd vec (E2): 1 3 5 7 9 11 13 15
3767 4th vec (E4): 17 19 21 23 25 27 29 31
3769 The output of the second stage:
3771 E1: 0 4 8 12 16 20 24 28
3772 E2: 2 6 10 14 18 22 26 30
3773 E3: 1 5 9 13 17 21 25 29
3774 E4: 3 7 11 15 19 23 27 31
3776 And RESULT_CHAIN after reordering:
3778 1st vec (E1): 0 4 8 12 16 20 24 28
3779 2nd vec (E3): 1 5 9 13 17 21 25 29
3780 3rd vec (E2): 2 6 10 14 18 22 26 30
3781 4th vec (E4): 3 7 11 15 19 23 27 31. */
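/* In pseudo-code, the stages described above amount to the following (a
   sketch; the real loop below also creates the SSA names and statements):

     for (stage = 0; stage < log2 (LENGTH); stage++)
       {
         for (j = 0; j < LENGTH; j += 2)
           {
             result[j/2]            = extract_even (chain[j], chain[j+1]);
             result[j/2 + LENGTH/2] = extract_odd  (chain[j], chain[j+1]);
           }
         chain = result;
       }
*/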
3783 bool
3784 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3785 unsigned int length,
3786 gimple stmt,
3787 gimple_stmt_iterator *gsi,
3788 VEC(tree,heap) **result_chain)
3790 tree perm_dest, data_ref, first_vect, second_vect;
3791 gimple perm_stmt;
3792 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3793 int i;
3794 unsigned int j;
3796 /* Check that the operation is supported. */
3797 if (!vect_strided_load_supported (vectype))
3798 return false;
3800 *result_chain = VEC_copy (tree, heap, dr_chain);
3801 for (i = 0; i < exact_log2 (length); i++)
3803 for (j = 0; j < length; j +=2)
3805 first_vect = VEC_index (tree, dr_chain, j);
3806 second_vect = VEC_index (tree, dr_chain, j+1);
3808 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3809 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3810 DECL_GIMPLE_REG_P (perm_dest) = 1;
3811 add_referenced_var (perm_dest);
3813 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3814 perm_dest, first_vect,
3815 second_vect);
3817 data_ref = make_ssa_name (perm_dest, perm_stmt);
3818 gimple_assign_set_lhs (perm_stmt, data_ref);
3819 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3820 mark_symbols_for_renaming (perm_stmt);
3822 VEC_replace (tree, *result_chain, j/2, data_ref);
3824 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3825 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3826 DECL_GIMPLE_REG_P (perm_dest) = 1;
3827 add_referenced_var (perm_dest);
3829 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3830 perm_dest, first_vect,
3831 second_vect);
3832 data_ref = make_ssa_name (perm_dest, perm_stmt);
3833 gimple_assign_set_lhs (perm_stmt, data_ref);
3834 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3835 mark_symbols_for_renaming (perm_stmt);
3837 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3839 dr_chain = VEC_copy (tree, heap, *result_chain);
3841 return true;
3845 /* Function vect_transform_strided_load.
3847 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3848 to perform their permutation and ascribe the resulting vectorized statements to
3849 the scalar statements. */
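/* For example (illustrative only): for an interleaving group of four scalar
   loads a[4i], a[4i+1], a[4i+2], a[4i+3], DR_CHAIN holds the four contiguous
   vector loads; after the permutation, the first entry of RESULT_CHAIN (all
   the a[4i] elements) becomes the vectorized statement of the first scalar
   load, the second entry that of a[4i+1], and so on.  */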
3852 bool
3853 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3854 gimple_stmt_iterator *gsi)
3856 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3857 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3858 gimple next_stmt, new_stmt;
3859 VEC(tree,heap) *result_chain = NULL;
3860 unsigned int i, gap_count;
3861 tree tmp_data_ref;
3863 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3864 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
3865 vectors that are ready for vector computation. */
3866 result_chain = VEC_alloc (tree, heap, size);
3867 /* Permute. */
3868 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3869 return false;
3871 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3872 Since we scan the chain starting from its first node, their order
3873 corresponds to the order of data-refs in RESULT_CHAIN. */
3874 next_stmt = first_stmt;
3875 gap_count = 1;
3876 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3878 if (!next_stmt)
3879 break;
3881 /* Skip the gaps. Loads created for the gaps will be removed later by a
3882 dead code elimination pass. No need to check for the first stmt in
3883 the group, since it always exists.
3884 DR_GROUP_GAP is the number of steps in elements from the previous
3885 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3886 correspond to the gaps. */
3888 if (next_stmt != first_stmt
3889 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3891 gap_count++;
3892 continue;
3895 while (next_stmt)
3897 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3898 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3899 copies, and we put the new vector statement in the first available
3900 RELATED_STMT. */
3901 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3902 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3903 else
3905 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3907 gimple prev_stmt =
3908 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3909 gimple rel_stmt =
3910 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3911 while (rel_stmt)
3913 prev_stmt = rel_stmt;
3914 rel_stmt =
3915 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3918 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3919 new_stmt;
3923 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3924 gap_count = 1;
3925 /* If NEXT_STMT accesses the same DR as the previous statement,
3926 put the same TMP_DATA_REF as its vectorized statement; otherwise
3927 get the next data-ref from RESULT_CHAIN. */
3928 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3929 break;
3933 VEC_free (tree, heap, result_chain);
3934 return true;
3937 /* Function vect_can_force_dr_alignment_p.
3939 Returns whether the alignment of a DECL can be forced to be aligned
3940 on an ALIGNMENT-bit boundary. */
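/* For example (illustrative only): a file-scope "static float a[256];" whose
   assembly has not yet been written out can normally be given the alignment
   the vectorizer wants (up to MAX_OFILE_ALIGNMENT), whereas an "extern"
   declaration or a decl that has already been emitted cannot, because we no
   longer control its layout.  */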
3942 bool
3943 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3945 if (TREE_CODE (decl) != VAR_DECL)
3946 return false;
3948 if (DECL_EXTERNAL (decl))
3949 return false;
3951 if (TREE_ASM_WRITTEN (decl))
3952 return false;
3954 if (TREE_STATIC (decl))
3955 return (alignment <= MAX_OFILE_ALIGNMENT);
3956 else
3957 return (alignment <= MAX_STACK_ALIGNMENT);
3961 /* Return whether the data reference DR is supported with respect to its
3962 alignment.
3963 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
3964 if it is aligned, i.e., check if it is possible to vectorize it with different
3965 alignment. */
3967 enum dr_alignment_support
3968 vect_supportable_dr_alignment (struct data_reference *dr,
3969 bool check_aligned_accesses)
3971 gimple stmt = DR_STMT (dr);
3972 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3973 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3974 enum machine_mode mode = TYPE_MODE (vectype);
3975 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3976 struct loop *vect_loop = NULL;
3977 bool nested_in_vect_loop = false;
3979 if (aligned_access_p (dr) && !check_aligned_accesses)
3980 return dr_aligned;
3982 if (!loop_vinfo)
3983 /* FORNOW: Misaligned accesses are supported only in loops. */
3984 return dr_unaligned_unsupported;
3986 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3987 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3989 /* Possibly unaligned access. */
3991 /* We can choose between using the implicit realignment scheme (generating
3992 a misaligned_move stmt) and the explicit realignment scheme (generating
3993 aligned loads with a REALIGN_LOAD). There are two variants of the explicit
3994 realignment scheme: optimized and unoptimized.
3995 We can optimize the realignment only if the step between consecutive
3996 vector loads is equal to the vector size. Since the vector memory
3997 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3998 is guaranteed that the misalignment amount remains the same throughout the
3999 execution of the vectorized loop. Therefore, we can create the
4000 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4001 at the loop preheader.
4003 However, in the case of outer-loop vectorization, when vectorizing a
4004 memory access in the inner-loop nested within the LOOP that is now being
4005 vectorized, while it is guaranteed that the misalignment of the
4006 vectorized memory access will remain the same in different outer-loop
4007 iterations, it is *not* guaranteed that it will remain the same throughout
4008 the execution of the inner-loop. This is because the inner-loop advances
4009 with the original scalar step (and not in steps of VS). If the inner-loop
4010 step happens to be a multiple of VS, then the misalignment remains fixed
4011 and we can use the optimized realignment scheme. For example:
4013 for (i=0; i<N; i++)
4014 for (j=0; j<M; j++)
4015 s += a[i+j];
4017 When vectorizing the i-loop in the above example, the step between
4018 consecutive vector loads is 1, and so the misalignment does not remain
4019 fixed across the execution of the inner-loop, and the realignment cannot
4020 be optimized (as illustrated in the following pseudo vectorized loop):
4022 for (i=0; i<N; i+=4)
4023 for (j=0; j<M; j++){
4024 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4025 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4026 // (assuming that we start from an aligned address).
4029 We therefore have to use the unoptimized realignment scheme:
4031 for (i=0; i<N; i+=4)
4032 for (j=k; j<M; j+=4)
4033 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4034 // that the misalignment of the initial address is
4035 // 0).
4037 The loop can then be vectorized as follows:
4039 for (k=0; k<4; k++){
4040 rt = get_realignment_token (&vp[k]);
4041 for (i=0; i<N; i+=4){
4042 v1 = vp[i+k];
4043 for (j=k; j<M; j+=4){
4044 v2 = vp[i+j+VS-1];
4045 va = REALIGN_LOAD <v1,v2,rt>;
4046 vs += va;
4047 v1 = v2;
4050 } */
4052 if (DR_IS_READ (dr))
4054 bool is_packed = false;
4055 tree type = (TREE_TYPE (DR_REF (dr)));
4057 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4058 && (!targetm.vectorize.builtin_mask_for_load
4059 || targetm.vectorize.builtin_mask_for_load ()))
4061 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4062 if (nested_in_vect_loop
4063 && (TREE_INT_CST_LOW (DR_STEP (dr))
4064 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4065 return dr_explicit_realign;
4066 else
4067 return dr_explicit_realign_optimized;
4069 if (!known_alignment_for_access_p (dr))
4071 tree ba = DR_BASE_OBJECT (dr);
4073 if (ba)
4074 is_packed = contains_packed_reference (ba);
4077 if (targetm.vectorize.
4078 support_vector_misalignment (mode, type,
4079 DR_MISALIGNMENT (dr), is_packed))
4080 /* Can't software pipeline the loads, but can at least do them. */
4081 return dr_unaligned_supported;
4083 else
4085 bool is_packed = false;
4086 tree type = (TREE_TYPE (DR_REF (dr)));
4088 if (!known_alignment_for_access_p (dr))
4090 tree ba = DR_BASE_OBJECT (dr);
4092 if (ba)
4093 is_packed = contains_packed_reference (ba);
4096 if (targetm.vectorize.
4097 support_vector_misalignment (mode, type,
4098 DR_MISALIGNMENT (dr), is_packed))
4099 return dr_unaligned_supported;
4102 /* Unsupported. */
4103 return dr_unaligned_unsupported;