1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "toplev.h"
43 /* Return the smallest scalar part of STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
46 result-type is different than the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop is present only as the rhs of a
50 promotion operation - we'd miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
56 check the rhs.
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58 types. */
60 tree
61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77 if (rhs < lhs)
78 scalar_type = rhs_type;
81 *lhs_size_unit = lhs;
82 *rhs_size_unit = rhs;
83 return scalar_type;
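/* For illustration, assume the loop body contains only

       int_x = (int) short_y;   # 2-byte rhs widened into a 4-byte lhs

   The lhs is 4 bytes wide but the rhs is only 2, so the function above
   returns the 2-byte type: the vectorization factor has to be derived
   from the smallest scalar type that occurs in the loop.  */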
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
90 int
91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
93 gimple next_stmt = first_stmt;
94 int result = 0;
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97 return -1;
99 while (next_stmt && next_stmt != stmt)
101 result++;
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
105 if (next_stmt)
106 return result;
107 else
108 return -1;
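/* For illustration, assume FIRST_STMT heads the interleaving chain of the
   accesses

       a[4*i], a[4*i+1], a[4*i+2], a[4*i+3]

   Then the stmt accessing a[4*i+2] is at place 2 (places count from 0),
   and a stmt that is not linked into this chain yields -1.  */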
112 /* Function vect_insert_into_interleaving_chain.
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
116 static void
117 vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
120 gimple prev, next;
121 tree next_init;
122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127 while (next)
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
132 /* Insert here. */
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 return;
137 prev = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
147 /* Function vect_update_interleaving_chain.
149 For two data-refs DRA and DRB that are part of a chain of interleaved data
150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
152 There are four possible cases:
153 1. New stmts - both DRA and DRB are not a part of any chain:
154 FIRST_DR = DRB
155 NEXT_DR (DRB) = DRA
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
162 FIRST_DR = DRB
163 NEXT(FIRST_DR) = previous FIRST_DR
164 else
165 insert DRB according to its init
166 4. both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
170 static void
171 vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
178 tree node_init;
179 gimple node, prev, next, first_stmt;
181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187 return;
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
196 return;
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 old_first_stmt)));
205 gimple tmp;
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
217 while (tmp)
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
223 else
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
229 return;
232 /* 4. both DRA and DRB are in some interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
236 return;
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
242 /* Insert the nodes of DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning). */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247 first_stmt = first_b;
249 else
251 /* Insert the nodes of DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning). */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256 first_stmt = first_a;
259 while (node)
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263 while (next)
265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
268 /* Insert here. */
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 prev = node;
272 break;
274 prev = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
277 if (!next)
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 prev = node;
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
290 /* Function vect_equal_offsets.
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
294 static bool
295 vect_equal_offsets (tree offset1, tree offset2)
297 bool res;
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
302 if (offset1 == offset2)
303 return true;
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
307 return false;
309 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
310 TREE_OPERAND (offset2, 0));
312 if (!res || !BINARY_CLASS_P (offset1))
313 return res;
315 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
316 TREE_OPERAND (offset2, 1));
318 return res;
322 /* Function vect_check_interleaving.
324 Check if DRA and DRB are a part of interleaving. In case they are, insert
325 DRA and DRB in an interleaving chain. */
327 static bool
328 vect_check_interleaving (struct data_reference *dra,
329 struct data_reference *drb)
331 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
333 /* Check that the data-refs have the same first location (except init) and
334 that they are both either loads or stores (not one load and one store). */
335 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
336 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
337 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
338 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
339 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
340 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
341 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
342 || DR_IS_READ (dra) != DR_IS_READ (drb))
343 return false;
345 /* Check:
346 1. data-refs are of the same type
347 2. their steps are equal
348 3. the step (if greater than zero) is greater than the difference between
349 data-refs' inits. */
350 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
351 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
353 if (type_size_a != type_size_b
354 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
355 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
356 TREE_TYPE (DR_REF (drb))))
357 return false;
359 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
360 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
361 step = TREE_INT_CST_LOW (DR_STEP (dra));
363 if (init_a > init_b)
365 /* If init_a == init_b + the size of the type * k, we have an interleaving,
366 and DRB is accessed before DRA. */
367 diff_mod_size = (init_a - init_b) % type_size_a;
369 if (step && (init_a - init_b) > step)
370 return false;
372 if (diff_mod_size == 0)
374 vect_update_interleaving_chain (drb, dra);
375 if (vect_print_dump_info (REPORT_DR_DETAILS))
377 fprintf (vect_dump, "Detected interleaving ");
378 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
379 fprintf (vect_dump, " and ");
380 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
382 return true;
385 else
387 /* If init_b == init_a + the size of the type * k, we have an
388 interleaving, and DRA is accessed before DRB. */
389 diff_mod_size = (init_b - init_a) % type_size_a;
391 if (step && (init_b - init_a) > step)
392 return false;
394 if (diff_mod_size == 0)
396 vect_update_interleaving_chain (dra, drb);
397 if (vect_print_dump_info (REPORT_DR_DETAILS))
399 fprintf (vect_dump, "Detected interleaving ");
400 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
401 fprintf (vect_dump, " and ");
402 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
404 return true;
408 return false;
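/* Worked example with assumed values: for the stores

       a[2*i]   = x;   # DR_INIT = 0
       a[2*i+1] = y;   # DR_INIT = 4, 'a' an array of 4-byte elements

   both steps are 8, type_size is 4, init_b - init_a = 4 <= step, and
   (init_b - init_a) % type_size == 0, so the two data-refs are chained
   into a single interleaving group by the code above.  */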
411 /* Check if the data references pointed to by DR_I and DR_J are the same or
412 belong to the same interleaving group. Return FALSE if the drs are
413 different, otherwise return TRUE. */
415 static bool
416 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
418 gimple stmt_i = DR_STMT (dr_i);
419 gimple stmt_j = DR_STMT (dr_j);
421 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
422 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
423 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
424 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
425 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
426 return true;
427 else
428 return false;
431 /* If address ranges represented by DDR_I and DDR_J are equal,
432 return TRUE, otherwise return FALSE. */
434 static bool
435 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
437 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
438 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
439 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
440 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
441 return true;
442 else
443 return false;
446 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
447 tested at run-time. Return TRUE if DDR was successfully inserted.
448 Return FALSE if versioning is not supported. */
450 static bool
451 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
453 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
455 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
456 return false;
458 if (vect_print_dump_info (REPORT_DR_DETAILS))
460 fprintf (vect_dump, "mark for run-time aliasing test between ");
461 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
462 fprintf (vect_dump, " and ");
463 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
466 if (optimize_loop_nest_for_size_p (loop))
468 if (vect_print_dump_info (REPORT_DR_DETAILS))
469 fprintf (vect_dump, "versioning not supported when optimizing for size.");
470 return false;
473 /* FORNOW: We don't support versioning with outer-loop vectorization. */
474 if (loop->inner)
476 if (vect_print_dump_info (REPORT_DR_DETAILS))
477 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
478 return false;
481 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
482 return true;
486 /* Function vect_analyze_data_ref_dependence.
488 Return TRUE if there is (or might be) a dependence between a memory-reference
489 DRA and a memory-reference DRB. If the dependence can instead be checked at
490 run-time by versioning for alias, return FALSE. Adjust *MAX_VF according to
491 the data dependence. */
493 static bool
494 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
495 loop_vec_info loop_vinfo, int *max_vf)
497 unsigned int i;
498 struct loop *loop = NULL;
499 struct data_reference *dra = DDR_A (ddr);
500 struct data_reference *drb = DDR_B (ddr);
501 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
502 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
503 lambda_vector dist_v;
504 unsigned int loop_depth;
506 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
508 /* Independent data accesses. */
509 vect_check_interleaving (dra, drb);
510 return false;
513 if (loop_vinfo)
514 loop = LOOP_VINFO_LOOP (loop_vinfo);
516 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
517 return false;
519 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
521 if (loop_vinfo)
523 if (vect_print_dump_info (REPORT_DR_DETAILS))
525 fprintf (vect_dump, "versioning for alias required: "
526 "can't determine dependence between ");
527 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
528 fprintf (vect_dump, " and ");
529 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
532 /* Add to list of ddrs that need to be tested at run-time. */
533 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
536 /* When vectorizing a basic block, an unknown dependence can still mean
537 a strided access. */
538 if (vect_check_interleaving (dra, drb))
539 return false;
541 if (vect_print_dump_info (REPORT_DR_DETAILS))
543 fprintf (vect_dump, "can't determine dependence between ");
544 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
545 fprintf (vect_dump, " and ");
546 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
549 return true;
552 /* Versioning for alias is not yet supported for basic block SLP, and
553 dependence distance is inapplicable; hence, in case of a known data
554 dependence, basic block vectorization is impossible for now. */
555 if (!loop_vinfo)
557 if (dra != drb && vect_check_interleaving (dra, drb))
558 return false;
560 if (vect_print_dump_info (REPORT_DR_DETAILS))
562 fprintf (vect_dump, "determined dependence between ");
563 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
564 fprintf (vect_dump, " and ");
565 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
568 return true;
571 /* Loop-based vectorization and known data dependence. */
572 if (DDR_NUM_DIST_VECTS (ddr) == 0)
574 if (vect_print_dump_info (REPORT_DR_DETAILS))
576 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
577 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
578 fprintf (vect_dump, " and ");
579 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
581 /* Add to list of ddrs that need to be tested at run-time. */
582 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
585 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
586 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
588 int dist = dist_v[loop_depth];
590 if (vect_print_dump_info (REPORT_DR_DETAILS))
591 fprintf (vect_dump, "dependence distance = %d.", dist);
593 if (dist == 0)
595 if (vect_print_dump_info (REPORT_DR_DETAILS))
597 fprintf (vect_dump, "dependence distance == 0 between ");
598 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
599 fprintf (vect_dump, " and ");
600 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
603 /* For interleaving, mark that there is a read-write dependency if
604 necessary. We have already checked that one of the data-refs is a store. */
605 if (DR_IS_READ (dra))
606 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
607 else
609 if (DR_IS_READ (drb))
610 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
613 continue;
616 if (dist > 0 && DDR_REVERSED_P (ddr))
618 /* If DDR_REVERSED_P is set, the order of the data-refs in DDR was
619 reversed (to make the distance vector positive), and the actual
620 distance is negative. */
621 if (vect_print_dump_info (REPORT_DR_DETAILS))
622 fprintf (vect_dump, "dependence distance negative.");
623 continue;
626 if (abs (dist) >= 2
627 && abs (dist) < *max_vf)
629 /* The dependence distance requires reduction of the maximal
630 vectorization factor. */
631 *max_vf = abs (dist);
632 if (vect_print_dump_info (REPORT_DR_DETAILS))
633 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
634 *max_vf);
637 if (abs (dist) >= *max_vf)
639 /* In this case the dependence distance does not create a dependence,
640 as far as vectorization is concerned. */
641 if (vect_print_dump_info (REPORT_DR_DETAILS))
642 fprintf (vect_dump, "dependence distance >= VF.");
643 continue;
646 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
648 fprintf (vect_dump, "not vectorized, possible dependence "
649 "between data-refs ");
650 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
651 fprintf (vect_dump, " and ");
652 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
655 return true;
658 return false;
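/* Worked example with assumed values: for the loop

       for (i = 0; i < n; i++)
         a[i + 3] = a[i] + 1;

   the dependence distance is 3.  Starting from *max_vf == 8, the code
   above lowers *max_vf to 3, so that no vector iteration covers more
   than 3 scalar iterations and the dependence is preserved.  */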
661 /* Function vect_analyze_data_ref_dependences.
663 Examine all the data references in the loop, and make sure there do not
664 exist any data dependences between them. Set *MAX_VF according to
665 the maximum vectorization factor the data dependences allow. */
667 bool
668 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
669 bb_vec_info bb_vinfo, int *max_vf)
671 unsigned int i;
672 VEC (ddr_p, heap) *ddrs = NULL;
673 struct data_dependence_relation *ddr;
675 if (vect_print_dump_info (REPORT_DETAILS))
676 fprintf (vect_dump, "=== vect_analyze_dependences ===");
678 if (loop_vinfo)
679 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
680 else
681 ddrs = BB_VINFO_DDRS (bb_vinfo);
683 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
684 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
685 return false;
687 return true;
691 /* Function vect_compute_data_ref_alignment
693 Compute the misalignment of the data reference DR.
695 Output:
696 1. If during the misalignment computation it is found that the data reference
697 cannot be vectorized then false is returned.
698 2. DR_MISALIGNMENT (DR) is defined.
700 FOR NOW: No analysis is actually performed. Misalignment is calculated
701 only for trivial cases. TODO. */
703 static bool
704 vect_compute_data_ref_alignment (struct data_reference *dr)
706 gimple stmt = DR_STMT (dr);
707 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
708 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
709 struct loop *loop = NULL;
710 tree ref = DR_REF (dr);
711 tree vectype;
712 tree base, base_addr;
713 bool base_aligned;
714 tree misalign;
715 tree aligned_to, alignment;
717 if (vect_print_dump_info (REPORT_DETAILS))
718 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
720 if (loop_vinfo)
721 loop = LOOP_VINFO_LOOP (loop_vinfo);
723 /* Initialize misalignment to unknown. */
724 SET_DR_MISALIGNMENT (dr, -1);
726 misalign = DR_INIT (dr);
727 aligned_to = DR_ALIGNED_TO (dr);
728 base_addr = DR_BASE_ADDRESS (dr);
729 vectype = STMT_VINFO_VECTYPE (stmt_info);
731 /* In case the dataref is in an inner-loop of the loop that is being
732 vectorized (LOOP), we use the base and misalignment information
733 relative to the outer-loop (LOOP). This is ok only if the misalignment
734 stays the same throughout the execution of the inner-loop, which is why
735 we have to check that the step of the dataref in the inner-loop is a
736 multiple of the vector size. */
737 if (loop && nested_in_vect_loop_p (loop, stmt))
739 tree step = DR_STEP (dr);
740 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
742 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
744 if (vect_print_dump_info (REPORT_ALIGNMENT))
745 fprintf (vect_dump, "inner step divides the vector-size.");
746 misalign = STMT_VINFO_DR_INIT (stmt_info);
747 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
748 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
750 else
752 if (vect_print_dump_info (REPORT_ALIGNMENT))
753 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
754 misalign = NULL_TREE;
758 base = build_fold_indirect_ref (base_addr);
759 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
761 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
762 || !misalign)
764 if (vect_print_dump_info (REPORT_ALIGNMENT))
766 fprintf (vect_dump, "Unknown alignment for access: ");
767 print_generic_expr (vect_dump, base, TDF_SLIM);
769 return true;
772 if ((DECL_P (base)
773 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
774 alignment) >= 0)
775 || (TREE_CODE (base_addr) == SSA_NAME
776 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
777 TREE_TYPE (base_addr)))),
778 alignment) >= 0))
779 base_aligned = true;
780 else
781 base_aligned = false;
783 if (!base_aligned)
785 /* Do not change the alignment of global variables if
786 flag_section_anchors is enabled. */
787 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
788 || (TREE_STATIC (base) && flag_section_anchors))
790 if (vect_print_dump_info (REPORT_DETAILS))
792 fprintf (vect_dump, "can't force alignment of ref: ");
793 print_generic_expr (vect_dump, ref, TDF_SLIM);
795 return true;
798 /* Force the alignment of the decl.
799 NOTE: This is the only change to the code we make during
800 the analysis phase, before deciding to vectorize the loop. */
801 if (vect_print_dump_info (REPORT_DETAILS))
802 fprintf (vect_dump, "force alignment");
803 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
804 DECL_USER_ALIGN (base) = 1;
807 /* At this point we assume that the base is aligned. */
808 gcc_assert (base_aligned
809 || (TREE_CODE (base) == VAR_DECL
810 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
812 /* Modulo alignment. */
813 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
815 if (!host_integerp (misalign, 1))
817 /* Negative or overflowed misalignment value. */
818 if (vect_print_dump_info (REPORT_DETAILS))
819 fprintf (vect_dump, "unexpected misalign value");
820 return false;
823 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
825 if (vect_print_dump_info (REPORT_DETAILS))
827 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
828 print_generic_expr (vect_dump, ref, TDF_SLIM);
831 return true;
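/* Worked example with assumed values: for a V4SF vectype, TYPE_ALIGN is
   128 bits, so ALIGNMENT is 16 bytes.  With a base decl that is (or can
   be forced to be) 16-byte aligned and DR_INIT equal to 20, the modulo
   step above yields 20 % 16 = 4, and DR_MISALIGNMENT (dr) is set to 4
   bytes.  If the base cannot be proven or forced to be 16-byte aligned,
   the misalignment stays -1 (unknown).  */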
835 /* Function vect_compute_data_refs_alignment
837 Compute the misalignment of data references in the loop.
838 Return FALSE if a data reference is found that cannot be vectorized. */
840 static bool
841 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
842 bb_vec_info bb_vinfo)
844 VEC (data_reference_p, heap) *datarefs;
845 struct data_reference *dr;
846 unsigned int i;
848 if (loop_vinfo)
849 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
850 else
851 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
853 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
854 if (!vect_compute_data_ref_alignment (dr))
855 return false;
857 return true;
861 /* Function vect_update_misalignment_for_peel
863 DR - the data reference whose misalignment is to be adjusted.
864 DR_PEEL - the data reference whose misalignment is being made
865 zero in the vector loop by the peel.
866 NPEEL - the number of iterations in the peel loop if the misalignment
867 of DR_PEEL is known at compile time. */
869 static void
870 vect_update_misalignment_for_peel (struct data_reference *dr,
871 struct data_reference *dr_peel, int npeel)
873 unsigned int i;
874 VEC(dr_p,heap) *same_align_drs;
875 struct data_reference *current_dr;
876 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
877 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
878 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
879 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
881 /* For interleaved data accesses the step in the loop must be multiplied by
882 the size of the interleaving group. */
883 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
884 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
885 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
886 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
888 /* It can be assumed that the data refs with the same alignment as dr_peel
889 are aligned in the vector loop. */
890 same_align_drs
891 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
892 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
894 if (current_dr != dr)
895 continue;
896 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
897 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
898 SET_DR_MISALIGNMENT (dr, 0);
899 return;
902 if (known_alignment_for_access_p (dr)
903 && known_alignment_for_access_p (dr_peel))
905 int misal = DR_MISALIGNMENT (dr);
906 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
907 misal += npeel * dr_size;
908 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
909 SET_DR_MISALIGNMENT (dr, misal);
910 return;
913 if (vect_print_dump_info (REPORT_DETAILS))
914 fprintf (vect_dump, "Setting misalignment to -1.");
915 SET_DR_MISALIGNMENT (dr, -1);
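/* Worked example with assumed values: with a 16-byte vectype, a 4-byte
   element type (dr_size = 4), DR_MISALIGNMENT (dr) == 8 and NPEEL == 3
   (and DR not recorded as having the same alignment as DR_PEEL), the
   update above computes (8 + 3 * 4) % 16 = 4, which becomes the
   misalignment of DR in the vector loop that follows the peeled
   prologue.  */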
919 /* Function vect_verify_datarefs_alignment
921 Return TRUE if all data references in the loop can be
922 handled with respect to alignment. */
924 bool
925 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
927 VEC (data_reference_p, heap) *datarefs;
928 struct data_reference *dr;
929 enum dr_alignment_support supportable_dr_alignment;
930 unsigned int i;
932 if (loop_vinfo)
933 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
934 else
935 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
937 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
939 gimple stmt = DR_STMT (dr);
940 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
942 /* For interleaving, only the alignment of the first access matters. */
943 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
944 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
945 continue;
947 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
948 if (!supportable_dr_alignment)
950 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
952 if (DR_IS_READ (dr))
953 fprintf (vect_dump,
954 "not vectorized: unsupported unaligned load.");
955 else
956 fprintf (vect_dump,
957 "not vectorized: unsupported unaligned store.");
959 return false;
961 if (supportable_dr_alignment != dr_aligned
962 && vect_print_dump_info (REPORT_ALIGNMENT))
963 fprintf (vect_dump, "Vectorizing an unaligned access.");
965 return true;
969 /* Function vector_alignment_reachable_p
971 Return true if vector alignment for DR is reachable by peeling
972 a few loop iterations. Return false otherwise. */
974 static bool
975 vector_alignment_reachable_p (struct data_reference *dr)
977 gimple stmt = DR_STMT (dr);
978 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
979 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
981 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
983 /* For interleaved access we peel only if the number of iterations in
984 the prolog loop (VF - misalignment, counted in elements) is a multiple
985 of the number of interleaved accesses. */
986 int elem_size, mis_in_elements;
987 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
989 /* FORNOW: handle only known alignment. */
990 if (!known_alignment_for_access_p (dr))
991 return false;
993 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
994 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
996 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
997 return false;
1000 /* If the misalignment is known at compile time then allow peeling
1001 only if natural alignment is reachable through peeling. */
1002 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1004 HOST_WIDE_INT elmsize =
1005 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1006 if (vect_print_dump_info (REPORT_DETAILS))
1008 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1009 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1011 if (DR_MISALIGNMENT (dr) % elmsize)
1013 if (vect_print_dump_info (REPORT_DETAILS))
1014 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1015 return false;
1019 if (!known_alignment_for_access_p (dr))
1021 tree type = (TREE_TYPE (DR_REF (dr)));
1022 tree ba = DR_BASE_OBJECT (dr);
1023 bool is_packed = false;
1025 if (ba)
1026 is_packed = contains_packed_reference (ba);
1028 if (vect_print_dump_info (REPORT_DETAILS))
1029 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1030 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1031 return true;
1032 else
1033 return false;
1036 return true;
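/* Worked example with assumed values: for a group of 2 interleaved 4-byte
   accesses and a V4SI vectype, nelements is 4 and elem_size is 4.  A
   misalignment of 8 bytes gives mis_in_elements = 2 and (4 - 2) % 2 == 0,
   so the prologue peel corresponds to a whole number of group iterations
   and alignment is reachable; a misalignment of 4 bytes would give
   (4 - 1) % 2 != 0 and peeling is rejected.  */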
1039 /* Function vect_enhance_data_refs_alignment
1041 This pass will use loop versioning and loop peeling in order to enhance
1042 the alignment of data references in the loop.
1044 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1045 original loop is to be vectorized; any other loops that are created by
1046 the transformations performed in this pass are not supposed to be
1047 vectorized. This restriction will be relaxed.
1049 This pass will require a cost model to guide it whether to apply peeling
1050 or versioning or a combination of the two. For example, the scheme that
1051 Intel uses when given a loop with several memory accesses, is as follows:
1052 choose one memory access ('p') whose alignment you want to force by doing
1053 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1054 other accesses are not necessarily aligned, or (2) use loop versioning to
1055 generate one loop in which all accesses are aligned, and another loop in
1056 which only 'p' is necessarily aligned.
1058 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1059 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1060 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1062 Devising a cost model is the most critical aspect of this work. It will
1063 guide us on which access to peel for, whether to use loop versioning, how
1064 many versions to create, etc. The cost model will probably consist of
1065 generic considerations as well as target specific considerations (on
1066 powerpc for example, misaligned stores are more painful than misaligned
1067 loads).
1069 Here are the general steps involved in alignment enhancements:
1071 -- original loop, before alignment analysis:
1072 for (i=0; i<N; i++){
1073 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1074 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1077 -- After vect_compute_data_refs_alignment:
1078 for (i=0; i<N; i++){
1079 x = q[i]; # DR_MISALIGNMENT(q) = 3
1080 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1083 -- Possibility 1: we do loop versioning:
1084 if (p is aligned) {
1085 for (i=0; i<N; i++){ # loop 1A
1086 x = q[i]; # DR_MISALIGNMENT(q) = 3
1087 p[i] = y; # DR_MISALIGNMENT(p) = 0
1090 else {
1091 for (i=0; i<N; i++){ # loop 1B
1092 x = q[i]; # DR_MISALIGNMENT(q) = 3
1093 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1097 -- Possibility 2: we do loop peeling:
1098 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1099 x = q[i];
1100 p[i] = y;
1102 for (i = 3; i < N; i++){ # loop 2A
1103 x = q[i]; # DR_MISALIGNMENT(q) = 0
1104 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1107 -- Possibility 3: combination of loop peeling and versioning:
1108 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1109 x = q[i];
1110 p[i] = y;
1112 if (p is aligned) {
1113 for (i = 3; i<N; i++){ # loop 3A
1114 x = q[i]; # DR_MISALIGNMENT(q) = 0
1115 p[i] = y; # DR_MISALIGNMENT(p) = 0
1118 else {
1119 for (i = 3; i<N; i++){ # loop 3B
1120 x = q[i]; # DR_MISALIGNMENT(q) = 0
1121 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1125 These loops are later passed to loop_transform to be vectorized. The
1126 vectorizer will use the alignment information to guide the transformation
1127 (whether to generate regular loads/stores, or with special handling for
1128 misalignment). */
1130 bool
1131 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1133 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1134 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1135 enum dr_alignment_support supportable_dr_alignment;
1136 struct data_reference *dr0 = NULL;
1137 struct data_reference *dr;
1138 unsigned int i;
1139 bool do_peeling = false;
1140 bool do_versioning = false;
1141 bool stat;
1142 gimple stmt;
1143 stmt_vec_info stmt_info;
1144 int vect_versioning_for_alias_required;
1146 if (vect_print_dump_info (REPORT_DETAILS))
1147 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1149 /* While cost model enhancements are expected in the future, the high level
1150 view of the code at this time is as follows:
1152 A) If there is a misaligned access then see if peeling to align
1153 this access can make all data references satisfy
1154 vect_supportable_dr_alignment. If so, update data structures
1155 as needed and return true.
1157 B) If peeling wasn't possible and there is a data reference with an
1158 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1159 then see if loop versioning checks can be used to make all data
1160 references satisfy vect_supportable_dr_alignment. If so, update
1161 data structures as needed and return true.
1163 C) If neither peeling nor versioning were successful then return false if
1164 any data reference does not satisfy vect_supportable_dr_alignment.
1166 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1168 Note, Possibility 3 above (which is peeling and versioning together) is not
1169 being done at this time. */
1171 /* (1) Peeling to force alignment. */
1173 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1174 Considerations:
1175 + How many accesses will become aligned due to the peeling
1176 - How many accesses will become unaligned due to the peeling,
1177 and the cost of misaligned accesses.
1178 - The cost of peeling (the extra runtime checks, the increase
1179 in code size).
1181 The scheme we use FORNOW: peel to force the alignment of the first
1182 unsupported misaligned access in the loop.
1184 TODO: Use a cost model. */
1186 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1188 stmt = DR_STMT (dr);
1189 stmt_info = vinfo_for_stmt (stmt);
1191 /* For interleaving, only the alignment of the first access
1192 matters. */
1193 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1194 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1195 continue;
1197 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1199 do_peeling = vector_alignment_reachable_p (dr);
1200 if (do_peeling)
1201 dr0 = dr;
1202 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1203 fprintf (vect_dump, "vector alignment may not be reachable");
1204 break;
1208 vect_versioning_for_alias_required
1209 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1211 /* Temporarily, if versioning for alias is required, we disable peeling
1212 until we support peeling and versioning. Often peeling for alignment
1213 will require peeling for loop-bound, which in turn requires that we
1214 know how to adjust the loop ivs after the loop. */
1215 if (vect_versioning_for_alias_required
1216 || !vect_can_advance_ivs_p (loop_vinfo)
1217 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1218 do_peeling = false;
1220 if (do_peeling)
1222 int mis;
1223 int npeel = 0;
1224 gimple stmt = DR_STMT (dr0);
1225 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1226 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1227 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1229 if (known_alignment_for_access_p (dr0))
1231 /* Since it's known at compile time, compute the number of iterations
1232 in the peeled loop (the peeling factor) for use in updating
1233 DR_MISALIGNMENT values. The peeling factor is the vectorization
1234 factor minus the misalignment as an element count. */
1235 mis = DR_MISALIGNMENT (dr0);
1236 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1237 npeel = nelements - mis;
1239 /* For interleaved data access every iteration accesses all the
1240 members of the group, therefore we divide the number of iterations
1241 by the group size. */
1242 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1243 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1244 npeel /= DR_GROUP_SIZE (stmt_info);
1246 if (vect_print_dump_info (REPORT_DETAILS))
1247 fprintf (vect_dump, "Try peeling by %d", npeel);
1250 /* Ensure that all data refs can be vectorized after the peel. */
1251 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1253 int save_misalignment;
1255 if (dr == dr0)
1256 continue;
1258 stmt = DR_STMT (dr);
1259 stmt_info = vinfo_for_stmt (stmt);
1260 /* For interleaving, only the alignment of the first access
1261 matters. */
1262 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1263 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1264 continue;
1266 save_misalignment = DR_MISALIGNMENT (dr);
1267 vect_update_misalignment_for_peel (dr, dr0, npeel);
1268 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1269 SET_DR_MISALIGNMENT (dr, save_misalignment);
1271 if (!supportable_dr_alignment)
1273 do_peeling = false;
1274 break;
1278 if (do_peeling)
1280 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1281 If the misalignment of DR_i is identical to that of dr0 then set
1282 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1283 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1284 by the peeling factor times the element size of DR_i (MOD the
1285 vectorization factor times the size). Otherwise, the
1286 misalignment of DR_i must be set to unknown. */
1287 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1288 if (dr != dr0)
1289 vect_update_misalignment_for_peel (dr, dr0, npeel);
1291 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1292 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1293 SET_DR_MISALIGNMENT (dr0, 0);
1294 if (vect_print_dump_info (REPORT_ALIGNMENT))
1295 fprintf (vect_dump, "Alignment of access forced using peeling.");
1297 if (vect_print_dump_info (REPORT_DETAILS))
1298 fprintf (vect_dump, "Peeling for alignment will be applied.");
1300 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1301 gcc_assert (stat);
1302 return stat;
1307 /* (2) Versioning to force alignment. */
1309 /* Try versioning if:
1310 1) flag_tree_vect_loop_version is TRUE
1311 2) optimize loop for speed
1312 3) there is at least one unsupported misaligned data ref with an unknown
1313 misalignment, and
1314 4) all misaligned data refs with a known misalignment are supported, and
1315 5) the number of runtime alignment checks is within reason. */
1317 do_versioning =
1318 flag_tree_vect_loop_version
1319 && optimize_loop_nest_for_speed_p (loop)
1320 && (!loop->inner); /* FORNOW */
1322 if (do_versioning)
1324 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1326 stmt = DR_STMT (dr);
1327 stmt_info = vinfo_for_stmt (stmt);
1329 /* For interleaving, only the alignment of the first access
1330 matters. */
1331 if (aligned_access_p (dr)
1332 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1333 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1334 continue;
1336 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1338 if (!supportable_dr_alignment)
1340 gimple stmt;
1341 int mask;
1342 tree vectype;
1344 if (known_alignment_for_access_p (dr)
1345 || VEC_length (gimple,
1346 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1347 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1349 do_versioning = false;
1350 break;
1353 stmt = DR_STMT (dr);
1354 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1355 gcc_assert (vectype);
1357 /* The rightmost bits of an aligned address must be zeros.
1358 Construct the mask needed for this test. For example,
1359 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1360 mask must be 15 = 0xf. */
1361 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1363 /* FORNOW: use the same mask to test all potentially unaligned
1364 references in the loop. The vectorizer currently supports
1365 a single vector size, see the reference to
1366 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1367 vectorization factor is computed. */
1368 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1369 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1370 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1371 VEC_safe_push (gimple, heap,
1372 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1373 DR_STMT (dr));
1377 /* Versioning requires at least one misaligned data reference. */
1378 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1379 do_versioning = false;
1380 else if (!do_versioning)
1381 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1384 if (do_versioning)
1386 VEC(gimple,heap) *may_misalign_stmts
1387 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1388 gimple stmt;
1390 /* It can now be assumed that the data references in the statements
1391 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1392 of the loop being vectorized. */
1393 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1395 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1396 dr = STMT_VINFO_DATA_REF (stmt_info);
1397 SET_DR_MISALIGNMENT (dr, 0);
1398 if (vect_print_dump_info (REPORT_ALIGNMENT))
1399 fprintf (vect_dump, "Alignment of access forced using versioning.");
1402 if (vect_print_dump_info (REPORT_DETAILS))
1403 fprintf (vect_dump, "Versioning for alignment will be applied.");
1405 /* Peeling and versioning can't be done together at this time. */
1406 gcc_assert (! (do_peeling && do_versioning));
1408 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1409 gcc_assert (stat);
1410 return stat;
1413 /* This point is reached if neither peeling nor versioning is being done. */
1414 gcc_assert (! (do_peeling || do_versioning));
1416 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1417 return stat;
1421 /* Function vect_find_same_alignment_drs.
1423 Update group and alignment relations according to the chosen
1424 vectorization factor. */
1426 static void
1427 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1428 loop_vec_info loop_vinfo)
1430 unsigned int i;
1431 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1432 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1433 struct data_reference *dra = DDR_A (ddr);
1434 struct data_reference *drb = DDR_B (ddr);
1435 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1436 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1437 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1438 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1439 lambda_vector dist_v;
1440 unsigned int loop_depth;
1442 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1443 return;
1445 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1446 return;
1448 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1449 return;
1451 /* Loop-based vectorization and known data dependence. */
1452 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1453 return;
1455 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1456 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1458 int dist = dist_v[loop_depth];
1460 if (vect_print_dump_info (REPORT_DR_DETAILS))
1461 fprintf (vect_dump, "dependence distance = %d.", dist);
1463 /* Same loop iteration. */
1464 if (dist == 0
1465 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1467 /* Two references with distance zero (or a multiple of VF, with equal sizes) have the same alignment. */
1468 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1469 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1470 if (vect_print_dump_info (REPORT_ALIGNMENT))
1471 fprintf (vect_dump, "accesses have the same alignment.");
1472 if (vect_print_dump_info (REPORT_DR_DETAILS))
1474 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1475 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1476 fprintf (vect_dump, " and ");
1477 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
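/* For illustration, with an assumed vectorization factor of 4 and two
   4-byte data-refs whose dependence distance is 8, the test above sees
   8 % 4 == 0 with equal element sizes and records each data-ref in the
   other's SAME_ALIGN_REFS list: the two accesses are always a whole
   number of vectors apart, so they share the same misalignment.  */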
1484 /* Function vect_analyze_data_refs_alignment
1486 Analyze the alignment of the data-references in the loop.
1487 Return FALSE if a data reference is found that cannot be vectorized. */
1489 bool
1490 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1491 bb_vec_info bb_vinfo)
1493 if (vect_print_dump_info (REPORT_DETAILS))
1494 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1496 /* Mark groups of data references with same alignment using
1497 data dependence information. */
1498 if (loop_vinfo)
1500 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1501 struct data_dependence_relation *ddr;
1502 unsigned int i;
1504 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1505 vect_find_same_alignment_drs (ddr, loop_vinfo);
1508 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1510 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1511 fprintf (vect_dump,
1512 "not vectorized: can't calculate alignment for data ref.");
1513 return false;
1516 return true;
1520 /* Analyze groups of strided accesses: check that DR belongs to a group of
1521 strided accesses of legal size, step, etc. Detect gaps, single element
1522 interleaving, and other special cases. Set strided access info.
1523 Collect groups of strided stores for further use in SLP analysis. */
1525 static bool
1526 vect_analyze_group_access (struct data_reference *dr)
1528 tree step = DR_STEP (dr);
1529 tree scalar_type = TREE_TYPE (DR_REF (dr));
1530 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1531 gimple stmt = DR_STMT (dr);
1532 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1533 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1534 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1535 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1536 HOST_WIDE_INT stride;
1537 bool slp_impossible = false;
1539 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1540 interleaving group (including gaps). */
1541 stride = dr_step / type_size;
1543 /* A non-consecutive access is possible only as part of an interleaving group. */
1544 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1546 /* Check if this DR is a part of interleaving, and is a single
1547 element of the group that is accessed in the loop. */
1549 /* Gaps are supported only for loads. STEP must be a multiple of the type
1550 size. The size of the group must be a power of 2. */
1551 if (DR_IS_READ (dr)
1552 && (dr_step % type_size) == 0
1553 && stride > 0
1554 && exact_log2 (stride) != -1)
1556 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1557 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1558 if (vect_print_dump_info (REPORT_DR_DETAILS))
1560 fprintf (vect_dump, "Detected single element interleaving ");
1561 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1562 fprintf (vect_dump, " step ");
1563 print_generic_expr (vect_dump, step, TDF_SLIM);
1565 return true;
1567 if (vect_print_dump_info (REPORT_DETAILS))
1568 fprintf (vect_dump, "not consecutive access");
1569 return false;
1572 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1574 /* First stmt in the interleaving chain. Check the chain. */
1575 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1576 struct data_reference *data_ref = dr;
1577 unsigned int count = 1;
1578 tree next_step;
1579 tree prev_init = DR_INIT (data_ref);
1580 gimple prev = stmt;
1581 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1583 while (next)
1585 /* Skip identical data-refs. In case two or more stmts share a data-ref
1586 (supported only for loads), we vectorize only the first stmt, and
1587 the rest get their vectorized loads from the first one. */
1588 if (!tree_int_cst_compare (DR_INIT (data_ref),
1589 DR_INIT (STMT_VINFO_DATA_REF (
1590 vinfo_for_stmt (next)))))
1592 if (!DR_IS_READ (data_ref))
1594 if (vect_print_dump_info (REPORT_DETAILS))
1595 fprintf (vect_dump, "Two store stmts share the same dr.");
1596 return false;
1599 /* Check that there are no load-store dependencies for these loads,
1600 to prevent a load-store-load sequence to the same location. */
1601 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1602 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1604 if (vect_print_dump_info (REPORT_DETAILS))
1605 fprintf (vect_dump,
1606 "READ_WRITE dependence in interleaving.");
1607 return false;
1610 /* For load use the same data-ref load. */
1611 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1613 prev = next;
1614 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1615 continue;
1617 prev = next;
1619 /* Check that all the accesses have the same STEP. */
1620 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1621 if (tree_int_cst_compare (step, next_step))
1623 if (vect_print_dump_info (REPORT_DETAILS))
1624 fprintf (vect_dump, "not consecutive access in interleaving");
1625 return false;
1628 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1629 /* Check that the distance between two accesses is equal to the type
1630 size. Otherwise, we have gaps. */
1631 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1632 - TREE_INT_CST_LOW (prev_init)) / type_size;
1633 if (diff != 1)
1635 /* FORNOW: SLP of accesses with gaps is not supported. */
1636 slp_impossible = true;
1637 if (!DR_IS_READ (data_ref))
1639 if (vect_print_dump_info (REPORT_DETAILS))
1640 fprintf (vect_dump, "interleaved store with gaps");
1641 return false;
1644 gaps += diff - 1;
1647 /* Store the gap from the previous member of the group. If there is no
1648 gap in the access, DR_GROUP_GAP is always 1. */
1649 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1651 prev_init = DR_INIT (data_ref);
1652 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1653 /* Count the number of data-refs in the chain. */
1654 count++;
1657 /* COUNT is the number of accesses found; we multiply it by the size of
1658 the type to get COUNT_IN_BYTES. */
1659 count_in_bytes = type_size * count;
1661 /* Check that the size of the interleaving (including gaps) is not
1662 greater than STEP. */
1663 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1665 if (vect_print_dump_info (REPORT_DETAILS))
1667 fprintf (vect_dump, "interleaving size is greater than step for ");
1668 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1670 return false;
1673 /* Check that the size of the interleaving is equal to STEP for stores,
1674 i.e., that there are no gaps. */
1675 if (dr_step && dr_step != count_in_bytes)
1677 if (DR_IS_READ (dr))
1679 slp_impossible = true;
1680 /* There is a gap after the last load in the group. This gap is the
1681 difference between the stride and the number of elements. When
1682 there is no gap, this difference should be 0. */
1683 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1685 else
1687 if (vect_print_dump_info (REPORT_DETAILS))
1688 fprintf (vect_dump, "interleaved store with gaps");
1689 return false;
1693 /* Check that STEP is a multiple of type size. */
1694 if (dr_step && (dr_step % type_size) != 0)
1696 if (vect_print_dump_info (REPORT_DETAILS))
1698 fprintf (vect_dump, "step is not a multiple of type size: step ");
1699 print_generic_expr (vect_dump, step, TDF_SLIM);
1700 fprintf (vect_dump, " size ");
1701 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1702 TDF_SLIM);
1704 return false;
1707 /* FORNOW: we handle only interleaving that is a power of 2.
1708 We don't fail here if it may still be possible to vectorize the
1709 group using SLP. If not, the size of the group will be checked in
1710 vect_analyze_operations, and the vectorization will fail. */
1711 if (exact_log2 (stride) == -1)
1713 if (vect_print_dump_info (REPORT_DETAILS))
1714 fprintf (vect_dump, "interleaving is not a power of 2");
1716 if (slp_impossible)
1717 return false;
1720 if (stride == 0)
1721 stride = count;
1723 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1724 if (vect_print_dump_info (REPORT_DETAILS))
1725 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1727 /* SLP: create an SLP data structure for every interleaving group of
1728 stores for further analysis in vect_analyze_slp. */
1729 if (!DR_IS_READ (dr) && !slp_impossible)
1731 if (loop_vinfo)
1732 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1733 stmt);
1734 if (bb_vinfo)
1735 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1736 stmt);
1740 return true;
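/* Worked example with assumed values: for the store group

       a[3*i]   = x;   # DR_INIT 0
       a[3*i+1] = y;   # DR_INIT 4
       a[3*i+2] = z;   # DR_INIT 8, 4-byte elements, step 12

   stride = 12 / 4 = 3, count = 3 and count_in_bytes = 12 == step, so
   there are no gaps.  The group size is recorded as 3; since 3 is not a
   power of 2, the group is kept only as an SLP candidate.  */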
1744 /* Analyze the access pattern of the data-reference DR.
1745 In case of non-consecutive accesses call vect_analyze_group_access() to
1746 analyze groups of strided accesses. */
1748 static bool
1749 vect_analyze_data_ref_access (struct data_reference *dr)
1751 tree step = DR_STEP (dr);
1752 tree scalar_type = TREE_TYPE (DR_REF (dr));
1753 gimple stmt = DR_STMT (dr);
1754 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1755 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1756 struct loop *loop = NULL;
1757 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1759 if (loop_vinfo)
1760 loop = LOOP_VINFO_LOOP (loop_vinfo);
1762 if (loop_vinfo && !step)
1764 if (vect_print_dump_info (REPORT_DETAILS))
1765 fprintf (vect_dump, "bad data-ref access in loop");
1766 return false;
1769 /* Don't allow invariant accesses in loops. */
1770 if (loop_vinfo && dr_step == 0)
1771 return false;
1773 if (loop && nested_in_vect_loop_p (loop, stmt))
1775 /* Interleaved accesses are not yet supported within outer-loop
1776 vectorization for references in the inner-loop. */
1777 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1779 /* For the rest of the analysis we use the outer-loop step. */
1780 step = STMT_VINFO_DR_STEP (stmt_info);
1781 dr_step = TREE_INT_CST_LOW (step);
1783 if (dr_step == 0)
1785 if (vect_print_dump_info (REPORT_ALIGNMENT))
1786 fprintf (vect_dump, "zero step in outer loop.");
1787 if (DR_IS_READ (dr))
1788 return true;
1789 else
1790 return false;
1794 /* Consecutive? */
1795 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1797 /* Mark that it is not interleaving. */
1798 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1799 return true;
1802 if (loop && nested_in_vect_loop_p (loop, stmt))
1804 if (vect_print_dump_info (REPORT_ALIGNMENT))
1805 fprintf (vect_dump, "strided access in outer loop.");
1806 return false;
1809 /* Not consecutive access - check if it's a part of interleaving group. */
1810 return vect_analyze_group_access (dr);
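
/* A minimal sketch (not part of the vectorizer) of the classification
   done above, on byte values; the names are illustrative.  A zero step
   is an invariant access (allowed only for reads in the outer-loop
   case), a step equal to the element size is a consecutive access, and
   anything else is examined as a potential interleaving group.  */
#if 0
enum access_kind_sketch { ACCESS_INVARIANT, ACCESS_CONSECUTIVE,
                          ACCESS_GROUPED };

static enum access_kind_sketch
classify_access_sketch (long step, long type_size)
{
  if (step == 0)
    return ACCESS_INVARIANT;
  if (step == type_size)
    return ACCESS_CONSECUTIVE;
  return ACCESS_GROUPED;
}
#endif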
1814 /* Function vect_analyze_data_ref_accesses.
1816 Analyze the access pattern of all the data references in the loop.
1818 FORNOW: the only access pattern that is considered vectorizable is a
1819 simple step 1 (consecutive) access.
1821 FORNOW: handle only arrays and pointer accesses. */
1823 bool
1824 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1826 unsigned int i;
1827 VEC (data_reference_p, heap) *datarefs;
1828 struct data_reference *dr;
1830 if (vect_print_dump_info (REPORT_DETAILS))
1831 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1833 if (loop_vinfo)
1834 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1835 else
1836 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1838 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1839 if (!vect_analyze_data_ref_access (dr))
1841 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1842 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1843 return false;
1846 return true;
1849 /* Function vect_prune_runtime_alias_test_list.
1851 Prune a list of ddrs to be tested at run-time by versioning for alias.
1852 Return FALSE if the resulting list of ddrs is longer than allowed by
1853 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1855 bool
1856 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1858 VEC (ddr_p, heap) * ddrs =
1859 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1860 unsigned i, j;
1862 if (vect_print_dump_info (REPORT_DETAILS))
1863 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1865 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1867 bool found;
1868 ddr_p ddr_i;
1870 ddr_i = VEC_index (ddr_p, ddrs, i);
1871 found = false;
1873 for (j = 0; j < i; j++)
1875 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1877 if (vect_vfa_range_equal (ddr_i, ddr_j))
1879 if (vect_print_dump_info (REPORT_DR_DETAILS))
1881 fprintf (vect_dump, "found equal ranges ");
1882 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1883 fprintf (vect_dump, ", ");
1884 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1885 fprintf (vect_dump, " and ");
1886 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1887 fprintf (vect_dump, ", ");
1888 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1890 found = true;
1891 break;
1895 if (found)
1897 VEC_ordered_remove (ddr_p, ddrs, i);
1898 continue;
1900 i++;
1903 if (VEC_length (ddr_p, ddrs) >
1904 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1906 if (vect_print_dump_info (REPORT_DR_DETAILS))
1908 fprintf (vect_dump,
1909 "disable versioning for alias - max number of generated "
1910 "checks exceeded.");
1913 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1915 return false;
1918 return true;
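
/* A minimal sketch (not part of the vectorizer) of the pruning loop
   above, with plain ints standing in for ddrs and a caller-supplied
   predicate standing in for vect_vfa_range_equal.  Entries whose range
   equals that of an earlier entry are dropped; if more checks survive
   than MAX_CHECKS allows, versioning for alias is abandoned and 0 is
   returned.  All names are illustrative.  */
#if 0
static int
prune_alias_checks_sketch (int *checks, int n, int max_checks,
                           bool (*range_equal) (int, int))
{
  int i, j, kept = 0;

  for (i = 0; i < n; i++)
    {
      bool found = false;

      for (j = 0; j < kept; j++)
        if (range_equal (checks[i], checks[j]))
          {
            found = true;
            break;
          }

      if (!found)
        checks[kept++] = checks[i];
    }

  return kept <= max_checks ? kept : 0;
}
#endif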
1922 /* Function vect_analyze_data_refs.
1924 Find all the data references in the loop or basic block.
1926 The general structure of the analysis of data refs in the vectorizer is as
1927 follows:
1928 1- vect_analyze_data_refs(loop/bb): call
1929 compute_data_dependences_for_loop/bb to find and analyze all data-refs
1930 in the loop/bb and their dependences.
1931 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1932 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1933 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1937 bool
1938 vect_analyze_data_refs (loop_vec_info loop_vinfo,
1939 bb_vec_info bb_vinfo,
1940 int *min_vf)
1942 struct loop *loop = NULL;
1943 basic_block bb = NULL;
1944 unsigned int i;
1945 VEC (data_reference_p, heap) *datarefs;
1946 struct data_reference *dr;
1947 tree scalar_type;
1948 bool res;
1950 if (vect_print_dump_info (REPORT_DETAILS))
1951 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1953 if (loop_vinfo)
1955 loop = LOOP_VINFO_LOOP (loop_vinfo);
1956 res = compute_data_dependences_for_loop
1957 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
1958 &LOOP_VINFO_DDRS (loop_vinfo));
1960 if (!res)
1962 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1963 fprintf (vect_dump, "not vectorized: loop contains function calls"
1964 " or data references that cannot be analyzed");
1965 return false;
1968 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1970 else
1972 bb = BB_VINFO_BB (bb_vinfo);
1973 res = compute_data_dependences_for_bb (bb, true,
1974 &BB_VINFO_DATAREFS (bb_vinfo),
1975 &BB_VINFO_DDRS (bb_vinfo));
1976 if (!res)
1978 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1979 fprintf (vect_dump, "not vectorized: basic block contains function"
1980 " calls or data references that cannot be analyzed");
1981 return false;
1984 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1987 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1988 from stmt_vec_info struct to DR and vectype. */
1990 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1992 gimple stmt;
1993 stmt_vec_info stmt_info;
1994 tree base, offset, init;
1995 int vf;
1997 if (!dr || !DR_REF (dr))
1999 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2000 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2001 return false;
2004 stmt = DR_STMT (dr);
2005 stmt_info = vinfo_for_stmt (stmt);
2007 /* Check that analysis of the data-ref succeeded. */
2008 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2009 || !DR_STEP (dr))
2011 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2013 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2014 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2016 return false;
2019 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2021 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2022 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2023 "constant");
2024 return false;
2027 base = unshare_expr (DR_BASE_ADDRESS (dr));
2028 offset = unshare_expr (DR_OFFSET (dr));
2029 init = unshare_expr (DR_INIT (dr));
2031 /* Update DR field in stmt_vec_info struct. */
2033 /* If the dataref is in an inner-loop of the loop that is considered
2034 for vectorization, we also want to analyze the access relative to
2035 the outer-loop (DR contains information only relative to the
2036 inner-most enclosing loop). We do that by building a reference to the
2037 first location accessed by the inner-loop, and analyze it relative to
2038 the outer-loop. */
2039 if (loop && nested_in_vect_loop_p (loop, stmt))
2041 tree outer_step, outer_base, outer_init;
2042 HOST_WIDE_INT pbitsize, pbitpos;
2043 tree poffset;
2044 enum machine_mode pmode;
2045 int punsignedp, pvolatilep;
2046 affine_iv base_iv, offset_iv;
2047 tree dinit;
2049 /* Build a reference to the first location accessed by the
2050 inner-loop: *(BASE+INIT). (The first location is actually
2051 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2052 tree inner_base = build_fold_indirect_ref
2053 (fold_build2 (POINTER_PLUS_EXPR,
2054 TREE_TYPE (base), base,
2055 fold_convert (sizetype, init)));
2057 if (vect_print_dump_info (REPORT_DETAILS))
2059 fprintf (vect_dump, "analyze in outer-loop: ");
2060 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2063 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2064 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2065 gcc_assert (outer_base != NULL_TREE);
2067 if (pbitpos % BITS_PER_UNIT != 0)
2069 if (vect_print_dump_info (REPORT_DETAILS))
2070 fprintf (vect_dump, "failed: bit offset alignment.\n");
2071 return false;
2074 outer_base = build_fold_addr_expr (outer_base);
2075 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2076 &base_iv, false))
2078 if (vect_print_dump_info (REPORT_DETAILS))
2079 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2080 return false;
2083 if (offset)
2085 if (poffset)
2086 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2087 poffset);
2088 else
2089 poffset = offset;
2092 if (!poffset)
2094 offset_iv.base = ssize_int (0);
2095 offset_iv.step = ssize_int (0);
2097 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2098 &offset_iv, false))
2100 if (vect_print_dump_info (REPORT_DETAILS))
2101 fprintf (vect_dump, "evolution of offset is not affine.\n");
2102 return false;
2105 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2106 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2107 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2108 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2109 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2111 outer_step = size_binop (PLUS_EXPR,
2112 fold_convert (ssizetype, base_iv.step),
2113 fold_convert (ssizetype, offset_iv.step));
2115 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2116 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2117 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2118 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2119 STMT_VINFO_DR_OFFSET (stmt_info) =
2120 fold_convert (ssizetype, offset_iv.base);
2121 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2122 size_int (highest_pow2_factor (offset_iv.base));
2124 if (vect_print_dump_info (REPORT_DETAILS))
2126 fprintf (vect_dump, "\touter base_address: ");
2127 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2128 fprintf (vect_dump, "\n\touter offset from base address: ");
2129 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2130 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2131 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2132 fprintf (vect_dump, "\n\touter step: ");
2133 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2134 fprintf (vect_dump, "\n\touter aligned to: ");
2135 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2139 if (STMT_VINFO_DATA_REF (stmt_info))
2141 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2143 fprintf (vect_dump,
2144 "not vectorized: more than one data ref in stmt: ");
2145 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2147 return false;
2150 STMT_VINFO_DATA_REF (stmt_info) = dr;
2152 /* Set vectype for STMT. */
2153 scalar_type = TREE_TYPE (DR_REF (dr));
2154 STMT_VINFO_VECTYPE (stmt_info) =
2155 get_vectype_for_scalar_type (scalar_type);
2156 if (!STMT_VINFO_VECTYPE (stmt_info))
2158 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2160 fprintf (vect_dump,
2161 "not vectorized: no vectype for stmt: ");
2162 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2163 fprintf (vect_dump, " scalar_type: ");
2164 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2166 return false;
2169 /* Adjust the minimal vectorization factor according to the
2170 vector type. */
2171 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2172 if (vf > *min_vf)
2173 *min_vf = vf;
2176 return true;
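
/* A minimal sketch (not part of the vectorizer) of how the minimal
   vectorization factor is accumulated in the loop above: MIN_VF ends up
   as the largest number of vector subparts over all data-refs.  The
   array of subpart counts is illustrative only.  */
#if 0
static int
min_vf_sketch (const int *subparts, int n)
{
  int i, min_vf = 1;

  for (i = 0; i < n; i++)
    if (subparts[i] > min_vf)
      min_vf = subparts[i];

  return min_vf;
}
#endif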
2180 /* Function vect_get_new_vect_var.
2182 Returns a name for a new variable. The current naming scheme forms the
2183 name by prepending the prefix "vect_", "stmp_" or "vect_p" (depending on
2184 the value of VAR_KIND) to NAME, or uses the prefix alone if NAME is not
2185 provided. */
2187 tree
2188 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2190 const char *prefix;
2191 tree new_vect_var;
2193 switch (var_kind)
2195 case vect_simple_var:
2196 prefix = "vect_";
2197 break;
2198 case vect_scalar_var:
2199 prefix = "stmp_";
2200 break;
2201 case vect_pointer_var:
2202 prefix = "vect_p";
2203 break;
2204 default:
2205 gcc_unreachable ();
2208 if (name)
2210 char* tmp = concat (prefix, name, NULL);
2211 new_vect_var = create_tmp_var (type, tmp);
2212 free (tmp);
2214 else
2215 new_vect_var = create_tmp_var (type, prefix);
2217 /* Mark vector typed variable as a gimple register variable. */
2218 if (TREE_CODE (type) == VECTOR_TYPE)
2219 DECL_GIMPLE_REG_P (new_vect_var) = true;
2221 return new_vect_var;
2225 /* Function vect_create_addr_base_for_vector_ref.
2227 Create an expression that computes the address of the first memory location
2228 that will be accessed for a data reference.
2230 Input:
2231 STMT: The statement containing the data reference.
2232 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2233 OFFSET: Optional. If supplied, it is added to the initial address.
2234 LOOP: Specify relative to which loop-nest the address should be computed.
2235 For example, when the dataref is in an inner-loop nested in an
2236 outer-loop that is now being vectorized, LOOP can be either the
2237 outer-loop, or the inner-loop. The first memory location accessed
2238 by the following dataref ('in' points to short):
2240 for (i=0; i<N; i++)
2241 for (j=0; j<M; j++)
2242 s += in[i+j]
2244 is as follows:
2245 if LOOP=i_loop: &in (relative to i_loop)
2246 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2248 Output:
2249 1. Return an SSA_NAME whose value is the address of the memory location of
2250 the first vector of the data reference.
2251 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2252 these statement(s) which define the returned SSA_NAME.
2254 FORNOW: We are only handling array accesses with step 1. */
2256 tree
2257 vect_create_addr_base_for_vector_ref (gimple stmt,
2258 gimple_seq *new_stmt_list,
2259 tree offset,
2260 struct loop *loop)
2262 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2263 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2264 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2265 tree base_name;
2266 tree data_ref_base_var;
2267 tree vec_stmt;
2268 tree addr_base, addr_expr;
2269 tree dest;
2270 gimple_seq seq = NULL;
2271 tree base_offset = unshare_expr (DR_OFFSET (dr));
2272 tree init = unshare_expr (DR_INIT (dr));
2273 tree vect_ptr_type;
2274 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2275 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2277 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2279 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2281 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2283 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2284 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2285 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2288 if (loop_vinfo)
2289 base_name = build_fold_indirect_ref (data_ref_base);
2290 else
2292 base_offset = ssize_int (0);
2293 init = ssize_int (0);
2294 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2297 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2298 add_referenced_var (data_ref_base_var);
2299 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2300 data_ref_base_var);
2301 gimple_seq_add_seq (new_stmt_list, seq);
2303 /* Create base_offset */
2304 base_offset = size_binop (PLUS_EXPR,
2305 fold_convert (sizetype, base_offset),
2306 fold_convert (sizetype, init));
2307 dest = create_tmp_var (sizetype, "base_off");
2308 add_referenced_var (dest);
2309 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2310 gimple_seq_add_seq (new_stmt_list, seq);
2312 if (offset)
2314 tree tmp = create_tmp_var (sizetype, "offset");
2316 add_referenced_var (tmp);
2317 offset = fold_build2 (MULT_EXPR, sizetype,
2318 fold_convert (sizetype, offset), step);
2319 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2320 base_offset, offset);
2321 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2322 gimple_seq_add_seq (new_stmt_list, seq);
2325 /* base + base_offset */
2326 if (loop_vinfo)
2327 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2328 data_ref_base, base_offset);
2329 else
2331 if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2332 addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2333 else
2334 addr_base = build1 (ADDR_EXPR,
2335 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2336 unshare_expr (DR_REF (dr)));
2339 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2341 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2342 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2343 get_name (base_name));
2344 add_referenced_var (addr_expr);
2345 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2346 gimple_seq_add_seq (new_stmt_list, seq);
2348 if (vect_print_dump_info (REPORT_DETAILS))
2350 fprintf (vect_dump, "created ");
2351 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2354 return vec_stmt;
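
/* A minimal sketch (not part of the vectorizer) of the address arithmetic
   generated above, with byte offsets instead of trees: the returned
   address is BASE plus (DR_OFFSET + DR_INIT), plus the optional extra
   OFFSET scaled by the element size.  All parameter names are
   illustrative.  */
#if 0
static char *
addr_base_sketch (char *base, long dr_offset, long dr_init,
                  long offset /* in elements, possibly 0 */,
                  long type_size)
{
  long base_offset = dr_offset + dr_init;

  if (offset)
    base_offset += offset * type_size;

  return base + base_offset;
}
#endif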
2358 /* Function vect_create_data_ref_ptr.
2360 Create a new pointer to vector type (vp), that points to the first location
2361 accessed in the loop by STMT, along with the def-use update chain to
2362 appropriately advance the pointer through the loop iterations. Also set
2363 aliasing information for the pointer. This vector pointer is used by the
2364 callers to this function to create a memory reference expression for vector
2365 load/store access.
2367 Input:
2368 1. STMT: a stmt that references memory. Expected to be of the form
2369 GIMPLE_ASSIGN <name, data-ref> or
2370 GIMPLE_ASSIGN <data-ref, name>.
2371 2. AT_LOOP: the loop where the vector memref is to be created.
2372 3. OFFSET (optional): an offset to be added to the initial address accessed
2373 by the data-ref in STMT.
2374 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2375 pointing to the initial address.
2376 5. TYPE: if not NULL indicates the required type of the data-ref.
2378 Output:
2379 1. Declare a new ptr to vector_type, and have it point to the base of the
2380 data reference (initial address accessed by the data reference).
2381 For example, for vector of type V8HI, the following code is generated:
2383 v8hi *vp;
2384 vp = (v8hi *)initial_address;
2386 if OFFSET is not supplied:
2387 initial_address = &a[init];
2388 if OFFSET is supplied:
2389 initial_address = &a[init + OFFSET];
2391 Return the initial_address in INITIAL_ADDRESS.
2393 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2394 update the pointer in each iteration of the loop.
2396 Return the increment stmt that updates the pointer in PTR_INCR.
2398 3. Set INV_P to true if the access pattern of the data reference in the
2399 vectorized loop is invariant. Set it to false otherwise.
2401 4. Return the pointer. */
2403 tree
2404 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2405 tree offset, tree *initial_address, gimple *ptr_incr,
2406 bool only_init, bool *inv_p)
2408 tree base_name;
2409 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2410 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2411 struct loop *loop = NULL;
2412 bool nested_in_vect_loop = false;
2413 struct loop *containing_loop = NULL;
2414 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2415 tree vect_ptr_type;
2416 tree vect_ptr;
2417 tree new_temp;
2418 gimple vec_stmt;
2419 gimple_seq new_stmt_list = NULL;
2420 edge pe = NULL;
2421 basic_block new_bb;
2422 tree vect_ptr_init;
2423 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2424 tree vptr;
2425 gimple_stmt_iterator incr_gsi;
2426 bool insert_after;
2427 tree indx_before_incr, indx_after_incr;
2428 gimple incr;
2429 tree step;
2430 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2431 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2433 if (loop_vinfo)
2435 loop = LOOP_VINFO_LOOP (loop_vinfo);
2436 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2437 containing_loop = (gimple_bb (stmt))->loop_father;
2438 pe = loop_preheader_edge (loop);
2440 else
2442 gcc_assert (bb_vinfo);
2443 only_init = true;
2444 *ptr_incr = NULL;
2447 /* Check the step (evolution) of the load in LOOP, and record
2448 whether it's invariant. */
2449 if (nested_in_vect_loop)
2450 step = STMT_VINFO_DR_STEP (stmt_info);
2451 else
2452 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2454 if (tree_int_cst_compare (step, size_zero_node) == 0)
2455 *inv_p = true;
2456 else
2457 *inv_p = false;
2459 /* Create an expression for the first address accessed by this load
2460 in LOOP. */
2461 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2463 if (vect_print_dump_info (REPORT_DETAILS))
2465 tree data_ref_base = base_name;
2466 fprintf (vect_dump, "create vector-pointer variable to type: ");
2467 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2468 if (TREE_CODE (data_ref_base) == VAR_DECL
2469 || TREE_CODE (data_ref_base) == ARRAY_REF)
2470 fprintf (vect_dump, " vectorizing an array ref: ");
2471 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2472 fprintf (vect_dump, " vectorizing a record based array ref: ");
2473 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2474 fprintf (vect_dump, " vectorizing a pointer ref: ");
2475 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2478 /** (1) Create the new vector-pointer variable: **/
2479 vect_ptr_type = build_pointer_type (vectype);
2480 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2481 get_name (base_name));
2483 /* Vector types inherit the alias set of their component type by default so
2484 we need to use a ref-all pointer if the data reference does not conflict
2485 with the created vector data reference because it is not addressable. */
2486 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2487 get_alias_set (DR_REF (dr))))
2489 vect_ptr_type
2490 = build_pointer_type_for_mode (vectype,
2491 TYPE_MODE (vect_ptr_type), true);
2492 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2493 get_name (base_name));
2496 /* Likewise for any of the data references in the stmt group. */
2497 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2499 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2502 tree lhs = gimple_assign_lhs (orig_stmt);
2503 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2504 get_alias_set (lhs)))
2506 vect_ptr_type
2507 = build_pointer_type_for_mode (vectype,
2508 TYPE_MODE (vect_ptr_type), true);
2509 vect_ptr
2510 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2511 get_name (base_name));
2512 break;
2515 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2517 while (orig_stmt);
2520 add_referenced_var (vect_ptr);
2522 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2523 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2524 def-use update cycles for the pointer: One relative to the outer-loop
2525 (LOOP), which is what steps (3) and (4) below do. The other is relative
2526 to the inner-loop (which is the inner-most loop containing the dataref),
2527 and this is done by step (5) below.
2529 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2530 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2531 redundant. Steps (3),(4) create the following:
2533 vp0 = &base_addr;
2534 LOOP: vp1 = phi(vp0,vp2)
2537 vp2 = vp1 + step
2538 goto LOOP
2540 If there is an inner-loop nested in loop, then step (5) will also be
2541 applied, and an additional update in the inner-loop will be created:
2543 vp0 = &base_addr;
2544 LOOP: vp1 = phi(vp0,vp2)
2546 inner: vp3 = phi(vp1,vp4)
2547 vp4 = vp3 + inner_step
2548 if () goto inner
2550 vp2 = vp1 + step
2551 if () goto LOOP */
2553 /** (3) Calculate the initial address of the vector-pointer, and set
2554 the vector-pointer to point to it before the loop: **/
2556 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2558 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2559 offset, loop);
2560 if (new_stmt_list)
2562 if (pe)
2564 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2565 gcc_assert (!new_bb);
2567 else
2568 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2571 *initial_address = new_temp;
2573 /* Create: p = (vectype *) initial_base */
2574 vec_stmt = gimple_build_assign (vect_ptr,
2575 fold_convert (vect_ptr_type, new_temp));
2576 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2577 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2578 if (pe)
2580 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2581 gcc_assert (!new_bb);
2583 else
2584 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2586 /** (4) Handle the updating of the vector-pointer inside the loop.
2587 This is needed when ONLY_INIT is false, and also when AT_LOOP
2588 is the inner-loop nested in LOOP (during outer-loop vectorization).
2591 /* No update in loop is required. */
2592 if (only_init && (!loop_vinfo || at_loop == loop))
2594 /* Copy the points-to information if it exists. */
2595 if (DR_PTR_INFO (dr))
2596 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2597 vptr = vect_ptr_init;
2599 else
2601 /* The step of the vector pointer is the Vector Size. */
2602 tree step = TYPE_SIZE_UNIT (vectype);
2603 /* One exception to the above is when the scalar step of the load in
2604 LOOP is zero. In this case the step here is also zero. */
2605 if (*inv_p)
2606 step = size_zero_node;
2608 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2610 create_iv (vect_ptr_init,
2611 fold_convert (vect_ptr_type, step),
2612 vect_ptr, loop, &incr_gsi, insert_after,
2613 &indx_before_incr, &indx_after_incr);
2614 incr = gsi_stmt (incr_gsi);
2615 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2617 /* Copy the points-to information if it exists. */
2618 if (DR_PTR_INFO (dr))
2620 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2621 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2623 if (ptr_incr)
2624 *ptr_incr = incr;
2626 vptr = indx_before_incr;
2629 if (!nested_in_vect_loop || only_init)
2630 return vptr;
2633 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2634 nested in LOOP, if exists: **/
2636 gcc_assert (nested_in_vect_loop);
2637 if (!only_init)
2639 standard_iv_increment_position (containing_loop, &incr_gsi,
2640 &insert_after);
2641 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2642 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2643 &indx_after_incr);
2644 incr = gsi_stmt (incr_gsi);
2645 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2647 /* Copy the points-to information if it exists. */
2648 if (DR_PTR_INFO (dr))
2650 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2651 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2653 if (ptr_incr)
2654 *ptr_incr = incr;
2656 return indx_before_incr;
2658 else
2659 gcc_unreachable ();
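
/* A minimal sketch (not part of the vectorizer) of the pointer update
   cycle created above for the non-nested case, written as scalar code:
   the pointer is set to the first address in the preheader and advanced
   by the vector size (or kept fixed for an invariant access) in every
   iteration.  VECTYPE_SIZE, N and the loop body are illustrative.  */
#if 0
static void
data_ref_ptr_sketch (char *initial_addr, long vectype_size, long n,
                     bool invariant)
{
  char *vp = initial_addr;           /* vp0 = &base_addr;          */
  long i;

  for (i = 0; i < n; i++)            /* LOOP: vp1 = phi (vp0, vp2) */
    {
      /* ... vector load/store through VP goes here ...            */
      vp += invariant ? 0 : vectype_size;   /* vp2 = vp1 + step    */
    }
}
#endif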
2663 /* Function bump_vector_ptr
2665 Increment a pointer (to a vector type) by vector-size. If requested,
2666 i.e. if PTR-INCR is given, then also connect the new increment stmt
2667 to the existing def-use update-chain of the pointer, by modifying
2668 the PTR_INCR as illustrated below:
2670 The pointer def-use update-chain before this function:
2671 DATAREF_PTR = phi (p_0, p_2)
2672 ....
2673 PTR_INCR: p_2 = DATAREF_PTR + step
2675 The pointer def-use update-chain after this function:
2676 DATAREF_PTR = phi (p_0, p_2)
2677 ....
2678 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2679 ....
2680 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2682 Input:
2683 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2684 in the loop.
2685 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2686 the loop. The increment amount across iterations is expected
2687 to be vector_size.
2688 BSI - location where the new update stmt is to be placed.
2689 STMT - the original scalar memory-access stmt that is being vectorized.
2690 BUMP - optional. The offset by which to bump the pointer. If not given,
2691 the offset is assumed to be vector_size.
2693 Output: Return NEW_DATAREF_PTR as illustrated above.
2697 tree
2698 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2699 gimple stmt, tree bump)
2701 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2702 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2703 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2704 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2705 tree update = TYPE_SIZE_UNIT (vectype);
2706 gimple incr_stmt;
2707 ssa_op_iter iter;
2708 use_operand_p use_p;
2709 tree new_dataref_ptr;
2711 if (bump)
2712 update = bump;
2714 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2715 dataref_ptr, update);
2716 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2717 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2718 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2720 /* Copy the points-to information if it exists. */
2721 if (DR_PTR_INFO (dr))
2722 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2724 if (!ptr_incr)
2725 return new_dataref_ptr;
2727 /* Update the vector-pointer's cross-iteration increment. */
2728 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2730 tree use = USE_FROM_PTR (use_p);
2732 if (use == dataref_ptr)
2733 SET_USE (use_p, new_dataref_ptr);
2734 else
2735 gcc_assert (tree_int_cst_compare (use, update) == 0);
2738 return new_dataref_ptr;
2742 /* Function vect_create_destination_var.
2744 Create a new temporary of type VECTYPE. */
2746 tree
2747 vect_create_destination_var (tree scalar_dest, tree vectype)
2749 tree vec_dest;
2750 const char *new_name;
2751 tree type;
2752 enum vect_var_kind kind;
2754 kind = vectype ? vect_simple_var : vect_scalar_var;
2755 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2757 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2759 new_name = get_name (scalar_dest);
2760 if (!new_name)
2761 new_name = "var_";
2762 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2763 add_referenced_var (vec_dest);
2765 return vec_dest;
2768 /* Function vect_strided_store_supported.
2770 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2771 and FALSE otherwise. */
2773 bool
2774 vect_strided_store_supported (tree vectype)
2776 optab interleave_high_optab, interleave_low_optab;
2777 int mode;
2779 mode = (int) TYPE_MODE (vectype);
2781 /* Check that the operation is supported. */
2782 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2783 vectype, optab_default);
2784 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2785 vectype, optab_default);
2786 if (!interleave_high_optab || !interleave_low_optab)
2788 if (vect_print_dump_info (REPORT_DETAILS))
2789 fprintf (vect_dump, "no optab for interleave.");
2790 return false;
2793 if (optab_handler (interleave_high_optab, mode)->insn_code
2794 == CODE_FOR_nothing
2795 || optab_handler (interleave_low_optab, mode)->insn_code
2796 == CODE_FOR_nothing)
2798 if (vect_print_dump_info (REPORT_DETAILS))
2799 fprintf (vect_dump, "interleave op not supported by target.");
2800 return false;
2803 return true;
2807 /* Function vect_permute_store_chain.
2809 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2810 a power of 2, generate interleave_high/low stmts to reorder the data
2811 correctly for the stores. Return the final references for stores in
2812 RESULT_CHAIN.
2814 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2815 The input is 4 vectors each containing 8 elements. We assign a number to each
2816 element; the input sequence is:
2818 1st vec: 0 1 2 3 4 5 6 7
2819 2nd vec: 8 9 10 11 12 13 14 15
2820 3rd vec: 16 17 18 19 20 21 22 23
2821 4th vec: 24 25 26 27 28 29 30 31
2823 The output sequence should be:
2825 1st vec: 0 8 16 24 1 9 17 25
2826 2nd vec: 2 10 18 26 3 11 19 27
2827 3rd vec: 4 12 20 28 5 13 21 29
2828 4th vec: 6 14 22 30 7 15 23 31
2830 i.e., we interleave the contents of the four vectors in their order.
2832 We use interleave_high/low instructions to create such output. The input of
2833 each interleave_high/low operation is two vectors:
2834 1st vec 2nd vec
2835 0 1 2 3 4 5 6 7
2836 the even elements of the result vector are obtained left-to-right from the
2837 high/low elements of the first vector. The odd elements of the result are
2838 obtained left-to-right from the high/low elements of the second vector.
2839 The output of interleave_high will be: 0 4 1 5
2840 and of interleave_low: 2 6 3 7
2843 The permutation is done in log LENGTH stages. In each stage interleave_high
2844 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2845 where the first argument is taken from the first half of DR_CHAIN and the
2846 second argument from its second half.
2847 In our example,
2849 I1: interleave_high (1st vec, 3rd vec)
2850 I2: interleave_low (1st vec, 3rd vec)
2851 I3: interleave_high (2nd vec, 4th vec)
2852 I4: interleave_low (2nd vec, 4th vec)
2854 The output for the first stage is:
2856 I1: 0 16 1 17 2 18 3 19
2857 I2: 4 20 5 21 6 22 7 23
2858 I3: 8 24 9 25 10 26 11 27
2859 I4: 12 28 13 29 14 30 15 31
2861 The output of the second stage, i.e. the final result is:
2863 I1: 0 8 16 24 1 9 17 25
2864 I2: 2 10 18 26 3 11 19 27
2865 I3: 4 12 20 28 5 13 21 29
2866 I4: 6 14 22 30 7 15 23 31. */
2868 bool
2869 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2870 unsigned int length,
2871 gimple stmt,
2872 gimple_stmt_iterator *gsi,
2873 VEC(tree,heap) **result_chain)
2875 tree perm_dest, vect1, vect2, high, low;
2876 gimple perm_stmt;
2877 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2878 int i;
2879 unsigned int j;
2880 enum tree_code high_code, low_code;
2882 /* Check that the operation is supported. */
2883 if (!vect_strided_store_supported (vectype))
2884 return false;
2886 *result_chain = VEC_copy (tree, heap, dr_chain);
2888 for (i = 0; i < exact_log2 (length); i++)
2890 for (j = 0; j < length/2; j++)
2892 vect1 = VEC_index (tree, dr_chain, j);
2893 vect2 = VEC_index (tree, dr_chain, j+length/2);
2895 /* Create interleaving stmt:
2896 in the case of big endian:
2897 high = interleave_high (vect1, vect2)
2898 and in the case of little endian:
2899 high = interleave_low (vect1, vect2). */
2900 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2901 DECL_GIMPLE_REG_P (perm_dest) = 1;
2902 add_referenced_var (perm_dest);
2903 if (BYTES_BIG_ENDIAN)
2905 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2906 low_code = VEC_INTERLEAVE_LOW_EXPR;
2908 else
2910 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2911 high_code = VEC_INTERLEAVE_LOW_EXPR;
2913 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2914 vect1, vect2);
2915 high = make_ssa_name (perm_dest, perm_stmt);
2916 gimple_assign_set_lhs (perm_stmt, high);
2917 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2918 VEC_replace (tree, *result_chain, 2*j, high);
2920 /* Create interleaving stmt:
2921 in the case of big endian:
2922 low = interleave_low (vect1, vect2)
2923 and in the case of little endian:
2924 low = interleave_high (vect1, vect2). */
2925 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2926 DECL_GIMPLE_REG_P (perm_dest) = 1;
2927 add_referenced_var (perm_dest);
2928 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2929 vect1, vect2);
2930 low = make_ssa_name (perm_dest, perm_stmt);
2931 gimple_assign_set_lhs (perm_stmt, low);
2932 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2933 VEC_replace (tree, *result_chain, 2*j+1, low);
2935 dr_chain = VEC_copy (tree, heap, *result_chain);
2937 return true;
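
/* A minimal standalone sketch (not part of the vectorizer) of one
   interleave_high/interleave_low pair from the permutation documented
   above, applied to element indices kept in plain int arrays.  HIGH
   receives the interleaved first halves of VECT1 and VECT2, LOW their
   interleaved second halves; log2 (LENGTH) such stages over the whole
   chain produce the final store order.  NELT and the names are
   illustrative.  */
#if 0
static void
interleave_pair_sketch (const int *vect1, const int *vect2,
                        int *high, int *low, int nelt)
{
  int k;

  for (k = 0; k < nelt / 2; k++)
    {
      high[2 * k]     = vect1[k];
      high[2 * k + 1] = vect2[k];
      low[2 * k]      = vect1[nelt / 2 + k];
      low[2 * k + 1]  = vect2[nelt / 2 + k];
    }
}

/* E.g. with nelt == 8, vect1 == {0..7} and vect2 == {16..23} this gives
   high == {0,16,1,17,2,18,3,19} and low == {4,20,5,21,6,22,7,23},
   matching the first-stage outputs I1 and I2 in the comment above.  */
#endif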
2940 /* Function vect_setup_realignment
2942 This function is called when vectorizing an unaligned load using
2943 the dr_explicit_realign[_optimized] scheme.
2944 This function generates the following code at the loop prolog:
2946 p = initial_addr;
2947 x msq_init = *(floor(p)); # prolog load
2948 realignment_token = call target_builtin;
2949 loop:
2950 x msq = phi (msq_init, ---)
2952 The stmts marked with x are generated only for the case of
2953 dr_explicit_realign_optimized.
2955 The code above sets up a new (vector) pointer, pointing to the first
2956 location accessed by STMT, and a "floor-aligned" load using that pointer.
2957 It also generates code to compute the "realignment-token" (if the relevant
2958 target hook was defined), and creates a phi-node at the loop-header bb
2959 whose arguments are the result of the prolog-load (created by this
2960 function) and the result of a load that takes place in the loop (to be
2961 created by the caller to this function).
2963 For the case of dr_explicit_realign_optimized:
2964 The caller to this function uses the phi-result (msq) to create the
2965 realignment code inside the loop, and sets up the missing phi argument,
2966 as follows:
2967 loop:
2968 msq = phi (msq_init, lsq)
2969 lsq = *(floor(p')); # load in loop
2970 result = realign_load (msq, lsq, realignment_token);
2972 For the case of dr_explicit_realign:
2973 loop:
2974 msq = *(floor(p)); # load in loop
2975 p' = p + (VS-1);
2976 lsq = *(floor(p')); # load in loop
2977 result = realign_load (msq, lsq, realignment_token);
2979 Input:
2980 STMT - (scalar) load stmt to be vectorized. This load accesses
2981 a memory location that may be unaligned.
2982 BSI - place where new code is to be inserted.
2983 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2984 is used.
2986 Output:
2987 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2988 target hook, if defined.
2989 Return value - the result of the loop-header phi node. */
2991 tree
2992 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2993 tree *realignment_token,
2994 enum dr_alignment_support alignment_support_scheme,
2995 tree init_addr,
2996 struct loop **at_loop)
2998 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2999 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3000 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3001 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3002 edge pe;
3003 tree scalar_dest = gimple_assign_lhs (stmt);
3004 tree vec_dest;
3005 gimple inc;
3006 tree ptr;
3007 tree data_ref;
3008 gimple new_stmt;
3009 basic_block new_bb;
3010 tree msq_init = NULL_TREE;
3011 tree new_temp;
3012 gimple phi_stmt;
3013 tree msq = NULL_TREE;
3014 gimple_seq stmts = NULL;
3015 bool inv_p;
3016 bool compute_in_loop = false;
3017 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3018 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3019 struct loop *loop_for_initial_load;
3021 gcc_assert (alignment_support_scheme == dr_explicit_realign
3022 || alignment_support_scheme == dr_explicit_realign_optimized);
3024 /* We need to generate three things:
3025 1. the misalignment computation
3026 2. the extra vector load (for the optimized realignment scheme).
3027 3. the phi node for the two vectors from which the realignment is
3028 done (for the optimized realignment scheme).
3031 /* 1. Determine where to generate the misalignment computation.
3033 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3034 calculation will be generated by this function, outside the loop (in the
3035 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3036 caller, inside the loop.
3038 Background: If the misalignment remains fixed throughout the iterations of
3039 the loop, then both realignment schemes are applicable, and also the
3040 misalignment computation can be done outside LOOP. This is because we are
3041 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3042 are a multiple of VS (the Vector Size), and therefore the misalignment in
3043 different vectorized LOOP iterations is always the same.
3044 The problem arises only if the memory access is in an inner-loop nested
3045 inside LOOP, which is now being vectorized using outer-loop vectorization.
3046 This is the only case when the misalignment of the memory access may not
3047 remain fixed throughout the iterations of the inner-loop (as explained in
3048 detail in vect_supportable_dr_alignment). In this case, not only is the
3049 optimized realignment scheme not applicable, but also the misalignment
3050 computation (and generation of the realignment token that is passed to
3051 REALIGN_LOAD) have to be done inside the loop.
3053 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3054 or not, which in turn determines if the misalignment is computed inside
3055 the inner-loop, or outside LOOP. */
3057 if (init_addr != NULL_TREE)
3059 compute_in_loop = true;
3060 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3064 /* 2. Determine where to generate the extra vector load.
3066 For the optimized realignment scheme, instead of generating two vector
3067 loads in each iteration, we generate a single extra vector load in the
3068 preheader of the loop, and in each iteration reuse the result of the
3069 vector load from the previous iteration. In case the memory access is in
3070 an inner-loop nested inside LOOP, which is now being vectorized using
3071 outer-loop vectorization, we need to determine whether this initial vector
3072 load should be generated at the preheader of the inner-loop, or can be
3073 generated at the preheader of LOOP. If the memory access has no evolution
3074 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3075 to be generated inside LOOP (in the preheader of the inner-loop). */
3077 if (nested_in_vect_loop)
3079 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3080 bool invariant_in_outerloop =
3081 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3082 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3084 else
3085 loop_for_initial_load = loop;
3086 if (at_loop)
3087 *at_loop = loop_for_initial_load;
3089 /* 3. For the case of the optimized realignment, create the first vector
3090 load at the loop preheader. */
3092 if (alignment_support_scheme == dr_explicit_realign_optimized)
3094 /* Create msq_init = *(floor(p1)) in the loop preheader */
3096 gcc_assert (!compute_in_loop);
3097 pe = loop_preheader_edge (loop_for_initial_load);
3098 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3099 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3100 &init_addr, &inc, true, &inv_p);
3101 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3102 new_stmt = gimple_build_assign (vec_dest, data_ref);
3103 new_temp = make_ssa_name (vec_dest, new_stmt);
3104 gimple_assign_set_lhs (new_stmt, new_temp);
3105 mark_symbols_for_renaming (new_stmt);
3106 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3107 gcc_assert (!new_bb);
3108 msq_init = gimple_assign_lhs (new_stmt);
3111 /* 4. Create realignment token using a target builtin, if available.
3112 It is done either inside the containing loop, or before LOOP (as
3113 determined above). */
3115 if (targetm.vectorize.builtin_mask_for_load)
3117 tree builtin_decl;
3119 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3120 if (compute_in_loop)
3121 gcc_assert (init_addr); /* already computed by the caller. */
3122 else
3124 /* Generate the INIT_ADDR computation outside LOOP. */
3125 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3126 NULL_TREE, loop);
3127 pe = loop_preheader_edge (loop);
3128 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3129 gcc_assert (!new_bb);
3132 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3133 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3134 vec_dest =
3135 vect_create_destination_var (scalar_dest,
3136 gimple_call_return_type (new_stmt));
3137 new_temp = make_ssa_name (vec_dest, new_stmt);
3138 gimple_call_set_lhs (new_stmt, new_temp);
3140 if (compute_in_loop)
3141 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3142 else
3144 /* Generate the misalignment computation outside LOOP. */
3145 pe = loop_preheader_edge (loop);
3146 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3147 gcc_assert (!new_bb);
3150 *realignment_token = gimple_call_lhs (new_stmt);
3152 /* The result of the CALL_EXPR to this builtin is determined from
3153 the value of the parameter and no global variables are touched
3154 which makes the builtin a "const" function. Requiring the
3155 builtin to have the "const" attribute makes it unnecessary
3156 to call mark_call_clobbered. */
3157 gcc_assert (TREE_READONLY (builtin_decl));
3160 if (alignment_support_scheme == dr_explicit_realign)
3161 return msq;
3163 gcc_assert (!compute_in_loop);
3164 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3167 /* 5. Create msq = phi <msq_init, lsq> in loop */
3169 pe = loop_preheader_edge (containing_loop);
3170 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3171 msq = make_ssa_name (vec_dest, NULL);
3172 phi_stmt = create_phi_node (msq, containing_loop->header);
3173 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3174 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3176 return msq;
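
/* A minimal sketch (not part of the vectorizer) of what the REALIGN_LOAD
   used by these schemes conceptually computes, assuming the realignment
   token is simply the byte misalignment OFS of the original address: the
   result is VS bytes taken from the concatenation of the two
   floor-aligned loads MSQ and LSQ, starting at OFS.  Real targets encode
   the token as a permute mask; the byte-array form is illustrative.  */
#if 0
static void
realign_load_sketch (const unsigned char *msq, const unsigned char *lsq,
                     int ofs, int vs, unsigned char *result)
{
  int i;

  for (i = 0; i < vs; i++)
    result[i] = (ofs + i < vs) ? msq[ofs + i] : lsq[ofs + i - vs];
}
#endif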
3180 /* Function vect_strided_load_supported.
3182 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3183 and FALSE otherwise. */
3185 bool
3186 vect_strided_load_supported (tree vectype)
3188 optab perm_even_optab, perm_odd_optab;
3189 int mode;
3191 mode = (int) TYPE_MODE (vectype);
3193 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3194 optab_default);
3195 if (!perm_even_optab)
3197 if (vect_print_dump_info (REPORT_DETAILS))
3198 fprintf (vect_dump, "no optab for perm_even.");
3199 return false;
3202 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3204 if (vect_print_dump_info (REPORT_DETAILS))
3205 fprintf (vect_dump, "perm_even op not supported by target.");
3206 return false;
3209 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3210 optab_default);
3211 if (!perm_odd_optab)
3213 if (vect_print_dump_info (REPORT_DETAILS))
3214 fprintf (vect_dump, "no optab for perm_odd.");
3215 return false;
3218 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3220 if (vect_print_dump_info (REPORT_DETAILS))
3221 fprintf (vect_dump, "perm_odd op not supported by target.");
3222 return false;
3224 return true;
3228 /* Function vect_permute_load_chain.
3230 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3231 a power of 2, generate extract_even/odd stmts to reorder the input data
3232 correctly. Return the final references for loads in RESULT_CHAIN.
3234 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3235 The input is 4 vectors each containing 8 elements. We assign a number to each
3236 element; the input sequence is:
3238 1st vec: 0 1 2 3 4 5 6 7
3239 2nd vec: 8 9 10 11 12 13 14 15
3240 3rd vec: 16 17 18 19 20 21 22 23
3241 4th vec: 24 25 26 27 28 29 30 31
3243 The output sequence should be:
3245 1st vec: 0 4 8 12 16 20 24 28
3246 2nd vec: 1 5 9 13 17 21 25 29
3247 3rd vec: 2 6 10 14 18 22 26 30
3248 4th vec: 3 7 11 15 19 23 27 31
3250 i.e., the first output vector should contain the first elements of each
3251 interleaving group, etc.
3253 We use extract_even/odd instructions to create such output. The input of each
3254 extract_even/odd operation is two vectors
3255 1st vec 2nd vec
3256 0 1 2 3 4 5 6 7
3258 and the output is the vector of extracted even/odd elements. The output of
3259 extract_even will be: 0 2 4 6
3260 and of extract_odd: 1 3 5 7
3263 The permutation is done in log LENGTH stages. In each stage extract_even and
3264 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3265 order. In our example,
3267 E1: extract_even (1st vec, 2nd vec)
3268 E2: extract_odd (1st vec, 2nd vec)
3269 E3: extract_even (3rd vec, 4th vec)
3270 E4: extract_odd (3rd vec, 4th vec)
3272 The output for the first stage will be:
3274 E1: 0 2 4 6 8 10 12 14
3275 E2: 1 3 5 7 9 11 13 15
3276 E3: 16 18 20 22 24 26 28 30
3277 E4: 17 19 21 23 25 27 29 31
3279 In order to proceed and create the correct sequence for the next stage (or
3280 for the correct output, if the second stage is the last one, as in our
3281 example), we first put the output of extract_even operation and then the
3282 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3283 The input for the second stage is:
3285 1st vec (E1): 0 2 4 6 8 10 12 14
3286 2nd vec (E3): 16 18 20 22 24 26 28 30
3287 3rd vec (E2): 1 3 5 7 9 11 13 15
3288 4th vec (E4): 17 19 21 23 25 27 29 31
3290 The output of the second stage:
3292 E1: 0 4 8 12 16 20 24 28
3293 E2: 2 6 10 14 18 22 26 30
3294 E3: 1 5 9 13 17 21 25 29
3295 E4: 3 7 11 15 19 23 27 31
3297 And RESULT_CHAIN after reordering:
3299 1st vec (E1): 0 4 8 12 16 20 24 28
3300 2nd vec (E3): 1 5 9 13 17 21 25 29
3301 3rd vec (E2): 2 6 10 14 18 22 26 30
3302 4th vec (E4): 3 7 11 15 19 23 27 31. */
3304 bool
3305 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3306 unsigned int length,
3307 gimple stmt,
3308 gimple_stmt_iterator *gsi,
3309 VEC(tree,heap) **result_chain)
3311 tree perm_dest, data_ref, first_vect, second_vect;
3312 gimple perm_stmt;
3313 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3314 int i;
3315 unsigned int j;
3317 /* Check that the operation is supported. */
3318 if (!vect_strided_load_supported (vectype))
3319 return false;
3321 *result_chain = VEC_copy (tree, heap, dr_chain);
3322 for (i = 0; i < exact_log2 (length); i++)
3324 for (j = 0; j < length; j +=2)
3326 first_vect = VEC_index (tree, dr_chain, j);
3327 second_vect = VEC_index (tree, dr_chain, j+1);
3329 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3330 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3331 DECL_GIMPLE_REG_P (perm_dest) = 1;
3332 add_referenced_var (perm_dest);
3334 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3335 perm_dest, first_vect,
3336 second_vect);
3338 data_ref = make_ssa_name (perm_dest, perm_stmt);
3339 gimple_assign_set_lhs (perm_stmt, data_ref);
3340 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3341 mark_symbols_for_renaming (perm_stmt);
3343 VEC_replace (tree, *result_chain, j/2, data_ref);
3345 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3346 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3347 DECL_GIMPLE_REG_P (perm_dest) = 1;
3348 add_referenced_var (perm_dest);
3350 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3351 perm_dest, first_vect,
3352 second_vect);
3353 data_ref = make_ssa_name (perm_dest, perm_stmt);
3354 gimple_assign_set_lhs (perm_stmt, data_ref);
3355 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3356 mark_symbols_for_renaming (perm_stmt);
3358 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3360 dr_chain = VEC_copy (tree, heap, *result_chain);
3362 return true;
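
/* A minimal standalone sketch (not part of the vectorizer) of one
   extract_even/extract_odd pair from the permutation documented above,
   applied to element indices kept in plain int arrays.  EVEN receives the
   even-indexed elements of the concatenation of VECT1 and VECT2 and ODD
   the odd-indexed ones; log2 (LENGTH) such stages, with the even results
   stored in the first half of the chain and the odd results in the second
   half, de-interleave the loaded data.  NELT and the names are
   illustrative.  */
#if 0
static void
extract_even_odd_sketch (const int *vect1, const int *vect2,
                         int *even, int *odd, int nelt)
{
  int k;

  for (k = 0; k < nelt / 2; k++)
    {
      even[k]            = vect1[2 * k];
      odd[k]             = vect1[2 * k + 1];
      even[nelt / 2 + k] = vect2[2 * k];
      odd[nelt / 2 + k]  = vect2[2 * k + 1];
    }
}

/* E.g. with nelt == 8, vect1 == {0..7} and vect2 == {8..15} this gives
   even == {0,2,4,6,8,10,12,14} and odd == {1,3,5,7,9,11,13,15}, matching
   the first-stage outputs E1 and E2 in the comment above.  */
#endif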
3366 /* Function vect_transform_strided_load.
3368 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3369 to perform their permutation and ascribe the resulting vectorized statements to
3370 the scalar statements.
3373 bool
3374 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3375 gimple_stmt_iterator *gsi)
3377 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3378 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3379 gimple next_stmt, new_stmt;
3380 VEC(tree,heap) *result_chain = NULL;
3381 unsigned int i, gap_count;
3382 tree tmp_data_ref;
3384 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3385 RESULT_CHAIN is the output of vect_permute_load_chain; it contains the permuted
3386 vectors, ready for vector computation. */
3387 result_chain = VEC_alloc (tree, heap, size);
3388 /* Permute. */
3389 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3390 return false;
3392 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3393 Since we scan the chain starting from its first node, their order
3394 corresponds to the order of data-refs in RESULT_CHAIN. */
3395 next_stmt = first_stmt;
3396 gap_count = 1;
3397 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3399 if (!next_stmt)
3400 break;
3402 /* Skip the gaps. Loads created for the gaps will be removed by dead
3403 code elimination pass later. No need to check for the first stmt in
3404 the group, since it always exists.
3405 DR_GROUP_GAP is the number of steps in elements from the previous
3406 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3407 correspond to the gaps.
3409 if (next_stmt != first_stmt
3410 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3412 gap_count++;
3413 continue;
3416 while (next_stmt)
3418 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3419 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3420 copies, and we put the new vector statement in the first available
3421 RELATED_STMT. */
3422 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3423 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3424 else
3426 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3428 gimple prev_stmt =
3429 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3430 gimple rel_stmt =
3431 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3432 while (rel_stmt)
3434 prev_stmt = rel_stmt;
3435 rel_stmt =
3436 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3439 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3440 new_stmt;
3444 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3445 gap_count = 1;
3446 /* If NEXT_STMT accesses the same DR as the previous statement,
3447 put the same TMP_DATA_REF as its vectorized statement; otherwise
3448 get the next data-ref from RESULT_CHAIN. */
3449 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3450 break;
3454 VEC_free (tree, heap, result_chain);
3455 return true;
3458 /* Function vect_can_force_dr_alignment_p.
3460 Returns whether the alignment of a DECL can be forced to an
3461 ALIGNMENT-bit boundary. */
3463 bool
3464 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3466 if (TREE_CODE (decl) != VAR_DECL)
3467 return false;
3469 if (DECL_EXTERNAL (decl))
3470 return false;
3472 if (TREE_ASM_WRITTEN (decl))
3473 return false;
3475 if (TREE_STATIC (decl))
3476 return (alignment <= MAX_OFILE_ALIGNMENT);
3477 else
3478 return (alignment <= MAX_STACK_ALIGNMENT);
3481 /* Function vect_supportable_dr_alignment
3483 Return whether the data reference DR is supported with respect to its
3484 alignment. */
3486 enum dr_alignment_support
3487 vect_supportable_dr_alignment (struct data_reference *dr)
3489 gimple stmt = DR_STMT (dr);
3490 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3491 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3492 enum machine_mode mode = TYPE_MODE (vectype);
3493 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3494 struct loop *vect_loop = NULL;
3495 bool nested_in_vect_loop = false;
3497 if (aligned_access_p (dr))
3498 return dr_aligned;
3500 if (!loop_vinfo)
3501 /* FORNOW: Misaligned accesses are supported only in loops. */
3502 return dr_unaligned_unsupported;
3504 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3505 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout
     the execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same
     throughout the execution of the inner-loop.  This is because the
     inner-loop advances with the original scalar step (and not in steps of
     VS).  If the inner-loop step happens to be a multiple of VS, then the
     misalignment remains fixed and we can use the optimized realignment
     scheme.  For example:

       for (i = 0; i < N; i++)
         for (j = 0; j < M; j++)
           s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

       for (i = 0; i < N; i += 4)
         for (j = 0; j < M; j++)
           {
             vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                            // when j is {0,1,2,3,4,5,6,7,...} respectively.
                            // (assuming that we start from an aligned address).
           }

     We therefore have to use the unoptimized realignment scheme:

       for (i = 0; i < N; i += 4)
         for (j = k; j < M; j += 4)
           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                          // that the misalignment of the initial address is
                          // 0).

     The loop can then be vectorized as follows:

       for (k = 0; k < 4; k++)
         {
           rt = get_realignment_token (&vp[k]);
           for (i = 0; i < N; i += 4)
             {
               v1 = vp[i+k];
               for (j = k; j < M; j += 4)
                 {
                   v2 = vp[i+j+VS-1];
                   va = REALIGN_LOAD <v1,v2,rt>;
                   vs += va;
                   v1 = v2;
                 }
             }
         }  */
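
  /* Roughly, the code below returns dr_explicit_realign_optimized for reads
     whose misalignment stays fixed (so the realignment token can be computed
     once in the preheader) when the target provides REALIGN_LOAD,
     dr_explicit_realign for such reads whose misalignment does not stay
     fixed, dr_unaligned_supported when the target reports that it can handle
     the misaligned access directly, and dr_unaligned_unsupported
     otherwise.  */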

  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (optab_handler (vec_realign_load_optab, mode)->insn_code
            != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);

          /* The misalignment is invariant across the inner-loop only when
             the step equals the vector size (see the discussion above);
             otherwise the realignment token must be recomputed.  */
          if (nested_in_vect_loop
              && (TREE_INT_CST_LOW (DR_STEP (dr))
                  != GET_MODE_SIZE (TYPE_MODE (vectype))))
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          builtin_support_vector_misalignment (mode, type,
                                               DR_MISALIGNMENT (dr), is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          builtin_support_vector_misalignment (mode, type,
                                               DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;