Merged r158907 through r159238 into branch.
[official-gcc.git] / gcc / tree-vect-data-refs.c
blobcf2ac21fca2673d60a6ce786372abdff493c3d95
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "toplev.h"
43 /* Return the smallest scalar part of STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
46 result-type is different than the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop is present only as the rhs of a
50 promotion operation - we'd miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
56 check the rhs.
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58 types. */
60 tree
61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77 if (rhs < lhs)
78 scalar_type = rhs_type;
81 *lhs_size_unit = lhs;
82 *rhs_size_unit = rhs;
83 return scalar_type;
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
90 int
91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
93 gimple next_stmt = first_stmt;
94 int result = 0;
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97 return -1;
99 while (next_stmt && next_stmt != stmt)
101 result++;
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
105 if (next_stmt)
106 return result;
107 else
108 return -1;
112 /* Function vect_insert_into_interleaving_chain.
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
116 static void
117 vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
120 gimple prev, next;
121 tree next_init;
122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127 while (next)
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
132 /* Insert here. */
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 return;
137 prev = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
147 /* Function vect_update_interleaving_chain.
149 For two data-refs DRA and DRB that are a part of a chain interleaved data
150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
152 There are four possible cases:
153 1. New stmts - both DRA and DRB are not a part of any chain:
154 FIRST_DR = DRB
155 NEXT_DR (DRB) = DRA
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
162 FIRST_DR = DRB
163 NEXT(FIRST_DR) = previous FIRST_DR
164 else
165 insert DRB according to its init
166 4. both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
170 static void
171 vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
178 tree node_init;
179 gimple node, prev, next, first_stmt;
181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187 return;
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
196 return;
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 old_first_stmt)));
205 gimple tmp;
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
217 while (tmp)
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
223 else
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
229 return;
232 /* 4. both DRA and DRB are in some interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
236 return;
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
242 /* Insert the nodes of DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning. */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247 first_stmt = first_b;
249 else
251 /* Insert the nodes of DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning. */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256 first_stmt = first_a;
259 while (node)
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263 while (next)
265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
268 /* Insert here. */
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 prev = node;
272 break;
274 prev = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
277 if (!next)
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 prev = node;
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
290 /* Function vect_equal_offsets.
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
294 static bool
295 vect_equal_offsets (tree offset1, tree offset2)
297 bool res;
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
302 if (offset1 == offset2)
303 return true;
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
307 return false;
309 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
310 TREE_OPERAND (offset2, 0));
312 if (!res || !BINARY_CLASS_P (offset1))
313 return res;
315 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
316 TREE_OPERAND (offset2, 1));
318 return res;
322 /* Function vect_check_interleaving.
324 Check if DRA and DRB are a part of interleaving. In case they are, insert
325 DRA and DRB in an interleaving chain. */
327 static bool
328 vect_check_interleaving (struct data_reference *dra,
329 struct data_reference *drb)
331 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
333 /* Check that the data-refs have same first location (except init) and they
334 are both either store or load (not load and store). */
335 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
336 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
337 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
338 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
339 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
340 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
341 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
342 || DR_IS_READ (dra) != DR_IS_READ (drb))
343 return false;
345 /* Check:
346 1. data-refs are of the same type
347 2. their steps are equal
348 3. the step (if greater than zero) is greater than the difference between
349 data-refs' inits. */
350 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
351 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
353 if (type_size_a != type_size_b
354 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
355 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
356 TREE_TYPE (DR_REF (drb))))
357 return false;
359 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
360 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
361 step = TREE_INT_CST_LOW (DR_STEP (dra));
363 if (init_a > init_b)
365 /* If init_a == init_b + the size of the type * k, we have an interleaving,
366 and DRB is accessed before DRA. */
367 diff_mod_size = (init_a - init_b) % type_size_a;
369 if (step && (init_a - init_b) > step)
370 return false;
372 if (diff_mod_size == 0)
374 vect_update_interleaving_chain (drb, dra);
375 if (vect_print_dump_info (REPORT_DR_DETAILS))
377 fprintf (vect_dump, "Detected interleaving ");
378 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
379 fprintf (vect_dump, " and ");
380 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
382 return true;
385 else
387 /* If init_b == init_a + the size of the type * k, we have an
388 interleaving, and DRA is accessed before DRB. */
389 diff_mod_size = (init_b - init_a) % type_size_a;
391 if (step && (init_b - init_a) > step)
392 return false;
394 if (diff_mod_size == 0)
396 vect_update_interleaving_chain (dra, drb);
397 if (vect_print_dump_info (REPORT_DR_DETAILS))
399 fprintf (vect_dump, "Detected interleaving ");
400 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
401 fprintf (vect_dump, " and ");
402 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
404 return true;
408 return false;
411 /* Check if data references pointed by DR_I and DR_J are same or
412 belong to same interleaving group. Return FALSE if drs are
413 different, otherwise return TRUE. */
415 static bool
416 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
418 gimple stmt_i = DR_STMT (dr_i);
419 gimple stmt_j = DR_STMT (dr_j);
421 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
422 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
423 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
424 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
425 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
426 return true;
427 else
428 return false;
431 /* If address ranges represented by DDR_I and DDR_J are equal,
432 return TRUE, otherwise return FALSE. */
434 static bool
435 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
437 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
438 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
439 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
440 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
441 return true;
442 else
443 return false;
446 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
447 tested at run-time. Return TRUE if DDR was successfully inserted.
448 Return false if versioning is not supported. */
450 static bool
451 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
453 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
455 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
456 return false;
458 if (vect_print_dump_info (REPORT_DR_DETAILS))
460 fprintf (vect_dump, "mark for run-time aliasing test between ");
461 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
462 fprintf (vect_dump, " and ");
463 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
466 if (optimize_loop_nest_for_size_p (loop))
468 if (vect_print_dump_info (REPORT_DR_DETAILS))
469 fprintf (vect_dump, "versioning not supported when optimizing for size.");
470 return false;
473 /* FORNOW: We don't support versioning with outer-loop vectorization. */
474 if (loop->inner)
476 if (vect_print_dump_info (REPORT_DR_DETAILS))
477 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
478 return false;
481 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
482 return true;
486 /* Function vect_analyze_data_ref_dependence.
488 Return TRUE if there (might) exist a dependence between a memory-reference
489 DRA and a memory-reference DRB. When versioning for alias may check a
490 dependence at run-time, return FALSE. Adjust *MAX_VF according to
491 the data dependence. */
493 static bool
494 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
495 loop_vec_info loop_vinfo, int *max_vf)
497 unsigned int i;
498 struct loop *loop = NULL;
499 struct data_reference *dra = DDR_A (ddr);
500 struct data_reference *drb = DDR_B (ddr);
501 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
502 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
503 lambda_vector dist_v;
504 unsigned int loop_depth;
506 /* Don't bother to analyze statements marked as unvectorizable. */
507 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
508 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
509 return false;
511 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
513 /* Independent data accesses. */
514 vect_check_interleaving (dra, drb);
515 return false;
518 if (loop_vinfo)
519 loop = LOOP_VINFO_LOOP (loop_vinfo);
521 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
522 return false;
524 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
526 if (loop_vinfo)
528 if (vect_print_dump_info (REPORT_DR_DETAILS))
530 fprintf (vect_dump, "versioning for alias required: "
531 "can't determine dependence between ");
532 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
533 fprintf (vect_dump, " and ");
534 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
537 /* Add to list of ddrs that need to be tested at run-time. */
538 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
541 /* When vectorizing a basic block unknown depnedence can still mean
542 strided access. */
543 if (vect_check_interleaving (dra, drb))
544 return false;
546 if (vect_print_dump_info (REPORT_DR_DETAILS))
548 fprintf (vect_dump, "can't determine dependence between ");
549 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
550 fprintf (vect_dump, " and ");
551 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
554 /* Mark the statements as unvectorizable. */
555 STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false;
556 STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false;
558 return false;
561 /* Versioning for alias is not yet supported for basic block SLP, and
562 dependence distance is unapplicable, hence, in case of known data
563 dependence, basic block vectorization is impossible for now. */
564 if (!loop_vinfo)
566 if (dra != drb && vect_check_interleaving (dra, drb))
567 return false;
569 if (vect_print_dump_info (REPORT_DR_DETAILS))
571 fprintf (vect_dump, "determined dependence between ");
572 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
573 fprintf (vect_dump, " and ");
574 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
577 return true;
580 /* Loop-based vectorization and known data dependence. */
581 if (DDR_NUM_DIST_VECTS (ddr) == 0)
583 if (vect_print_dump_info (REPORT_DR_DETAILS))
585 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
586 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
587 fprintf (vect_dump, " and ");
588 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
590 /* Add to list of ddrs that need to be tested at run-time. */
591 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
594 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
595 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
597 int dist = dist_v[loop_depth];
599 if (vect_print_dump_info (REPORT_DR_DETAILS))
600 fprintf (vect_dump, "dependence distance = %d.", dist);
602 if (dist == 0)
604 if (vect_print_dump_info (REPORT_DR_DETAILS))
606 fprintf (vect_dump, "dependence distance == 0 between ");
607 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
608 fprintf (vect_dump, " and ");
609 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
612 /* For interleaving, mark that there is a read-write dependency if
613 necessary. We check before that one of the data-refs is store. */
614 if (DR_IS_READ (dra))
615 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
616 else
618 if (DR_IS_READ (drb))
619 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
622 continue;
625 if (dist > 0 && DDR_REVERSED_P (ddr))
627 /* If DDR_REVERSED_P the order of the data-refs in DDR was
628 reversed (to make distance vector positive), and the actual
629 distance is negative. */
630 if (vect_print_dump_info (REPORT_DR_DETAILS))
631 fprintf (vect_dump, "dependence distance negative.");
632 continue;
635 if (abs (dist) >= 2
636 && abs (dist) < *max_vf)
638 /* The dependence distance requires reduction of the maximal
639 vectorization factor. */
640 *max_vf = abs (dist);
641 if (vect_print_dump_info (REPORT_DR_DETAILS))
642 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
643 *max_vf);
646 if (abs (dist) >= *max_vf)
648 /* Dependence distance does not create dependence, as far as
649 vectorization is concerned, in this case. */
650 if (vect_print_dump_info (REPORT_DR_DETAILS))
651 fprintf (vect_dump, "dependence distance >= VF.");
652 continue;
655 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
657 fprintf (vect_dump, "not vectorized, possible dependence "
658 "between data-refs ");
659 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
660 fprintf (vect_dump, " and ");
661 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
664 return true;
667 return false;
670 /* Function vect_analyze_data_ref_dependences.
672 Examine all the data references in the loop, and make sure there do not
673 exist any data dependences between them. Set *MAX_VF according to
674 the maximum vectorization factor the data dependences allow. */
676 bool
677 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
678 bb_vec_info bb_vinfo, int *max_vf)
680 unsigned int i;
681 VEC (ddr_p, heap) *ddrs = NULL;
682 struct data_dependence_relation *ddr;
684 if (vect_print_dump_info (REPORT_DETAILS))
685 fprintf (vect_dump, "=== vect_analyze_dependences ===");
687 if (loop_vinfo)
688 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
689 else
690 ddrs = BB_VINFO_DDRS (bb_vinfo);
692 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
693 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
694 return false;
696 return true;
700 /* Function vect_compute_data_ref_alignment
702 Compute the misalignment of the data reference DR.
704 Output:
705 1. If during the misalignment computation it is found that the data reference
706 cannot be vectorized then false is returned.
707 2. DR_MISALIGNMENT (DR) is defined.
709 FOR NOW: No analysis is actually performed. Misalignment is calculated
710 only for trivial cases. TODO. */
712 static bool
713 vect_compute_data_ref_alignment (struct data_reference *dr)
715 gimple stmt = DR_STMT (dr);
716 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
717 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
718 struct loop *loop = NULL;
719 tree ref = DR_REF (dr);
720 tree vectype;
721 tree base, base_addr;
722 bool base_aligned;
723 tree misalign;
724 tree aligned_to, alignment;
726 if (vect_print_dump_info (REPORT_DETAILS))
727 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
729 if (loop_vinfo)
730 loop = LOOP_VINFO_LOOP (loop_vinfo);
732 /* Initialize misalignment to unknown. */
733 SET_DR_MISALIGNMENT (dr, -1);
735 misalign = DR_INIT (dr);
736 aligned_to = DR_ALIGNED_TO (dr);
737 base_addr = DR_BASE_ADDRESS (dr);
738 vectype = STMT_VINFO_VECTYPE (stmt_info);
740 /* In case the dataref is in an inner-loop of the loop that is being
741 vectorized (LOOP), we use the base and misalignment information
742 relative to the outer-loop (LOOP). This is ok only if the misalignment
743 stays the same throughout the execution of the inner-loop, which is why
744 we have to check that the stride of the dataref in the inner-loop evenly
745 divides by the vector size. */
746 if (loop && nested_in_vect_loop_p (loop, stmt))
748 tree step = DR_STEP (dr);
749 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
751 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
753 if (vect_print_dump_info (REPORT_ALIGNMENT))
754 fprintf (vect_dump, "inner step divides the vector-size.");
755 misalign = STMT_VINFO_DR_INIT (stmt_info);
756 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
757 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
759 else
761 if (vect_print_dump_info (REPORT_ALIGNMENT))
762 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
763 misalign = NULL_TREE;
767 base = build_fold_indirect_ref (base_addr);
768 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
770 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
771 || !misalign)
773 if (vect_print_dump_info (REPORT_ALIGNMENT))
775 fprintf (vect_dump, "Unknown alignment for access: ");
776 print_generic_expr (vect_dump, base, TDF_SLIM);
778 return true;
781 if ((DECL_P (base)
782 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
783 alignment) >= 0)
784 || (TREE_CODE (base_addr) == SSA_NAME
785 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
786 TREE_TYPE (base_addr)))),
787 alignment) >= 0))
788 base_aligned = true;
789 else
790 base_aligned = false;
792 if (!base_aligned)
794 /* Do not change the alignment of global variables if
795 flag_section_anchors is enabled. */
796 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
797 || (TREE_STATIC (base) && flag_section_anchors))
799 if (vect_print_dump_info (REPORT_DETAILS))
801 fprintf (vect_dump, "can't force alignment of ref: ");
802 print_generic_expr (vect_dump, ref, TDF_SLIM);
804 return true;
807 /* Force the alignment of the decl.
808 NOTE: This is the only change to the code we make during
809 the analysis phase, before deciding to vectorize the loop. */
810 if (vect_print_dump_info (REPORT_DETAILS))
811 fprintf (vect_dump, "force alignment");
812 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
813 DECL_USER_ALIGN (base) = 1;
816 /* At this point we assume that the base is aligned. */
817 gcc_assert (base_aligned
818 || (TREE_CODE (base) == VAR_DECL
819 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
821 /* Modulo alignment. */
822 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
824 if (!host_integerp (misalign, 1))
826 /* Negative or overflowed misalignment value. */
827 if (vect_print_dump_info (REPORT_DETAILS))
828 fprintf (vect_dump, "unexpected misalign value");
829 return false;
832 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
834 if (vect_print_dump_info (REPORT_DETAILS))
836 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
837 print_generic_expr (vect_dump, ref, TDF_SLIM);
840 return true;
844 /* Function vect_compute_data_refs_alignment
846 Compute the misalignment of data references in the loop.
847 Return FALSE if a data reference is found that cannot be vectorized. */
849 static bool
850 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
851 bb_vec_info bb_vinfo)
853 VEC (data_reference_p, heap) *datarefs;
854 struct data_reference *dr;
855 unsigned int i;
857 if (loop_vinfo)
858 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
859 else
860 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
862 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
863 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
864 && !vect_compute_data_ref_alignment (dr))
866 if (bb_vinfo)
868 /* Mark unsupported statement as unvectorizable. */
869 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
870 continue;
872 else
873 return false;
876 return true;
880 /* Function vect_update_misalignment_for_peel
882 DR - the data reference whose misalignment is to be adjusted.
883 DR_PEEL - the data reference whose misalignment is being made
884 zero in the vector loop by the peel.
885 NPEEL - the number of iterations in the peel loop if the misalignment
886 of DR_PEEL is known at compile time. */
888 static void
889 vect_update_misalignment_for_peel (struct data_reference *dr,
890 struct data_reference *dr_peel, int npeel)
892 unsigned int i;
893 VEC(dr_p,heap) *same_align_drs;
894 struct data_reference *current_dr;
895 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
896 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
897 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
898 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
900 /* For interleaved data accesses the step in the loop must be multiplied by
901 the size of the interleaving group. */
902 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
903 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
904 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
905 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
907 /* It can be assumed that the data refs with the same alignment as dr_peel
908 are aligned in the vector loop. */
909 same_align_drs
910 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
911 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
913 if (current_dr != dr)
914 continue;
915 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
916 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
917 SET_DR_MISALIGNMENT (dr, 0);
918 return;
921 if (known_alignment_for_access_p (dr)
922 && known_alignment_for_access_p (dr_peel))
924 int misal = DR_MISALIGNMENT (dr);
925 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
926 misal += npeel * dr_size;
927 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
928 SET_DR_MISALIGNMENT (dr, misal);
929 return;
932 if (vect_print_dump_info (REPORT_DETAILS))
933 fprintf (vect_dump, "Setting misalignment to -1.");
934 SET_DR_MISALIGNMENT (dr, -1);
938 /* Function vect_verify_datarefs_alignment
940 Return TRUE if all data references in the loop can be
941 handled with respect to alignment. */
943 bool
944 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
946 VEC (data_reference_p, heap) *datarefs;
947 struct data_reference *dr;
948 enum dr_alignment_support supportable_dr_alignment;
949 unsigned int i;
951 if (loop_vinfo)
952 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
953 else
954 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
956 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
958 gimple stmt = DR_STMT (dr);
959 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
961 /* For interleaving, only the alignment of the first access matters.
962 Skip statements marked as not vectorizable. */
963 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
964 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
965 || !STMT_VINFO_VECTORIZABLE (stmt_info))
966 continue;
968 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
969 if (!supportable_dr_alignment)
971 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
973 if (DR_IS_READ (dr))
974 fprintf (vect_dump,
975 "not vectorized: unsupported unaligned load.");
976 else
977 fprintf (vect_dump,
978 "not vectorized: unsupported unaligned store.");
980 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
982 return false;
984 if (supportable_dr_alignment != dr_aligned
985 && vect_print_dump_info (REPORT_ALIGNMENT))
986 fprintf (vect_dump, "Vectorizing an unaligned access.");
988 return true;
992 /* Function vector_alignment_reachable_p
994 Return true if vector alignment for DR is reachable by peeling
995 a few loop iterations. Return false otherwise. */
997 static bool
998 vector_alignment_reachable_p (struct data_reference *dr)
1000 gimple stmt = DR_STMT (dr);
1001 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1002 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1004 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1006 /* For interleaved access we peel only if number of iterations in
1007 the prolog loop ({VF - misalignment}), is a multiple of the
1008 number of the interleaved accesses. */
1009 int elem_size, mis_in_elements;
1010 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1012 /* FORNOW: handle only known alignment. */
1013 if (!known_alignment_for_access_p (dr))
1014 return false;
1016 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1017 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1019 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1020 return false;
1023 /* If misalignment is known at the compile time then allow peeling
1024 only if natural alignment is reachable through peeling. */
1025 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1027 HOST_WIDE_INT elmsize =
1028 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1029 if (vect_print_dump_info (REPORT_DETAILS))
1031 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1032 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1034 if (DR_MISALIGNMENT (dr) % elmsize)
1036 if (vect_print_dump_info (REPORT_DETAILS))
1037 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1038 return false;
1042 if (!known_alignment_for_access_p (dr))
1044 tree type = (TREE_TYPE (DR_REF (dr)));
1045 tree ba = DR_BASE_OBJECT (dr);
1046 bool is_packed = false;
1048 if (ba)
1049 is_packed = contains_packed_reference (ba);
1051 if (vect_print_dump_info (REPORT_DETAILS))
1052 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1053 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1054 return true;
1055 else
1056 return false;
1059 return true;
1062 /* Function vect_enhance_data_refs_alignment
1064 This pass will use loop versioning and loop peeling in order to enhance
1065 the alignment of data references in the loop.
1067 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1068 original loop is to be vectorized; Any other loops that are created by
1069 the transformations performed in this pass - are not supposed to be
1070 vectorized. This restriction will be relaxed.
1072 This pass will require a cost model to guide it whether to apply peeling
1073 or versioning or a combination of the two. For example, the scheme that
1074 intel uses when given a loop with several memory accesses, is as follows:
1075 choose one memory access ('p') which alignment you want to force by doing
1076 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1077 other accesses are not necessarily aligned, or (2) use loop versioning to
1078 generate one loop in which all accesses are aligned, and another loop in
1079 which only 'p' is necessarily aligned.
1081 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1082 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1083 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1085 Devising a cost model is the most critical aspect of this work. It will
1086 guide us on which access to peel for, whether to use loop versioning, how
1087 many versions to create, etc. The cost model will probably consist of
1088 generic considerations as well as target specific considerations (on
1089 powerpc for example, misaligned stores are more painful than misaligned
1090 loads).
1092 Here are the general steps involved in alignment enhancements:
1094 -- original loop, before alignment analysis:
1095 for (i=0; i<N; i++){
1096 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1097 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1100 -- After vect_compute_data_refs_alignment:
1101 for (i=0; i<N; i++){
1102 x = q[i]; # DR_MISALIGNMENT(q) = 3
1103 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1106 -- Possibility 1: we do loop versioning:
1107 if (p is aligned) {
1108 for (i=0; i<N; i++){ # loop 1A
1109 x = q[i]; # DR_MISALIGNMENT(q) = 3
1110 p[i] = y; # DR_MISALIGNMENT(p) = 0
1113 else {
1114 for (i=0; i<N; i++){ # loop 1B
1115 x = q[i]; # DR_MISALIGNMENT(q) = 3
1116 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1120 -- Possibility 2: we do loop peeling:
1121 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1122 x = q[i];
1123 p[i] = y;
1125 for (i = 3; i < N; i++){ # loop 2A
1126 x = q[i]; # DR_MISALIGNMENT(q) = 0
1127 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1130 -- Possibility 3: combination of loop peeling and versioning:
1131 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1132 x = q[i];
1133 p[i] = y;
1135 if (p is aligned) {
1136 for (i = 3; i<N; i++){ # loop 3A
1137 x = q[i]; # DR_MISALIGNMENT(q) = 0
1138 p[i] = y; # DR_MISALIGNMENT(p) = 0
1141 else {
1142 for (i = 3; i<N; i++){ # loop 3B
1143 x = q[i]; # DR_MISALIGNMENT(q) = 0
1144 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1148 These loops are later passed to loop_transform to be vectorized. The
1149 vectorizer will use the alignment information to guide the transformation
1150 (whether to generate regular loads/stores, or with special handling for
1151 misalignment). */
1153 bool
1154 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1156 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1157 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1158 enum dr_alignment_support supportable_dr_alignment;
1159 struct data_reference *dr0 = NULL;
1160 struct data_reference *dr;
1161 unsigned int i;
1162 bool do_peeling = false;
1163 bool do_versioning = false;
1164 bool stat;
1165 gimple stmt;
1166 stmt_vec_info stmt_info;
1167 int vect_versioning_for_alias_required;
1169 if (vect_print_dump_info (REPORT_DETAILS))
1170 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1172 /* While cost model enhancements are expected in the future, the high level
1173 view of the code at this time is as follows:
1175 A) If there is a misaligned access then see if peeling to align
1176 this access can make all data references satisfy
1177 vect_supportable_dr_alignment. If so, update data structures
1178 as needed and return true.
1180 B) If peeling wasn't possible and there is a data reference with an
1181 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1182 then see if loop versioning checks can be used to make all data
1183 references satisfy vect_supportable_dr_alignment. If so, update
1184 data structures as needed and return true.
1186 C) If neither peeling nor versioning were successful then return false if
1187 any data reference does not satisfy vect_supportable_dr_alignment.
1189 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1191 Note, Possibility 3 above (which is peeling and versioning together) is not
1192 being done at this time. */
1194 /* (1) Peeling to force alignment. */
1196 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1197 Considerations:
1198 + How many accesses will become aligned due to the peeling
1199 - How many accesses will become unaligned due to the peeling,
1200 and the cost of misaligned accesses.
1201 - The cost of peeling (the extra runtime checks, the increase
1202 in code size).
1204 The scheme we use FORNOW: peel to force the alignment of the first
1205 unsupported misaligned access in the loop.
1207 TODO: Use a cost model. */
1209 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1211 stmt = DR_STMT (dr);
1212 stmt_info = vinfo_for_stmt (stmt);
1214 /* For interleaving, only the alignment of the first access
1215 matters. */
1216 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1217 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1218 continue;
1220 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1222 do_peeling = vector_alignment_reachable_p (dr);
1223 if (do_peeling)
1224 dr0 = dr;
1225 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1226 fprintf (vect_dump, "vector alignment may not be reachable");
1227 break;
1231 vect_versioning_for_alias_required
1232 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1234 /* Temporarily, if versioning for alias is required, we disable peeling
1235 until we support peeling and versioning. Often peeling for alignment
1236 will require peeling for loop-bound, which in turn requires that we
1237 know how to adjust the loop ivs after the loop. */
1238 if (vect_versioning_for_alias_required
1239 || !vect_can_advance_ivs_p (loop_vinfo)
1240 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1241 do_peeling = false;
1243 if (do_peeling)
1245 int mis;
1246 int npeel = 0;
1247 gimple stmt = DR_STMT (dr0);
1248 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1249 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1250 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1252 if (known_alignment_for_access_p (dr0))
1254 /* Since it's known at compile time, compute the number of iterations
1255 in the peeled loop (the peeling factor) for use in updating
1256 DR_MISALIGNMENT values. The peeling factor is the vectorization
1257 factor minus the misalignment as an element count. */
1258 mis = DR_MISALIGNMENT (dr0);
1259 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1260 npeel = nelements - mis;
1262 /* For interleaved data access every iteration accesses all the
1263 members of the group, therefore we divide the number of iterations
1264 by the group size. */
1265 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1266 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1267 npeel /= DR_GROUP_SIZE (stmt_info);
1269 if (vect_print_dump_info (REPORT_DETAILS))
1270 fprintf (vect_dump, "Try peeling by %d", npeel);
1273 /* Ensure that all data refs can be vectorized after the peel. */
1274 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1276 int save_misalignment;
1278 if (dr == dr0)
1279 continue;
1281 stmt = DR_STMT (dr);
1282 stmt_info = vinfo_for_stmt (stmt);
1283 /* For interleaving, only the alignment of the first access
1284 matters. */
1285 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1286 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1287 continue;
1289 save_misalignment = DR_MISALIGNMENT (dr);
1290 vect_update_misalignment_for_peel (dr, dr0, npeel);
1291 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1292 SET_DR_MISALIGNMENT (dr, save_misalignment);
1294 if (!supportable_dr_alignment)
1296 do_peeling = false;
1297 break;
1301 if (do_peeling)
1303 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1304 If the misalignment of DR_i is identical to that of dr0 then set
1305 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1306 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1307 by the peeling factor times the element size of DR_i (MOD the
1308 vectorization factor times the size). Otherwise, the
1309 misalignment of DR_i must be set to unknown. */
1310 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1311 if (dr != dr0)
1312 vect_update_misalignment_for_peel (dr, dr0, npeel);
1314 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1315 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1316 SET_DR_MISALIGNMENT (dr0, 0);
1317 if (vect_print_dump_info (REPORT_ALIGNMENT))
1318 fprintf (vect_dump, "Alignment of access forced using peeling.");
1320 if (vect_print_dump_info (REPORT_DETAILS))
1321 fprintf (vect_dump, "Peeling for alignment will be applied.");
1323 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1324 gcc_assert (stat);
1325 return stat;
1330 /* (2) Versioning to force alignment. */
1332 /* Try versioning if:
1333 1) flag_tree_vect_loop_version is TRUE
1334 2) optimize loop for speed
1335 3) there is at least one unsupported misaligned data ref with an unknown
1336 misalignment, and
1337 4) all misaligned data refs with a known misalignment are supported, and
1338 5) the number of runtime alignment checks is within reason. */
1340 do_versioning =
1341 flag_tree_vect_loop_version
1342 && optimize_loop_nest_for_speed_p (loop)
1343 && (!loop->inner); /* FORNOW */
1345 if (do_versioning)
1347 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1349 stmt = DR_STMT (dr);
1350 stmt_info = vinfo_for_stmt (stmt);
1352 /* For interleaving, only the alignment of the first access
1353 matters. */
1354 if (aligned_access_p (dr)
1355 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1356 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1357 continue;
1359 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1361 if (!supportable_dr_alignment)
1363 gimple stmt;
1364 int mask;
1365 tree vectype;
1367 if (known_alignment_for_access_p (dr)
1368 || VEC_length (gimple,
1369 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1370 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1372 do_versioning = false;
1373 break;
1376 stmt = DR_STMT (dr);
1377 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1378 gcc_assert (vectype);
1380 /* The rightmost bits of an aligned address must be zeros.
1381 Construct the mask needed for this test. For example,
1382 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1383 mask must be 15 = 0xf. */
1384 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1386 /* FORNOW: use the same mask to test all potentially unaligned
1387 references in the loop. The vectorizer currently supports
1388 a single vector size, see the reference to
1389 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1390 vectorization factor is computed. */
1391 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1392 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1393 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1394 VEC_safe_push (gimple, heap,
1395 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1396 DR_STMT (dr));
1400 /* Versioning requires at least one misaligned data reference. */
1401 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1402 do_versioning = false;
1403 else if (!do_versioning)
1404 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1407 if (do_versioning)
1409 VEC(gimple,heap) *may_misalign_stmts
1410 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1411 gimple stmt;
1413 /* It can now be assumed that the data references in the statements
1414 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1415 of the loop being vectorized. */
1416 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1418 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1419 dr = STMT_VINFO_DATA_REF (stmt_info);
1420 SET_DR_MISALIGNMENT (dr, 0);
1421 if (vect_print_dump_info (REPORT_ALIGNMENT))
1422 fprintf (vect_dump, "Alignment of access forced using versioning.");
1425 if (vect_print_dump_info (REPORT_DETAILS))
1426 fprintf (vect_dump, "Versioning for alignment will be applied.");
1428 /* Peeling and versioning can't be done together at this time. */
1429 gcc_assert (! (do_peeling && do_versioning));
1431 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1432 gcc_assert (stat);
1433 return stat;
1436 /* This point is reached if neither peeling nor versioning is being done. */
1437 gcc_assert (! (do_peeling || do_versioning));
1439 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1440 return stat;
1444 /* Function vect_find_same_alignment_drs.
1446 Update group and alignment relations according to the chosen
1447 vectorization factor. */
1449 static void
1450 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1451 loop_vec_info loop_vinfo)
1453 unsigned int i;
1454 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1455 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1456 struct data_reference *dra = DDR_A (ddr);
1457 struct data_reference *drb = DDR_B (ddr);
1458 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1459 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1460 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1461 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1462 lambda_vector dist_v;
1463 unsigned int loop_depth;
1465 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1466 return;
1468 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1469 return;
1471 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1472 return;
1474 /* Loop-based vectorization and known data dependence. */
1475 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1476 return;
1478 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1479 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1481 int dist = dist_v[loop_depth];
1483 if (vect_print_dump_info (REPORT_DR_DETAILS))
1484 fprintf (vect_dump, "dependence distance = %d.", dist);
1486 /* Same loop iteration. */
1487 if (dist == 0
1488 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1490 /* Two references with distance zero have the same alignment. */
1491 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1492 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1493 if (vect_print_dump_info (REPORT_ALIGNMENT))
1494 fprintf (vect_dump, "accesses have the same alignment.");
1495 if (vect_print_dump_info (REPORT_DR_DETAILS))
1497 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1498 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1499 fprintf (vect_dump, " and ");
1500 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1507 /* Function vect_analyze_data_refs_alignment
1509 Analyze the alignment of the data-references in the loop.
1510 Return FALSE if a data reference is found that cannot be vectorized. */
1512 bool
1513 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1514 bb_vec_info bb_vinfo)
1516 if (vect_print_dump_info (REPORT_DETAILS))
1517 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1519 /* Mark groups of data references with same alignment using
1520 data dependence information. */
1521 if (loop_vinfo)
1523 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1524 struct data_dependence_relation *ddr;
1525 unsigned int i;
1527 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1528 vect_find_same_alignment_drs (ddr, loop_vinfo);
1531 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1533 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1534 fprintf (vect_dump,
1535 "not vectorized: can't calculate alignment for data ref.");
1536 return false;
1539 return true;
1543 /* Analyze groups of strided accesses: check that DR belongs to a group of
1544 strided accesses of legal size, step, etc. Detect gaps, single element
1545 interleaving, and other special cases. Set strided access info.
1546 Collect groups of strided stores for further use in SLP analysis. */
1548 static bool
1549 vect_analyze_group_access (struct data_reference *dr)
1551 tree step = DR_STEP (dr);
1552 tree scalar_type = TREE_TYPE (DR_REF (dr));
1553 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1554 gimple stmt = DR_STMT (dr);
1555 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1556 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1557 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1558 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1559 HOST_WIDE_INT stride;
1560 bool slp_impossible = false;
1562 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1563 interleaving group (including gaps). */
1564 stride = dr_step / type_size;
1566 /* Not consecutive access is possible only if it is a part of interleaving. */
1567 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1569 /* Check if it this DR is a part of interleaving, and is a single
1570 element of the group that is accessed in the loop. */
1572 /* Gaps are supported only for loads. STEP must be a multiple of the type
1573 size. The size of the group must be a power of 2. */
1574 if (DR_IS_READ (dr)
1575 && (dr_step % type_size) == 0
1576 && stride > 0
1577 && exact_log2 (stride) != -1)
1579 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1580 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1581 if (vect_print_dump_info (REPORT_DR_DETAILS))
1583 fprintf (vect_dump, "Detected single element interleaving ");
1584 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1585 fprintf (vect_dump, " step ");
1586 print_generic_expr (vect_dump, step, TDF_SLIM);
1588 return true;
1591 if (vect_print_dump_info (REPORT_DETAILS))
1593 fprintf (vect_dump, "not consecutive access ");
1594 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1597 if (bb_vinfo)
1599 /* Mark the statement as unvectorizable. */
1600 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1601 return true;
1604 return false;
1607 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1609 /* First stmt in the interleaving chain. Check the chain. */
1610 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1611 struct data_reference *data_ref = dr;
1612 unsigned int count = 1;
1613 tree next_step;
1614 tree prev_init = DR_INIT (data_ref);
1615 gimple prev = stmt;
1616 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1618 while (next)
1620 /* Skip same data-refs. In case that two or more stmts share data-ref
1621 (supported only for loads), we vectorize only the first stmt, and
1622 the rest get their vectorized loads from the first one. */
1623 if (!tree_int_cst_compare (DR_INIT (data_ref),
1624 DR_INIT (STMT_VINFO_DATA_REF (
1625 vinfo_for_stmt (next)))))
1627 if (!DR_IS_READ (data_ref))
1629 if (vect_print_dump_info (REPORT_DETAILS))
1630 fprintf (vect_dump, "Two store stmts share the same dr.");
1631 return false;
1634 /* Check that there is no load-store dependencies for this loads
1635 to prevent a case of load-store-load to the same location. */
1636 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1637 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1639 if (vect_print_dump_info (REPORT_DETAILS))
1640 fprintf (vect_dump,
1641 "READ_WRITE dependence in interleaving.");
1642 return false;
1645 /* For load use the same data-ref load. */
1646 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1648 prev = next;
1649 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1650 continue;
1652 prev = next;
1654 /* Check that all the accesses have the same STEP. */
1655 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1656 if (tree_int_cst_compare (step, next_step))
1658 if (vect_print_dump_info (REPORT_DETAILS))
1659 fprintf (vect_dump, "not consecutive access in interleaving");
1660 return false;
1663 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1664 /* Check that the distance between two accesses is equal to the type
1665 size. Otherwise, we have gaps. */
1666 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1667 - TREE_INT_CST_LOW (prev_init)) / type_size;
1668 if (diff != 1)
1670 /* FORNOW: SLP of accesses with gaps is not supported. */
1671 slp_impossible = true;
1672 if (!DR_IS_READ (data_ref))
1674 if (vect_print_dump_info (REPORT_DETAILS))
1675 fprintf (vect_dump, "interleaved store with gaps");
1676 return false;
1679 gaps += diff - 1;
1682 /* Store the gap from the previous member of the group. If there is no
1683 gap in the access, DR_GROUP_GAP is always 1. */
1684 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1686 prev_init = DR_INIT (data_ref);
1687 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1688 /* Count the number of data-refs in the chain. */
1689 count++;
1692 /* COUNT is the number of accesses found, we multiply it by the size of
1693 the type to get COUNT_IN_BYTES. */
1694 count_in_bytes = type_size * count;
1696 /* Check that the size of the interleaving (including gaps) is not
1697 greater than STEP. */
1698 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1700 if (vect_print_dump_info (REPORT_DETAILS))
1702 fprintf (vect_dump, "interleaving size is greater than step for ");
1703 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1705 return false;
1708 /* Check that the size of the interleaving is equal to STEP for stores,
1709 i.e., that there are no gaps. */
1710 if (dr_step && dr_step != count_in_bytes)
1712 if (DR_IS_READ (dr))
1714 slp_impossible = true;
1715 /* There is a gap after the last load in the group. This gap is a
1716 difference between the stride and the number of elements. When
1717 there is no gap, this difference should be 0. */
1718 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1720 else
1722 if (vect_print_dump_info (REPORT_DETAILS))
1723 fprintf (vect_dump, "interleaved store with gaps");
1724 return false;
1728 /* Check that STEP is a multiple of type size. */
1729 if (dr_step && (dr_step % type_size) != 0)
1731 if (vect_print_dump_info (REPORT_DETAILS))
1733 fprintf (vect_dump, "step is not a multiple of type size: step ");
1734 print_generic_expr (vect_dump, step, TDF_SLIM);
1735 fprintf (vect_dump, " size ");
1736 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1737 TDF_SLIM);
1739 return false;
1742 /* FORNOW: we handle only interleaving that is a power of 2.
1743 We don't fail here if it may be still possible to vectorize the
1744 group using SLP. If not, the size of the group will be checked in
1745 vect_analyze_operations, and the vectorization will fail. */
1746 if (exact_log2 (stride) == -1)
1748 if (vect_print_dump_info (REPORT_DETAILS))
1749 fprintf (vect_dump, "interleaving is not a power of 2");
1751 if (slp_impossible)
1752 return false;
1755 if (stride == 0)
1756 stride = count;
1758 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1759 if (vect_print_dump_info (REPORT_DETAILS))
1760 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1762 /* SLP: create an SLP data structure for every interleaving group of
1763 stores for further analysis in vect_analyse_slp. */
1764 if (!DR_IS_READ (dr) && !slp_impossible)
1766 if (loop_vinfo)
1767 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1768 stmt);
1769 if (bb_vinfo)
1770 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1771 stmt);
1775 return true;
1779 /* Analyze the access pattern of the data-reference DR.
1780 In case of non-consecutive accesses call vect_analyze_group_access() to
1781 analyze groups of strided accesses. */
1783 static bool
1784 vect_analyze_data_ref_access (struct data_reference *dr)
1786 tree step = DR_STEP (dr);
1787 tree scalar_type = TREE_TYPE (DR_REF (dr));
1788 gimple stmt = DR_STMT (dr);
1789 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1790 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1791 struct loop *loop = NULL;
1792 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1794 if (loop_vinfo)
1795 loop = LOOP_VINFO_LOOP (loop_vinfo);
1797 if (loop_vinfo && !step)
1799 if (vect_print_dump_info (REPORT_DETAILS))
1800 fprintf (vect_dump, "bad data-ref access in loop");
1801 return false;
1804 /* Don't allow invariant accesses in loops. */
1805 if (loop_vinfo && dr_step == 0)
1806 return false;
1808 if (loop && nested_in_vect_loop_p (loop, stmt))
1810 /* Interleaved accesses are not yet supported within outer-loop
1811 vectorization for references in the inner-loop. */
1812 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1814 /* For the rest of the analysis we use the outer-loop step. */
1815 step = STMT_VINFO_DR_STEP (stmt_info);
1816 dr_step = TREE_INT_CST_LOW (step);
1818 if (dr_step == 0)
1820 if (vect_print_dump_info (REPORT_ALIGNMENT))
1821 fprintf (vect_dump, "zero step in outer loop.");
1822 if (DR_IS_READ (dr))
1823 return true;
1824 else
1825 return false;
1829 /* Consecutive? */
1830 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1832 /* Mark that it is not interleaving. */
1833 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1834 return true;
1837 if (loop && nested_in_vect_loop_p (loop, stmt))
1839 if (vect_print_dump_info (REPORT_ALIGNMENT))
1840 fprintf (vect_dump, "strided access in outer loop.");
1841 return false;
1844 /* Not consecutive access - check if it's a part of interleaving group. */
1845 return vect_analyze_group_access (dr);
1849 /* Function vect_analyze_data_ref_accesses.
1851 Analyze the access pattern of all the data references in the loop.
1853 FORNOW: the only access pattern that is considered vectorizable is a
1854 simple step 1 (consecutive) access.
1856 FORNOW: handle only arrays and pointer accesses. */
1858 bool
1859 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1861 unsigned int i;
1862 VEC (data_reference_p, heap) *datarefs;
1863 struct data_reference *dr;
1865 if (vect_print_dump_info (REPORT_DETAILS))
1866 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1868 if (loop_vinfo)
1869 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1870 else
1871 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1873 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1874 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
1875 && !vect_analyze_data_ref_access (dr))
1877 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1878 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1880 if (bb_vinfo)
1882 /* Mark the statement as not vectorizable. */
1883 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1884 continue;
1886 else
1887 return false;
1890 return true;
1893 /* Function vect_prune_runtime_alias_test_list.
1895 Prune a list of ddrs to be tested at run-time by versioning for alias.
1896 Return FALSE if resulting list of ddrs is longer then allowed by
1897 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1899 bool
1900 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1902 VEC (ddr_p, heap) * ddrs =
1903 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1904 unsigned i, j;
1906 if (vect_print_dump_info (REPORT_DETAILS))
1907 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1909 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1911 bool found;
1912 ddr_p ddr_i;
1914 ddr_i = VEC_index (ddr_p, ddrs, i);
1915 found = false;
1917 for (j = 0; j < i; j++)
1919 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1921 if (vect_vfa_range_equal (ddr_i, ddr_j))
1923 if (vect_print_dump_info (REPORT_DR_DETAILS))
1925 fprintf (vect_dump, "found equal ranges ");
1926 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1927 fprintf (vect_dump, ", ");
1928 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1929 fprintf (vect_dump, " and ");
1930 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1931 fprintf (vect_dump, ", ");
1932 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1934 found = true;
1935 break;
1939 if (found)
1941 VEC_ordered_remove (ddr_p, ddrs, i);
1942 continue;
1944 i++;
1947 if (VEC_length (ddr_p, ddrs) >
1948 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1950 if (vect_print_dump_info (REPORT_DR_DETAILS))
1952 fprintf (vect_dump,
1953 "disable versioning for alias - max number of generated "
1954 "checks exceeded.");
1957 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1959 return false;
1962 return true;
1966 /* Function vect_analyze_data_refs.
1968 Find all the data references in the loop or basic block.
1970 The general structure of the analysis of data refs in the vectorizer is as
1971 follows:
1972 1- vect_analyze_data_refs(loop/bb): call
1973 compute_data_dependences_for_loop/bb to find and analyze all data-refs
1974 in the loop/bb and their dependences.
1975 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1976 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1977 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1981 bool
1982 vect_analyze_data_refs (loop_vec_info loop_vinfo,
1983 bb_vec_info bb_vinfo,
1984 int *min_vf)
1986 struct loop *loop = NULL;
1987 basic_block bb = NULL;
1988 unsigned int i;
1989 VEC (data_reference_p, heap) *datarefs;
1990 struct data_reference *dr;
1991 tree scalar_type;
1992 bool res;
1994 if (vect_print_dump_info (REPORT_DETAILS))
1995 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1997 if (loop_vinfo)
1999 loop = LOOP_VINFO_LOOP (loop_vinfo);
2000 res = compute_data_dependences_for_loop
2001 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2002 &LOOP_VINFO_DDRS (loop_vinfo));
2004 if (!res)
2006 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2007 fprintf (vect_dump, "not vectorized: loop contains function calls"
2008 " or data references that cannot be analyzed");
2009 return false;
2012 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2014 else
2016 bb = BB_VINFO_BB (bb_vinfo);
2017 res = compute_data_dependences_for_bb (bb, true,
2018 &BB_VINFO_DATAREFS (bb_vinfo),
2019 &BB_VINFO_DDRS (bb_vinfo));
2020 if (!res)
2022 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2023 fprintf (vect_dump, "not vectorized: basic block contains function"
2024 " calls or data references that cannot be analyzed");
2025 return false;
2028 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2031 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2032 from stmt_vec_info struct to DR and vectype. */
2034 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2036 gimple stmt;
2037 stmt_vec_info stmt_info;
2038 tree base, offset, init;
2039 int vf;
2041 if (!dr || !DR_REF (dr))
2043 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2044 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2045 return false;
2048 stmt = DR_STMT (dr);
2049 stmt_info = vinfo_for_stmt (stmt);
2051 /* Check that analysis of the data-ref succeeded. */
2052 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2053 || !DR_STEP (dr))
2055 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2057 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2058 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2061 if (bb_vinfo)
2063 /* Mark the statement as not vectorizable. */
2064 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2065 continue;
2067 else
2068 return false;
2071 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2073 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2074 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2075 "constant");
2076 if (bb_vinfo)
2078 /* Mark the statement as not vectorizable. */
2079 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2080 continue;
2082 else
2083 return false;
2086 base = unshare_expr (DR_BASE_ADDRESS (dr));
2087 offset = unshare_expr (DR_OFFSET (dr));
2088 init = unshare_expr (DR_INIT (dr));
2090 /* Update DR field in stmt_vec_info struct. */
2092 /* If the dataref is in an inner-loop of the loop that is considered for
2093 for vectorization, we also want to analyze the access relative to
2094 the outer-loop (DR contains information only relative to the
2095 inner-most enclosing loop). We do that by building a reference to the
2096 first location accessed by the inner-loop, and analyze it relative to
2097 the outer-loop. */
2098 if (loop && nested_in_vect_loop_p (loop, stmt))
2100 tree outer_step, outer_base, outer_init;
2101 HOST_WIDE_INT pbitsize, pbitpos;
2102 tree poffset;
2103 enum machine_mode pmode;
2104 int punsignedp, pvolatilep;
2105 affine_iv base_iv, offset_iv;
2106 tree dinit;
2108 /* Build a reference to the first location accessed by the
2109 inner-loop: *(BASE+INIT). (The first location is actually
2110 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2111 tree inner_base = build_fold_indirect_ref
2112 (fold_build2 (POINTER_PLUS_EXPR,
2113 TREE_TYPE (base), base,
2114 fold_convert (sizetype, init)));
2116 if (vect_print_dump_info (REPORT_DETAILS))
2118 fprintf (vect_dump, "analyze in outer-loop: ");
2119 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2122 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2123 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2124 gcc_assert (outer_base != NULL_TREE);
2126 if (pbitpos % BITS_PER_UNIT != 0)
2128 if (vect_print_dump_info (REPORT_DETAILS))
2129 fprintf (vect_dump, "failed: bit offset alignment.\n");
2130 return false;
2133 outer_base = build_fold_addr_expr (outer_base);
2134 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2135 &base_iv, false))
2137 if (vect_print_dump_info (REPORT_DETAILS))
2138 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2139 return false;
2142 if (offset)
2144 if (poffset)
2145 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2146 poffset);
2147 else
2148 poffset = offset;
2151 if (!poffset)
2153 offset_iv.base = ssize_int (0);
2154 offset_iv.step = ssize_int (0);
2156 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2157 &offset_iv, false))
2159 if (vect_print_dump_info (REPORT_DETAILS))
2160 fprintf (vect_dump, "evolution of offset is not affine.\n");
2161 return false;
2164 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2165 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2166 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2167 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2168 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2170 outer_step = size_binop (PLUS_EXPR,
2171 fold_convert (ssizetype, base_iv.step),
2172 fold_convert (ssizetype, offset_iv.step));
2174 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2175 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2176 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2177 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2178 STMT_VINFO_DR_OFFSET (stmt_info) =
2179 fold_convert (ssizetype, offset_iv.base);
2180 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2181 size_int (highest_pow2_factor (offset_iv.base));
2183 if (vect_print_dump_info (REPORT_DETAILS))
2185 fprintf (vect_dump, "\touter base_address: ");
2186 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2187 fprintf (vect_dump, "\n\touter offset from base address: ");
2188 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2189 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2190 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2191 fprintf (vect_dump, "\n\touter step: ");
2192 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2193 fprintf (vect_dump, "\n\touter aligned to: ");
2194 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2198 if (STMT_VINFO_DATA_REF (stmt_info))
2200 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2202 fprintf (vect_dump,
2203 "not vectorized: more than one data ref in stmt: ");
2204 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2206 return false;
2209 STMT_VINFO_DATA_REF (stmt_info) = dr;
2211 /* Set vectype for STMT. */
2212 scalar_type = TREE_TYPE (DR_REF (dr));
2213 STMT_VINFO_VECTYPE (stmt_info) =
2214 get_vectype_for_scalar_type (scalar_type);
2215 if (!STMT_VINFO_VECTYPE (stmt_info))
2217 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2219 fprintf (vect_dump,
2220 "not vectorized: no vectype for stmt: ");
2221 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2222 fprintf (vect_dump, " scalar_type: ");
2223 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2226 if (bb_vinfo)
2228 /* Mark the statement as not vectorizable. */
2229 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2230 continue;
2232 else
2233 return false;
2236 /* Adjust the minimal vectorization factor according to the
2237 vector type. */
2238 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2239 if (vf > *min_vf)
2240 *min_vf = vf;
2243 return true;
2247 /* Function vect_get_new_vect_var.
2249 Returns a name for a new variable. The current naming scheme appends the
2250 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2251 the name of vectorizer generated variables, and appends that to NAME if
2252 provided. */
2254 tree
2255 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2257 const char *prefix;
2258 tree new_vect_var;
2260 switch (var_kind)
2262 case vect_simple_var:
2263 prefix = "vect_";
2264 break;
2265 case vect_scalar_var:
2266 prefix = "stmp_";
2267 break;
2268 case vect_pointer_var:
2269 prefix = "vect_p";
2270 break;
2271 default:
2272 gcc_unreachable ();
2275 if (name)
2277 char* tmp = concat (prefix, name, NULL);
2278 new_vect_var = create_tmp_var (type, tmp);
2279 free (tmp);
2281 else
2282 new_vect_var = create_tmp_var (type, prefix);
2284 /* Mark vector typed variable as a gimple register variable. */
2285 if (TREE_CODE (type) == VECTOR_TYPE)
2286 DECL_GIMPLE_REG_P (new_vect_var) = true;
2288 return new_vect_var;
2292 /* Function vect_create_addr_base_for_vector_ref.
2294 Create an expression that computes the address of the first memory location
2295 that will be accessed for a data reference.
2297 Input:
2298 STMT: The statement containing the data reference.
2299 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2300 OFFSET: Optional. If supplied, it is be added to the initial address.
2301 LOOP: Specify relative to which loop-nest should the address be computed.
2302 For example, when the dataref is in an inner-loop nested in an
2303 outer-loop that is now being vectorized, LOOP can be either the
2304 outer-loop, or the inner-loop. The first memory location accessed
2305 by the following dataref ('in' points to short):
2307 for (i=0; i<N; i++)
2308 for (j=0; j<M; j++)
2309 s += in[i+j]
2311 is as follows:
2312 if LOOP=i_loop: &in (relative to i_loop)
2313 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2315 Output:
2316 1. Return an SSA_NAME whose value is the address of the memory location of
2317 the first vector of the data reference.
2318 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2319 these statement(s) which define the returned SSA_NAME.
2321 FORNOW: We are only handling array accesses with step 1. */
2323 tree
2324 vect_create_addr_base_for_vector_ref (gimple stmt,
2325 gimple_seq *new_stmt_list,
2326 tree offset,
2327 struct loop *loop)
2329 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2330 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2331 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2332 tree base_name;
2333 tree data_ref_base_var;
2334 tree vec_stmt;
2335 tree addr_base, addr_expr;
2336 tree dest;
2337 gimple_seq seq = NULL;
2338 tree base_offset = unshare_expr (DR_OFFSET (dr));
2339 tree init = unshare_expr (DR_INIT (dr));
2340 tree vect_ptr_type;
2341 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2342 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2344 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2346 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2348 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2350 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2351 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2352 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2355 if (loop_vinfo)
2356 base_name = build_fold_indirect_ref (data_ref_base);
2357 else
2359 base_offset = ssize_int (0);
2360 init = ssize_int (0);
2361 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2364 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2365 add_referenced_var (data_ref_base_var);
2366 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2367 data_ref_base_var);
2368 gimple_seq_add_seq (new_stmt_list, seq);
2370 /* Create base_offset */
2371 base_offset = size_binop (PLUS_EXPR,
2372 fold_convert (sizetype, base_offset),
2373 fold_convert (sizetype, init));
2374 dest = create_tmp_var (sizetype, "base_off");
2375 add_referenced_var (dest);
2376 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2377 gimple_seq_add_seq (new_stmt_list, seq);
2379 if (offset)
2381 tree tmp = create_tmp_var (sizetype, "offset");
2383 add_referenced_var (tmp);
2384 offset = fold_build2 (MULT_EXPR, sizetype,
2385 fold_convert (sizetype, offset), step);
2386 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2387 base_offset, offset);
2388 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2389 gimple_seq_add_seq (new_stmt_list, seq);
2392 /* base + base_offset */
2393 if (loop_vinfo)
2394 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2395 data_ref_base, base_offset);
2396 else
2398 if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2399 addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2400 else
2401 addr_base = build1 (ADDR_EXPR,
2402 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2403 unshare_expr (DR_REF (dr)));
2406 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2408 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2409 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2410 get_name (base_name));
2411 add_referenced_var (addr_expr);
2412 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2413 gimple_seq_add_seq (new_stmt_list, seq);
2415 if (vect_print_dump_info (REPORT_DETAILS))
2417 fprintf (vect_dump, "created ");
2418 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2421 return vec_stmt;
2425 /* Function vect_create_data_ref_ptr.
2427 Create a new pointer to vector type (vp), that points to the first location
2428 accessed in the loop by STMT, along with the def-use update chain to
2429 appropriately advance the pointer through the loop iterations. Also set
2430 aliasing information for the pointer. This vector pointer is used by the
2431 callers to this function to create a memory reference expression for vector
2432 load/store access.
2434 Input:
2435 1. STMT: a stmt that references memory. Expected to be of the form
2436 GIMPLE_ASSIGN <name, data-ref> or
2437 GIMPLE_ASSIGN <data-ref, name>.
2438 2. AT_LOOP: the loop where the vector memref is to be created.
2439 3. OFFSET (optional): an offset to be added to the initial address accessed
2440 by the data-ref in STMT.
2441 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2442 pointing to the initial address.
2443 5. TYPE: if not NULL indicates the required type of the data-ref.
2445 Output:
2446 1. Declare a new ptr to vector_type, and have it point to the base of the
2447 data reference (initial addressed accessed by the data reference).
2448 For example, for vector of type V8HI, the following code is generated:
2450 v8hi *vp;
2451 vp = (v8hi *)initial_address;
2453 if OFFSET is not supplied:
2454 initial_address = &a[init];
2455 if OFFSET is supplied:
2456 initial_address = &a[init + OFFSET];
2458 Return the initial_address in INITIAL_ADDRESS.
2460 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2461 update the pointer in each iteration of the loop.
2463 Return the increment stmt that updates the pointer in PTR_INCR.
2465 3. Set INV_P to true if the access pattern of the data reference in the
2466 vectorized loop is invariant. Set it to false otherwise.
2468 4. Return the pointer. */
2470 tree
2471 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2472 tree offset, tree *initial_address, gimple *ptr_incr,
2473 bool only_init, bool *inv_p)
2475 tree base_name;
2476 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2477 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2478 struct loop *loop = NULL;
2479 bool nested_in_vect_loop = false;
2480 struct loop *containing_loop = NULL;
2481 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2482 tree vect_ptr_type;
2483 tree vect_ptr;
2484 tree new_temp;
2485 gimple vec_stmt;
2486 gimple_seq new_stmt_list = NULL;
2487 edge pe = NULL;
2488 basic_block new_bb;
2489 tree vect_ptr_init;
2490 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2491 tree vptr;
2492 gimple_stmt_iterator incr_gsi;
2493 bool insert_after;
2494 tree indx_before_incr, indx_after_incr;
2495 gimple incr;
2496 tree step;
2497 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2498 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2500 if (loop_vinfo)
2502 loop = LOOP_VINFO_LOOP (loop_vinfo);
2503 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2504 containing_loop = (gimple_bb (stmt))->loop_father;
2505 pe = loop_preheader_edge (loop);
2507 else
2509 gcc_assert (bb_vinfo);
2510 only_init = true;
2511 *ptr_incr = NULL;
2514 /* Check the step (evolution) of the load in LOOP, and record
2515 whether it's invariant. */
2516 if (nested_in_vect_loop)
2517 step = STMT_VINFO_DR_STEP (stmt_info);
2518 else
2519 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2521 if (tree_int_cst_compare (step, size_zero_node) == 0)
2522 *inv_p = true;
2523 else
2524 *inv_p = false;
2526 /* Create an expression for the first address accessed by this load
2527 in LOOP. */
2528 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2530 if (vect_print_dump_info (REPORT_DETAILS))
2532 tree data_ref_base = base_name;
2533 fprintf (vect_dump, "create vector-pointer variable to type: ");
2534 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2535 if (TREE_CODE (data_ref_base) == VAR_DECL
2536 || TREE_CODE (data_ref_base) == ARRAY_REF)
2537 fprintf (vect_dump, " vectorizing an array ref: ");
2538 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2539 fprintf (vect_dump, " vectorizing a record based array ref: ");
2540 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2541 fprintf (vect_dump, " vectorizing a pointer ref: ");
2542 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2545 /** (1) Create the new vector-pointer variable: **/
2546 vect_ptr_type = build_pointer_type (vectype);
2547 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2548 get_name (base_name));
2550 /* Vector types inherit the alias set of their component type by default so
2551 we need to use a ref-all pointer if the data reference does not conflict
2552 with the created vector data reference because it is not addressable. */
2553 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2554 get_alias_set (DR_REF (dr))))
2556 vect_ptr_type
2557 = build_pointer_type_for_mode (vectype,
2558 TYPE_MODE (vect_ptr_type), true);
2559 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2560 get_name (base_name));
2563 /* Likewise for any of the data references in the stmt group. */
2564 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2566 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2569 tree lhs = gimple_assign_lhs (orig_stmt);
2570 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2571 get_alias_set (lhs)))
2573 vect_ptr_type
2574 = build_pointer_type_for_mode (vectype,
2575 TYPE_MODE (vect_ptr_type), true);
2576 vect_ptr
2577 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2578 get_name (base_name));
2579 break;
2582 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2584 while (orig_stmt);
2587 add_referenced_var (vect_ptr);
2589 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2590 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2591 def-use update cycles for the pointer: One relative to the outer-loop
2592 (LOOP), which is what steps (3) and (4) below do. The other is relative
2593 to the inner-loop (which is the inner-most loop containing the dataref),
2594 and this is done be step (5) below.
2596 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2597 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2598 redundant. Steps (3),(4) create the following:
2600 vp0 = &base_addr;
2601 LOOP: vp1 = phi(vp0,vp2)
2604 vp2 = vp1 + step
2605 goto LOOP
2607 If there is an inner-loop nested in loop, then step (5) will also be
2608 applied, and an additional update in the inner-loop will be created:
2610 vp0 = &base_addr;
2611 LOOP: vp1 = phi(vp0,vp2)
2613 inner: vp3 = phi(vp1,vp4)
2614 vp4 = vp3 + inner_step
2615 if () goto inner
2617 vp2 = vp1 + step
2618 if () goto LOOP */
2620 /** (3) Calculate the initial address the vector-pointer, and set
2621 the vector-pointer to point to it before the loop: **/
2623 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2625 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2626 offset, loop);
2627 if (new_stmt_list)
2629 if (pe)
2631 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2632 gcc_assert (!new_bb);
2634 else
2635 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2638 *initial_address = new_temp;
2640 /* Create: p = (vectype *) initial_base */
2641 vec_stmt = gimple_build_assign (vect_ptr,
2642 fold_convert (vect_ptr_type, new_temp));
2643 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2644 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2645 if (pe)
2647 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2648 gcc_assert (!new_bb);
2650 else
2651 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2653 /** (4) Handle the updating of the vector-pointer inside the loop.
2654 This is needed when ONLY_INIT is false, and also when AT_LOOP
2655 is the inner-loop nested in LOOP (during outer-loop vectorization).
2658 /* No update in loop is required. */
2659 if (only_init && (!loop_vinfo || at_loop == loop))
2661 /* Copy the points-to information if it exists. */
2662 if (DR_PTR_INFO (dr))
2663 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2664 vptr = vect_ptr_init;
2666 else
2668 /* The step of the vector pointer is the Vector Size. */
2669 tree step = TYPE_SIZE_UNIT (vectype);
2670 /* One exception to the above is when the scalar step of the load in
2671 LOOP is zero. In this case the step here is also zero. */
2672 if (*inv_p)
2673 step = size_zero_node;
2675 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2677 create_iv (vect_ptr_init,
2678 fold_convert (vect_ptr_type, step),
2679 vect_ptr, loop, &incr_gsi, insert_after,
2680 &indx_before_incr, &indx_after_incr);
2681 incr = gsi_stmt (incr_gsi);
2682 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2684 /* Copy the points-to information if it exists. */
2685 if (DR_PTR_INFO (dr))
2687 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2688 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2690 if (ptr_incr)
2691 *ptr_incr = incr;
2693 vptr = indx_before_incr;
2696 if (!nested_in_vect_loop || only_init)
2697 return vptr;
2700 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2701 nested in LOOP, if exists: **/
2703 gcc_assert (nested_in_vect_loop);
2704 if (!only_init)
2706 standard_iv_increment_position (containing_loop, &incr_gsi,
2707 &insert_after);
2708 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2709 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2710 &indx_after_incr);
2711 incr = gsi_stmt (incr_gsi);
2712 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2714 /* Copy the points-to information if it exists. */
2715 if (DR_PTR_INFO (dr))
2717 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2718 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2720 if (ptr_incr)
2721 *ptr_incr = incr;
2723 return indx_before_incr;
2725 else
2726 gcc_unreachable ();
2730 /* Function bump_vector_ptr
2732 Increment a pointer (to a vector type) by vector-size. If requested,
2733 i.e. if PTR-INCR is given, then also connect the new increment stmt
2734 to the existing def-use update-chain of the pointer, by modifying
2735 the PTR_INCR as illustrated below:
2737 The pointer def-use update-chain before this function:
2738 DATAREF_PTR = phi (p_0, p_2)
2739 ....
2740 PTR_INCR: p_2 = DATAREF_PTR + step
2742 The pointer def-use update-chain after this function:
2743 DATAREF_PTR = phi (p_0, p_2)
2744 ....
2745 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2746 ....
2747 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2749 Input:
2750 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2751 in the loop.
2752 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2753 the loop. The increment amount across iterations is expected
2754 to be vector_size.
2755 BSI - location where the new update stmt is to be placed.
2756 STMT - the original scalar memory-access stmt that is being vectorized.
2757 BUMP - optional. The offset by which to bump the pointer. If not given,
2758 the offset is assumed to be vector_size.
2760 Output: Return NEW_DATAREF_PTR as illustrated above.
2764 tree
2765 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2766 gimple stmt, tree bump)
2768 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2769 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2770 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2771 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2772 tree update = TYPE_SIZE_UNIT (vectype);
2773 gimple incr_stmt;
2774 ssa_op_iter iter;
2775 use_operand_p use_p;
2776 tree new_dataref_ptr;
2778 if (bump)
2779 update = bump;
2781 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2782 dataref_ptr, update);
2783 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2784 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2785 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2787 /* Copy the points-to information if it exists. */
2788 if (DR_PTR_INFO (dr))
2789 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2791 if (!ptr_incr)
2792 return new_dataref_ptr;
2794 /* Update the vector-pointer's cross-iteration increment. */
2795 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2797 tree use = USE_FROM_PTR (use_p);
2799 if (use == dataref_ptr)
2800 SET_USE (use_p, new_dataref_ptr);
2801 else
2802 gcc_assert (tree_int_cst_compare (use, update) == 0);
2805 return new_dataref_ptr;
2809 /* Function vect_create_destination_var.
2811 Create a new temporary of type VECTYPE. */
2813 tree
2814 vect_create_destination_var (tree scalar_dest, tree vectype)
2816 tree vec_dest;
2817 const char *new_name;
2818 tree type;
2819 enum vect_var_kind kind;
2821 kind = vectype ? vect_simple_var : vect_scalar_var;
2822 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2824 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2826 new_name = get_name (scalar_dest);
2827 if (!new_name)
2828 new_name = "var_";
2829 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2830 add_referenced_var (vec_dest);
2832 return vec_dest;
2835 /* Function vect_strided_store_supported.
2837 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2838 and FALSE otherwise. */
2840 bool
2841 vect_strided_store_supported (tree vectype)
2843 optab interleave_high_optab, interleave_low_optab;
2844 int mode;
2846 mode = (int) TYPE_MODE (vectype);
2848 /* Check that the operation is supported. */
2849 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2850 vectype, optab_default);
2851 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2852 vectype, optab_default);
2853 if (!interleave_high_optab || !interleave_low_optab)
2855 if (vect_print_dump_info (REPORT_DETAILS))
2856 fprintf (vect_dump, "no optab for interleave.");
2857 return false;
2860 if (optab_handler (interleave_high_optab, mode)->insn_code
2861 == CODE_FOR_nothing
2862 || optab_handler (interleave_low_optab, mode)->insn_code
2863 == CODE_FOR_nothing)
2865 if (vect_print_dump_info (REPORT_DETAILS))
2866 fprintf (vect_dump, "interleave op not supported by target.");
2867 return false;
2870 return true;
2874 /* Function vect_permute_store_chain.
2876 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2877 a power of 2, generate interleave_high/low stmts to reorder the data
2878 correctly for the stores. Return the final references for stores in
2879 RESULT_CHAIN.
2881 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2882 The input is 4 vectors each containing 8 elements. We assign a number to each
2883 element, the input sequence is:
2885 1st vec: 0 1 2 3 4 5 6 7
2886 2nd vec: 8 9 10 11 12 13 14 15
2887 3rd vec: 16 17 18 19 20 21 22 23
2888 4th vec: 24 25 26 27 28 29 30 31
2890 The output sequence should be:
2892 1st vec: 0 8 16 24 1 9 17 25
2893 2nd vec: 2 10 18 26 3 11 19 27
2894 3rd vec: 4 12 20 28 5 13 21 30
2895 4th vec: 6 14 22 30 7 15 23 31
2897 i.e., we interleave the contents of the four vectors in their order.
2899 We use interleave_high/low instructions to create such output. The input of
2900 each interleave_high/low operation is two vectors:
2901 1st vec 2nd vec
2902 0 1 2 3 4 5 6 7
2903 the even elements of the result vector are obtained left-to-right from the
2904 high/low elements of the first vector. The odd elements of the result are
2905 obtained left-to-right from the high/low elements of the second vector.
2906 The output of interleave_high will be: 0 4 1 5
2907 and of interleave_low: 2 6 3 7
2910 The permutation is done in log LENGTH stages. In each stage interleave_high
2911 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2912 where the first argument is taken from the first half of DR_CHAIN and the
2913 second argument from it's second half.
2914 In our example,
2916 I1: interleave_high (1st vec, 3rd vec)
2917 I2: interleave_low (1st vec, 3rd vec)
2918 I3: interleave_high (2nd vec, 4th vec)
2919 I4: interleave_low (2nd vec, 4th vec)
2921 The output for the first stage is:
2923 I1: 0 16 1 17 2 18 3 19
2924 I2: 4 20 5 21 6 22 7 23
2925 I3: 8 24 9 25 10 26 11 27
2926 I4: 12 28 13 29 14 30 15 31
2928 The output of the second stage, i.e. the final result is:
2930 I1: 0 8 16 24 1 9 17 25
2931 I2: 2 10 18 26 3 11 19 27
2932 I3: 4 12 20 28 5 13 21 30
2933 I4: 6 14 22 30 7 15 23 31. */
2935 bool
2936 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2937 unsigned int length,
2938 gimple stmt,
2939 gimple_stmt_iterator *gsi,
2940 VEC(tree,heap) **result_chain)
2942 tree perm_dest, vect1, vect2, high, low;
2943 gimple perm_stmt;
2944 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2945 int i;
2946 unsigned int j;
2947 enum tree_code high_code, low_code;
2949 /* Check that the operation is supported. */
2950 if (!vect_strided_store_supported (vectype))
2951 return false;
2953 *result_chain = VEC_copy (tree, heap, dr_chain);
2955 for (i = 0; i < exact_log2 (length); i++)
2957 for (j = 0; j < length/2; j++)
2959 vect1 = VEC_index (tree, dr_chain, j);
2960 vect2 = VEC_index (tree, dr_chain, j+length/2);
2962 /* Create interleaving stmt:
2963 in the case of big endian:
2964 high = interleave_high (vect1, vect2)
2965 and in the case of little endian:
2966 high = interleave_low (vect1, vect2). */
2967 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2968 DECL_GIMPLE_REG_P (perm_dest) = 1;
2969 add_referenced_var (perm_dest);
2970 if (BYTES_BIG_ENDIAN)
2972 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2973 low_code = VEC_INTERLEAVE_LOW_EXPR;
2975 else
2977 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2978 high_code = VEC_INTERLEAVE_LOW_EXPR;
2980 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2981 vect1, vect2);
2982 high = make_ssa_name (perm_dest, perm_stmt);
2983 gimple_assign_set_lhs (perm_stmt, high);
2984 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2985 VEC_replace (tree, *result_chain, 2*j, high);
2987 /* Create interleaving stmt:
2988 in the case of big endian:
2989 low = interleave_low (vect1, vect2)
2990 and in the case of little endian:
2991 low = interleave_high (vect1, vect2). */
2992 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2993 DECL_GIMPLE_REG_P (perm_dest) = 1;
2994 add_referenced_var (perm_dest);
2995 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2996 vect1, vect2);
2997 low = make_ssa_name (perm_dest, perm_stmt);
2998 gimple_assign_set_lhs (perm_stmt, low);
2999 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3000 VEC_replace (tree, *result_chain, 2*j+1, low);
3002 dr_chain = VEC_copy (tree, heap, *result_chain);
3004 return true;
3007 /* Function vect_setup_realignment
3009 This function is called when vectorizing an unaligned load using
3010 the dr_explicit_realign[_optimized] scheme.
3011 This function generates the following code at the loop prolog:
3013 p = initial_addr;
3014 x msq_init = *(floor(p)); # prolog load
3015 realignment_token = call target_builtin;
3016 loop:
3017 x msq = phi (msq_init, ---)
3019 The stmts marked with x are generated only for the case of
3020 dr_explicit_realign_optimized.
3022 The code above sets up a new (vector) pointer, pointing to the first
3023 location accessed by STMT, and a "floor-aligned" load using that pointer.
3024 It also generates code to compute the "realignment-token" (if the relevant
3025 target hook was defined), and creates a phi-node at the loop-header bb
3026 whose arguments are the result of the prolog-load (created by this
3027 function) and the result of a load that takes place in the loop (to be
3028 created by the caller to this function).
3030 For the case of dr_explicit_realign_optimized:
3031 The caller to this function uses the phi-result (msq) to create the
3032 realignment code inside the loop, and sets up the missing phi argument,
3033 as follows:
3034 loop:
3035 msq = phi (msq_init, lsq)
3036 lsq = *(floor(p')); # load in loop
3037 result = realign_load (msq, lsq, realignment_token);
3039 For the case of dr_explicit_realign:
3040 loop:
3041 msq = *(floor(p)); # load in loop
3042 p' = p + (VS-1);
3043 lsq = *(floor(p')); # load in loop
3044 result = realign_load (msq, lsq, realignment_token);
3046 Input:
3047 STMT - (scalar) load stmt to be vectorized. This load accesses
3048 a memory location that may be unaligned.
3049 BSI - place where new code is to be inserted.
3050 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3051 is used.
3053 Output:
3054 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3055 target hook, if defined.
3056 Return value - the result of the loop-header phi node. */
3058 tree
3059 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3060 tree *realignment_token,
3061 enum dr_alignment_support alignment_support_scheme,
3062 tree init_addr,
3063 struct loop **at_loop)
3065 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3066 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3067 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3068 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3069 edge pe;
3070 tree scalar_dest = gimple_assign_lhs (stmt);
3071 tree vec_dest;
3072 gimple inc;
3073 tree ptr;
3074 tree data_ref;
3075 gimple new_stmt;
3076 basic_block new_bb;
3077 tree msq_init = NULL_TREE;
3078 tree new_temp;
3079 gimple phi_stmt;
3080 tree msq = NULL_TREE;
3081 gimple_seq stmts = NULL;
3082 bool inv_p;
3083 bool compute_in_loop = false;
3084 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3085 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3086 struct loop *loop_for_initial_load;
3088 gcc_assert (alignment_support_scheme == dr_explicit_realign
3089 || alignment_support_scheme == dr_explicit_realign_optimized);
3091 /* We need to generate three things:
3092 1. the misalignment computation
3093 2. the extra vector load (for the optimized realignment scheme).
3094 3. the phi node for the two vectors from which the realignment is
3095 done (for the optimized realignment scheme).
3098 /* 1. Determine where to generate the misalignment computation.
3100 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3101 calculation will be generated by this function, outside the loop (in the
3102 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3103 caller, inside the loop.
3105 Background: If the misalignment remains fixed throughout the iterations of
3106 the loop, then both realignment schemes are applicable, and also the
3107 misalignment computation can be done outside LOOP. This is because we are
3108 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3109 are a multiple of VS (the Vector Size), and therefore the misalignment in
3110 different vectorized LOOP iterations is always the same.
3111 The problem arises only if the memory access is in an inner-loop nested
3112 inside LOOP, which is now being vectorized using outer-loop vectorization.
3113 This is the only case when the misalignment of the memory access may not
3114 remain fixed throughout the iterations of the inner-loop (as explained in
3115 detail in vect_supportable_dr_alignment). In this case, not only is the
3116 optimized realignment scheme not applicable, but also the misalignment
3117 computation (and generation of the realignment token that is passed to
3118 REALIGN_LOAD) have to be done inside the loop.
3120 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3121 or not, which in turn determines if the misalignment is computed inside
3122 the inner-loop, or outside LOOP. */
3124 if (init_addr != NULL_TREE)
3126 compute_in_loop = true;
3127 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3131 /* 2. Determine where to generate the extra vector load.
3133 For the optimized realignment scheme, instead of generating two vector
3134 loads in each iteration, we generate a single extra vector load in the
3135 preheader of the loop, and in each iteration reuse the result of the
3136 vector load from the previous iteration. In case the memory access is in
3137 an inner-loop nested inside LOOP, which is now being vectorized using
3138 outer-loop vectorization, we need to determine whether this initial vector
3139 load should be generated at the preheader of the inner-loop, or can be
3140 generated at the preheader of LOOP. If the memory access has no evolution
3141 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3142 to be generated inside LOOP (in the preheader of the inner-loop). */
3144 if (nested_in_vect_loop)
3146 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3147 bool invariant_in_outerloop =
3148 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3149 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3151 else
3152 loop_for_initial_load = loop;
3153 if (at_loop)
3154 *at_loop = loop_for_initial_load;
3156 /* 3. For the case of the optimized realignment, create the first vector
3157 load at the loop preheader. */
3159 if (alignment_support_scheme == dr_explicit_realign_optimized)
3161 /* Create msq_init = *(floor(p1)) in the loop preheader */
3163 gcc_assert (!compute_in_loop);
3164 pe = loop_preheader_edge (loop_for_initial_load);
3165 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3166 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3167 &init_addr, &inc, true, &inv_p);
3168 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3169 new_stmt = gimple_build_assign (vec_dest, data_ref);
3170 new_temp = make_ssa_name (vec_dest, new_stmt);
3171 gimple_assign_set_lhs (new_stmt, new_temp);
3172 mark_symbols_for_renaming (new_stmt);
3173 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3174 gcc_assert (!new_bb);
3175 msq_init = gimple_assign_lhs (new_stmt);
3178 /* 4. Create realignment token using a target builtin, if available.
3179 It is done either inside the containing loop, or before LOOP (as
3180 determined above). */
3182 if (targetm.vectorize.builtin_mask_for_load)
3184 tree builtin_decl;
3186 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
3187 if (compute_in_loop)
3188 gcc_assert (init_addr); /* already computed by the caller. */
3189 else
3191 /* Generate the INIT_ADDR computation outside LOOP. */
3192 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3193 NULL_TREE, loop);
3194 pe = loop_preheader_edge (loop);
3195 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3196 gcc_assert (!new_bb);
3199 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3200 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3201 vec_dest =
3202 vect_create_destination_var (scalar_dest,
3203 gimple_call_return_type (new_stmt));
3204 new_temp = make_ssa_name (vec_dest, new_stmt);
3205 gimple_call_set_lhs (new_stmt, new_temp);
3207 if (compute_in_loop)
3208 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3209 else
3211 /* Generate the misalignment computation outside LOOP. */
3212 pe = loop_preheader_edge (loop);
3213 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3214 gcc_assert (!new_bb);
3217 *realignment_token = gimple_call_lhs (new_stmt);
3219 /* The result of the CALL_EXPR to this builtin is determined from
3220 the value of the parameter and no global variables are touched
3221 which makes the builtin a "const" function. Requiring the
3222 builtin to have the "const" attribute makes it unnecessary
3223 to call mark_call_clobbered. */
3224 gcc_assert (TREE_READONLY (builtin_decl));
3227 if (alignment_support_scheme == dr_explicit_realign)
3228 return msq;
3230 gcc_assert (!compute_in_loop);
3231 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3234 /* 5. Create msq = phi <msq_init, lsq> in loop */
3236 pe = loop_preheader_edge (containing_loop);
3237 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3238 msq = make_ssa_name (vec_dest, NULL);
3239 phi_stmt = create_phi_node (msq, containing_loop->header);
3240 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3241 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3243 return msq;
3247 /* Function vect_strided_load_supported.
3249 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3250 and FALSE otherwise. */
3252 bool
3253 vect_strided_load_supported (tree vectype)
3255 optab perm_even_optab, perm_odd_optab;
3256 int mode;
3258 mode = (int) TYPE_MODE (vectype);
3260 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3261 optab_default);
3262 if (!perm_even_optab)
3264 if (vect_print_dump_info (REPORT_DETAILS))
3265 fprintf (vect_dump, "no optab for perm_even.");
3266 return false;
3269 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3271 if (vect_print_dump_info (REPORT_DETAILS))
3272 fprintf (vect_dump, "perm_even op not supported by target.");
3273 return false;
3276 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3277 optab_default);
3278 if (!perm_odd_optab)
3280 if (vect_print_dump_info (REPORT_DETAILS))
3281 fprintf (vect_dump, "no optab for perm_odd.");
3282 return false;
3285 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3287 if (vect_print_dump_info (REPORT_DETAILS))
3288 fprintf (vect_dump, "perm_odd op not supported by target.");
3289 return false;
3291 return true;
3295 /* Function vect_permute_load_chain.
3297 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3298 a power of 2, generate extract_even/odd stmts to reorder the input data
3299 correctly. Return the final references for loads in RESULT_CHAIN.
3301 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3302 The input is 4 vectors each containing 8 elements. We assign a number to each
3303 element, the input sequence is:
3305 1st vec: 0 1 2 3 4 5 6 7
3306 2nd vec: 8 9 10 11 12 13 14 15
3307 3rd vec: 16 17 18 19 20 21 22 23
3308 4th vec: 24 25 26 27 28 29 30 31
3310 The output sequence should be:
3312 1st vec: 0 4 8 12 16 20 24 28
3313 2nd vec: 1 5 9 13 17 21 25 29
3314 3rd vec: 2 6 10 14 18 22 26 30
3315 4th vec: 3 7 11 15 19 23 27 31
3317 i.e., the first output vector should contain the first elements of each
3318 interleaving group, etc.
3320 We use extract_even/odd instructions to create such output. The input of each
3321 extract_even/odd operation is two vectors
3322 1st vec 2nd vec
3323 0 1 2 3 4 5 6 7
3325 and the output is the vector of extracted even/odd elements. The output of
3326 extract_even will be: 0 2 4 6
3327 and of extract_odd: 1 3 5 7
3330 The permutation is done in log LENGTH stages. In each stage extract_even and
3331 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3332 order. In our example,
3334 E1: extract_even (1st vec, 2nd vec)
3335 E2: extract_odd (1st vec, 2nd vec)
3336 E3: extract_even (3rd vec, 4th vec)
3337 E4: extract_odd (3rd vec, 4th vec)
3339 The output for the first stage will be:
3341 E1: 0 2 4 6 8 10 12 14
3342 E2: 1 3 5 7 9 11 13 15
3343 E3: 16 18 20 22 24 26 28 30
3344 E4: 17 19 21 23 25 27 29 31
3346 In order to proceed and create the correct sequence for the next stage (or
3347 for the correct output, if the second stage is the last one, as in our
3348 example), we first put the output of extract_even operation and then the
3349 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3350 The input for the second stage is:
3352 1st vec (E1): 0 2 4 6 8 10 12 14
3353 2nd vec (E3): 16 18 20 22 24 26 28 30
3354 3rd vec (E2): 1 3 5 7 9 11 13 15
3355 4th vec (E4): 17 19 21 23 25 27 29 31
3357 The output of the second stage:
3359 E1: 0 4 8 12 16 20 24 28
3360 E2: 2 6 10 14 18 22 26 30
3361 E3: 1 5 9 13 17 21 25 29
3362 E4: 3 7 11 15 19 23 27 31
3364 And RESULT_CHAIN after reordering:
3366 1st vec (E1): 0 4 8 12 16 20 24 28
3367 2nd vec (E3): 1 5 9 13 17 21 25 29
3368 3rd vec (E2): 2 6 10 14 18 22 26 30
3369 4th vec (E4): 3 7 11 15 19 23 27 31. */
3371 bool
3372 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3373 unsigned int length,
3374 gimple stmt,
3375 gimple_stmt_iterator *gsi,
3376 VEC(tree,heap) **result_chain)
3378 tree perm_dest, data_ref, first_vect, second_vect;
3379 gimple perm_stmt;
3380 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3381 int i;
3382 unsigned int j;
3384 /* Check that the operation is supported. */
3385 if (!vect_strided_load_supported (vectype))
3386 return false;
3388 *result_chain = VEC_copy (tree, heap, dr_chain);
3389 for (i = 0; i < exact_log2 (length); i++)
3391 for (j = 0; j < length; j +=2)
3393 first_vect = VEC_index (tree, dr_chain, j);
3394 second_vect = VEC_index (tree, dr_chain, j+1);
3396 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3397 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3398 DECL_GIMPLE_REG_P (perm_dest) = 1;
3399 add_referenced_var (perm_dest);
3401 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3402 perm_dest, first_vect,
3403 second_vect);
3405 data_ref = make_ssa_name (perm_dest, perm_stmt);
3406 gimple_assign_set_lhs (perm_stmt, data_ref);
3407 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3408 mark_symbols_for_renaming (perm_stmt);
3410 VEC_replace (tree, *result_chain, j/2, data_ref);
3412 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3413 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3414 DECL_GIMPLE_REG_P (perm_dest) = 1;
3415 add_referenced_var (perm_dest);
3417 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3418 perm_dest, first_vect,
3419 second_vect);
3420 data_ref = make_ssa_name (perm_dest, perm_stmt);
3421 gimple_assign_set_lhs (perm_stmt, data_ref);
3422 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3423 mark_symbols_for_renaming (perm_stmt);
3425 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3427 dr_chain = VEC_copy (tree, heap, *result_chain);
3429 return true;
3433 /* Function vect_transform_strided_load.
3435 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3436 to perform their permutation and ascribe the result vectorized statements to
3437 the scalar statements.
3440 bool
3441 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3442 gimple_stmt_iterator *gsi)
3444 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3445 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3446 gimple next_stmt, new_stmt;
3447 VEC(tree,heap) *result_chain = NULL;
3448 unsigned int i, gap_count;
3449 tree tmp_data_ref;
3451 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3452 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3453 vectors, that are ready for vector computation. */
3454 result_chain = VEC_alloc (tree, heap, size);
3455 /* Permute. */
3456 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3457 return false;
3459 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3460 Since we scan the chain starting from it's first node, their order
3461 corresponds the order of data-refs in RESULT_CHAIN. */
3462 next_stmt = first_stmt;
3463 gap_count = 1;
3464 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3466 if (!next_stmt)
3467 break;
3469 /* Skip the gaps. Loads created for the gaps will be removed by dead
3470 code elimination pass later. No need to check for the first stmt in
3471 the group, since it always exists.
3472 DR_GROUP_GAP is the number of steps in elements from the previous
3473 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3474 correspond to the gaps.
3476 if (next_stmt != first_stmt
3477 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3479 gap_count++;
3480 continue;
3483 while (next_stmt)
3485 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3486 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3487 copies, and we put the new vector statement in the first available
3488 RELATED_STMT. */
3489 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3490 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3491 else
3493 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3495 gimple prev_stmt =
3496 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3497 gimple rel_stmt =
3498 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3499 while (rel_stmt)
3501 prev_stmt = rel_stmt;
3502 rel_stmt =
3503 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3506 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3507 new_stmt;
3511 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3512 gap_count = 1;
3513 /* If NEXT_STMT accesses the same DR as the previous statement,
3514 put the same TMP_DATA_REF as its vectorized statement; otherwise
3515 get the next data-ref from RESULT_CHAIN. */
3516 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3517 break;
3521 VEC_free (tree, heap, result_chain);
3522 return true;
3525 /* Function vect_force_dr_alignment_p.
3527 Returns whether the alignment of a DECL can be forced to be aligned
3528 on ALIGNMENT bit boundary. */
3530 bool
3531 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3533 if (TREE_CODE (decl) != VAR_DECL)
3534 return false;
3536 if (DECL_EXTERNAL (decl))
3537 return false;
3539 if (TREE_ASM_WRITTEN (decl))
3540 return false;
3542 if (TREE_STATIC (decl))
3543 return (alignment <= MAX_OFILE_ALIGNMENT);
3544 else
3545 return (alignment <= MAX_STACK_ALIGNMENT);
3548 /* Function vect_supportable_dr_alignment
3550 Return whether the data reference DR is supported with respect to its
3551 alignment. */
3553 enum dr_alignment_support
3554 vect_supportable_dr_alignment (struct data_reference *dr)
3556 gimple stmt = DR_STMT (dr);
3557 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3558 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3559 enum machine_mode mode = TYPE_MODE (vectype);
3560 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3561 struct loop *vect_loop = NULL;
3562 bool nested_in_vect_loop = false;
3564 if (aligned_access_p (dr))
3565 return dr_aligned;
3567 if (!loop_vinfo)
3568 /* FORNOW: Misaligned accesses are supported only in loops. */
3569 return dr_unaligned_unsupported;
3571 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3572 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3574 /* Possibly unaligned access. */
3576 /* We can choose between using the implicit realignment scheme (generating
3577 a misaligned_move stmt) and the explicit realignment scheme (generating
3578 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3579 realignment scheme: optimized, and unoptimized.
3580 We can optimize the realignment only if the step between consecutive
3581 vector loads is equal to the vector size. Since the vector memory
3582 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3583 is guaranteed that the misalignment amount remains the same throughout the
3584 execution of the vectorized loop. Therefore, we can create the
3585 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3586 at the loop preheader.
3588 However, in the case of outer-loop vectorization, when vectorizing a
3589 memory access in the inner-loop nested within the LOOP that is now being
3590 vectorized, while it is guaranteed that the misalignment of the
3591 vectorized memory access will remain the same in different outer-loop
3592 iterations, it is *not* guaranteed that is will remain the same throughout
3593 the execution of the inner-loop. This is because the inner-loop advances
3594 with the original scalar step (and not in steps of VS). If the inner-loop
3595 step happens to be a multiple of VS, then the misalignment remains fixed
3596 and we can use the optimized realignment scheme. For example:
3598 for (i=0; i<N; i++)
3599 for (j=0; j<M; j++)
3600 s += a[i+j];
3602 When vectorizing the i-loop in the above example, the step between
3603 consecutive vector loads is 1, and so the misalignment does not remain
3604 fixed across the execution of the inner-loop, and the realignment cannot
3605 be optimized (as illustrated in the following pseudo vectorized loop):
3607 for (i=0; i<N; i+=4)
3608 for (j=0; j<M; j++){
3609 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3610 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3611 // (assuming that we start from an aligned address).
3614 We therefore have to use the unoptimized realignment scheme:
3616 for (i=0; i<N; i+=4)
3617 for (j=k; j<M; j+=4)
3618 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3619 // that the misalignment of the initial address is
3620 // 0).
3622 The loop can then be vectorized as follows:
3624 for (k=0; k<4; k++){
3625 rt = get_realignment_token (&vp[k]);
3626 for (i=0; i<N; i+=4){
3627 v1 = vp[i+k];
3628 for (j=k; j<M; j+=4){
3629 v2 = vp[i+j+VS-1];
3630 va = REALIGN_LOAD <v1,v2,rt>;
3631 vs += va;
3632 v1 = v2;
3635 } */
3637 if (DR_IS_READ (dr))
3639 bool is_packed = false;
3640 tree type = (TREE_TYPE (DR_REF (dr)));
3642 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3643 CODE_FOR_nothing
3644 && (!targetm.vectorize.builtin_mask_for_load
3645 || targetm.vectorize.builtin_mask_for_load ()))
3647 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3648 if (nested_in_vect_loop
3649 && (TREE_INT_CST_LOW (DR_STEP (dr))
3650 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3651 return dr_explicit_realign;
3652 else
3653 return dr_explicit_realign_optimized;
3655 if (!known_alignment_for_access_p (dr))
3657 tree ba = DR_BASE_OBJECT (dr);
3659 if (ba)
3660 is_packed = contains_packed_reference (ba);
3663 if (targetm.vectorize.
3664 builtin_support_vector_misalignment (mode, type,
3665 DR_MISALIGNMENT (dr), is_packed))
3666 /* Can't software pipeline the loads, but can at least do them. */
3667 return dr_unaligned_supported;
3669 else
3671 bool is_packed = false;
3672 tree type = (TREE_TYPE (DR_REF (dr)));
3674 if (!known_alignment_for_access_p (dr))
3676 tree ba = DR_BASE_OBJECT (dr);
3678 if (ba)
3679 is_packed = contains_packed_reference (ba);
3682 if (targetm.vectorize.
3683 builtin_support_vector_misalignment (mode, type,
3684 DR_MISALIGNMENT (dr), is_packed))
3685 return dr_unaligned_supported;
3688 /* Unsupported. */
3689 return dr_unaligned_unsupported;