gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "toplev.h"
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
42 #include "expr.h"
43 #include "optabs.h"
45 /* Return the smallest scalar part of STMT.
46 This is used to determine the vectype of the stmt. We generally set the
47 vectype according to the type of the result (lhs). For stmts whose
48 result-type is different from the type of the arguments (e.g., demotion,
49 promotion), vectype will be reset appropriately (later). Note that we have
50 to visit the smallest datatype in this function, because that determines the
51 VF. If the smallest datatype in the loop is present only as the rhs of a
52 promotion operation - we'd miss it.
53 Such a case, where a variable of this datatype does not appear in the lhs
54 anywhere in the loop, can only occur if it's an invariant: e.g.:
55 'int_x = (int) short_inv', which we'd expect to have been optimized away by
56 invariant motion. However, we cannot rely on invariant motion to always take
57 invariants out of the loop, and so in the case of promotion we also have to
58 check the rhs.
59 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
60 types. */
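/* Illustrative example (assuming 128-bit vectors): in a loop containing
   only 'int_a[i] = (int) short_b[i];' the result type is int, but the
   smallest scalar type is short, so the vectorization factor is
   16/2 = 8 rather than 16/4 = 4.  */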
62 tree
63 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
64 HOST_WIDE_INT *rhs_size_unit)
66 tree scalar_type = gimple_expr_type (stmt);
67 HOST_WIDE_INT lhs, rhs;
69 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
71 if (is_gimple_assign (stmt)
72 && (gimple_assign_cast_p (stmt)
73 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
74 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
76 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
78 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
79 if (rhs < lhs)
80 scalar_type = rhs_type;
83 *lhs_size_unit = lhs;
84 *rhs_size_unit = rhs;
85 return scalar_type;
89 /* Find the place of the data-ref in STMT in the interleaving chain that starts
90 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
92 int
93 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
95 gimple next_stmt = first_stmt;
96 int result = 0;
98 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
99 return -1;
101 while (next_stmt && next_stmt != stmt)
103 result++;
104 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
107 if (next_stmt)
108 return result;
109 else
110 return -1;
114 /* Function vect_insert_into_interleaving_chain.
116 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
118 static void
119 vect_insert_into_interleaving_chain (struct data_reference *dra,
120 struct data_reference *drb)
122 gimple prev, next;
123 tree next_init;
124 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
125 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
127 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
128 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
129 while (next)
131 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
132 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
134 /* Insert here. */
135 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
136 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
137 return;
139 prev = next;
140 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
143 /* We got to the end of the list. Insert here. */
144 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
145 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
149 /* Function vect_update_interleaving_chain.
151 For two data-refs DRA and DRB that are part of a chain of interleaved data
152 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
154 There are four possible cases:
155 1. New stmts - both DRA and DRB are not a part of any chain:
156 FIRST_DR = DRB
157 NEXT_DR (DRB) = DRA
158 2. DRB is a part of a chain and DRA is not:
159 no need to update FIRST_DR
160 no need to insert DRB
161 insert DRA according to init
162 3. DRA is a part of a chain and DRB is not:
163 if (init of FIRST_DR > init of DRB)
164 FIRST_DR = DRB
165 NEXT(FIRST_DR) = previous FIRST_DR
166 else
167 insert DRB according to its init
168 4. both DRA and DRB are in some interleaving chains:
169 choose the chain with the smallest init of FIRST_DR
170 insert the nodes of the second chain into the first one. */
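/* Worked example of case 4 (illustrative): if DRA's chain is {a[4], a[8]}
   and DRB's chain is {a[0], a[12]} (inits 4,8 and 0,12), DRB's chain has
   the smaller first init, so the nodes of DRA's chain are inserted into it
   by init, giving {a[0], a[4], a[8], a[12]}, and FIRST_DR of every node is
   updated to the stmt accessing a[0].  */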
172 static void
173 vect_update_interleaving_chain (struct data_reference *drb,
174 struct data_reference *dra)
176 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
177 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
178 tree next_init, init_dra_chain, init_drb_chain;
179 gimple first_a, first_b;
180 tree node_init;
181 gimple node, prev, next, first_stmt;
183 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
184 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
186 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
187 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
188 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
189 return;
192 /* 2. DRB is a part of a chain and DRA is not. */
193 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
195 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
196 /* Insert DRA into the chain of DRB. */
197 vect_insert_into_interleaving_chain (dra, drb);
198 return;
201 /* 3. DRA is a part of a chain and DRB is not. */
202 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
204 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
205 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
206 old_first_stmt)));
207 gimple tmp;
209 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
211 /* DRB's init is smaller than the init of the stmt previously marked
212 as the first stmt of the interleaving chain of DRA. Therefore, we
213 update FIRST_STMT and put DRB in the head of the list. */
214 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
215 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
217 /* Update all the stmts in the list to point to the new FIRST_STMT. */
218 tmp = old_first_stmt;
219 while (tmp)
221 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
222 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
225 else
227 /* Insert DRB in the list of DRA. */
228 vect_insert_into_interleaving_chain (drb, dra);
229 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
231 return;
234 /* 4. both DRA and DRB are in some interleaving chains. */
235 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
236 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
237 if (first_a == first_b)
238 return;
239 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
240 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
242 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
244 /* Insert the nodes of DRA chain into the DRB chain.
245 After inserting a node, continue from this node of the DRB chain (don't
246 start from the beginning). */
247 node = DR_GROUP_FIRST_DR (stmtinfo_a);
248 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
249 first_stmt = first_b;
251 else
253 /* Insert the nodes of DRB chain into the DRA chain.
254 After inserting a node, continue from this node of the DRA chain (don't
255 start from the beginning). */
256 node = DR_GROUP_FIRST_DR (stmtinfo_b);
257 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
258 first_stmt = first_a;
261 while (node)
263 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
264 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
265 while (next)
267 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
268 if (tree_int_cst_compare (next_init, node_init) > 0)
270 /* Insert here. */
271 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
272 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
273 prev = node;
274 break;
276 prev = next;
277 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
279 if (!next)
281 /* We got to the end of the list. Insert here. */
282 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
283 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
284 prev = node;
286 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
287 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
292 /* Function vect_equal_offsets.
294 Check if OFFSET1 and OFFSET2 are identical expressions. */
296 static bool
297 vect_equal_offsets (tree offset1, tree offset2)
299 bool res;
301 STRIP_NOPS (offset1);
302 STRIP_NOPS (offset2);
304 if (offset1 == offset2)
305 return true;
307 if (TREE_CODE (offset1) != TREE_CODE (offset2)
308 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
309 return false;
311 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
312 TREE_OPERAND (offset2, 0));
314 if (!res || !BINARY_CLASS_P (offset1))
315 return res;
317 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
318 TREE_OPERAND (offset2, 1));
320 return res;
324 /* Function vect_check_interleaving.
326 Check if DRA and DRB form part of an interleaving. If they do, insert
327 DRA and DRB into an interleaving chain. */
329 static bool
330 vect_check_interleaving (struct data_reference *dra,
331 struct data_reference *drb)
333 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
335 /* Check that the data-refs have the same first location (except init) and
336 that they are both either loads or stores (not one load and one store). */
337 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
338 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
339 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
340 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
341 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
342 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
343 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
344 || DR_IS_READ (dra) != DR_IS_READ (drb))
345 return false;
347 /* Check:
348 1. data-refs are of the same type
349 2. their steps are equal
350 3. the step (if greater than zero) is greater than the difference between
351 data-refs' inits. */
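/* Worked example (illustrative): for accesses a[2*i] and a[2*i+1] with
   4-byte elements, both steps are 8 and the inits are 0 and 4; the
   difference of the inits (4) is a multiple of the type size and does not
   exceed the step, so the two accesses are detected as interleaved.  */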
352 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
353 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
355 if (type_size_a != type_size_b
356 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
357 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
358 TREE_TYPE (DR_REF (drb))))
359 return false;
361 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
362 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
363 step = TREE_INT_CST_LOW (DR_STEP (dra));
365 if (init_a > init_b)
367 /* If init_a == init_b + the size of the type * k, we have an interleaving,
368 and DRB is accessed before DRA. */
369 diff_mod_size = (init_a - init_b) % type_size_a;
371 if (step && (init_a - init_b) > step)
372 return false;
374 if (diff_mod_size == 0)
376 vect_update_interleaving_chain (drb, dra);
377 if (vect_print_dump_info (REPORT_DR_DETAILS))
379 fprintf (vect_dump, "Detected interleaving ");
380 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
381 fprintf (vect_dump, " and ");
382 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
384 return true;
387 else
389 /* If init_b == init_a + the size of the type * k, we have an
390 interleaving, and DRA is accessed before DRB. */
391 diff_mod_size = (init_b - init_a) % type_size_a;
393 if (step && (init_b - init_a) > step)
394 return false;
396 if (diff_mod_size == 0)
398 vect_update_interleaving_chain (dra, drb);
399 if (vect_print_dump_info (REPORT_DR_DETAILS))
401 fprintf (vect_dump, "Detected interleaving ");
402 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
403 fprintf (vect_dump, " and ");
404 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
406 return true;
410 return false;
413 /* Check if the data references pointed to by DR_I and DR_J are the same or
414 belong to the same interleaving group. Return FALSE if the drs are
415 different, otherwise return TRUE. */
417 static bool
418 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
420 gimple stmt_i = DR_STMT (dr_i);
421 gimple stmt_j = DR_STMT (dr_j);
423 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
424 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
425 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
426 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
427 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
428 return true;
429 else
430 return false;
433 /* If address ranges represented by DDR_I and DDR_J are equal,
434 return TRUE, otherwise return FALSE. */
436 static bool
437 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
439 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
440 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
441 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
442 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
443 return true;
444 else
445 return false;
448 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
449 tested at run-time. Return TRUE if DDR was successfully inserted.
450 Return false if versioning is not supported. */
452 static bool
453 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
457 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
458 return false;
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "mark for run-time aliasing test between ");
463 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
468 if (optimize_loop_nest_for_size_p (loop))
470 if (vect_print_dump_info (REPORT_DR_DETAILS))
471 fprintf (vect_dump, "versioning not supported when optimizing for size.");
472 return false;
475 /* FORNOW: We don't support versioning with outer-loop vectorization. */
476 if (loop->inner)
478 if (vect_print_dump_info (REPORT_DR_DETAILS))
479 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
480 return false;
483 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
484 return true;
488 /* Function vect_analyze_data_ref_dependence.
490 Return TRUE if there (might) exist a dependence between a memory-reference
491 DRA and a memory-reference DRB. When the dependence can instead be
492 checked at run time by versioning for alias, return FALSE. Adjust *MAX_VF according to
493 the data dependence. */
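/* Illustrative example of the *MAX_VF adjustment: for a loop containing
   'a[i+3] = a[i] + x;' the dependence distance is 3, so *MAX_VF is reduced
   to 3; with a vectorization factor of at most 3 the elements written in a
   vector iteration never overlap the elements read by that iteration.  */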
495 static bool
496 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
497 loop_vec_info loop_vinfo, int *max_vf)
499 unsigned int i;
500 struct loop *loop = NULL;
501 struct data_reference *dra = DDR_A (ddr);
502 struct data_reference *drb = DDR_B (ddr);
503 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
504 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
505 lambda_vector dist_v;
506 unsigned int loop_depth;
508 /* Don't bother to analyze statements marked as unvectorizable. */
509 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
510 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
511 return false;
513 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
515 /* Independent data accesses. */
516 vect_check_interleaving (dra, drb);
517 return false;
520 if (loop_vinfo)
521 loop = LOOP_VINFO_LOOP (loop_vinfo);
523 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
524 return false;
526 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
528 if (loop_vinfo)
530 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning for alias required: "
533 "can't determine dependence between ");
534 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
535 fprintf (vect_dump, " and ");
536 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
539 /* Add to list of ddrs that need to be tested at run-time. */
540 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
543 /* When vectorizing a basic block, an unknown dependence can still mean
544 a strided access. */
545 if (vect_check_interleaving (dra, drb))
546 return false;
548 if (vect_print_dump_info (REPORT_DR_DETAILS))
550 fprintf (vect_dump, "can't determine dependence between ");
551 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
552 fprintf (vect_dump, " and ");
553 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
556 /* Mark the statements as unvectorizable. */
557 STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false;
558 STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false;
560 return false;
563 /* Versioning for alias is not yet supported for basic block SLP, and
564 dependence distance is inapplicable, hence, in the case of a known data
565 dependence, basic block vectorization is impossible for now. */
566 if (!loop_vinfo)
568 if (dra != drb && vect_check_interleaving (dra, drb))
569 return false;
571 if (vect_print_dump_info (REPORT_DR_DETAILS))
573 fprintf (vect_dump, "determined dependence between ");
574 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
575 fprintf (vect_dump, " and ");
576 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
579 return true;
582 /* Loop-based vectorization and known data dependence. */
583 if (DDR_NUM_DIST_VECTS (ddr) == 0)
585 if (vect_print_dump_info (REPORT_DR_DETAILS))
587 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
588 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
589 fprintf (vect_dump, " and ");
590 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
592 /* Add to list of ddrs that need to be tested at run-time. */
593 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
596 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
597 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
599 int dist = dist_v[loop_depth];
601 if (vect_print_dump_info (REPORT_DR_DETAILS))
602 fprintf (vect_dump, "dependence distance = %d.", dist);
604 if (dist == 0)
606 if (vect_print_dump_info (REPORT_DR_DETAILS))
608 fprintf (vect_dump, "dependence distance == 0 between ");
609 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
610 fprintf (vect_dump, " and ");
611 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
614 /* For interleaving, mark that there is a read-write dependency if
615 necessary. We have already checked above that one of the data-refs is a store. */
616 if (DR_IS_READ (dra))
617 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
618 else
620 if (DR_IS_READ (drb))
621 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
624 continue;
627 if (dist > 0 && DDR_REVERSED_P (ddr))
629 /* If DDR_REVERSED_P is set, the order of the data-refs in DDR was
630 reversed (to make the distance vector positive), and the actual
631 distance is negative. */
632 if (vect_print_dump_info (REPORT_DR_DETAILS))
633 fprintf (vect_dump, "dependence distance negative.");
634 continue;
637 if (abs (dist) >= 2
638 && abs (dist) < *max_vf)
640 /* The dependence distance requires reduction of the maximal
641 vectorization factor. */
642 *max_vf = abs (dist);
643 if (vect_print_dump_info (REPORT_DR_DETAILS))
644 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
645 *max_vf);
648 if (abs (dist) >= *max_vf)
650 /* Dependence distance does not create dependence, as far as
651 vectorization is concerned, in this case. */
652 if (vect_print_dump_info (REPORT_DR_DETAILS))
653 fprintf (vect_dump, "dependence distance >= VF.");
654 continue;
657 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
659 fprintf (vect_dump, "not vectorized, possible dependence "
660 "between data-refs ");
661 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
662 fprintf (vect_dump, " and ");
663 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
666 return true;
669 return false;
672 /* Function vect_analyze_data_ref_dependences.
674 Examine all the data references in the loop, and make sure there do not
675 exist any data dependences between them. Set *MAX_VF according to
676 the maximum vectorization factor the data dependences allow. */
678 bool
679 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
680 bb_vec_info bb_vinfo, int *max_vf)
682 unsigned int i;
683 VEC (ddr_p, heap) *ddrs = NULL;
684 struct data_dependence_relation *ddr;
686 if (vect_print_dump_info (REPORT_DETAILS))
687 fprintf (vect_dump, "=== vect_analyze_dependences ===");
689 if (loop_vinfo)
690 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
691 else
692 ddrs = BB_VINFO_DDRS (bb_vinfo);
694 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
695 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
696 return false;
698 return true;
702 /* Function vect_compute_data_ref_alignment
704 Compute the misalignment of the data reference DR.
706 Output:
707 1. If during the misalignment computation it is found that the data reference
708 cannot be vectorized then false is returned.
709 2. DR_MISALIGNMENT (DR) is defined.
711 FOR NOW: No analysis is actually performed. Misalignment is calculated
712 only for trivial cases. TODO. */
714 static bool
715 vect_compute_data_ref_alignment (struct data_reference *dr)
717 gimple stmt = DR_STMT (dr);
718 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
719 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
720 struct loop *loop = NULL;
721 tree ref = DR_REF (dr);
722 tree vectype;
723 tree base, base_addr;
724 bool base_aligned;
725 tree misalign;
726 tree aligned_to, alignment;
728 if (vect_print_dump_info (REPORT_DETAILS))
729 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
731 if (loop_vinfo)
732 loop = LOOP_VINFO_LOOP (loop_vinfo);
734 /* Initialize misalignment to unknown. */
735 SET_DR_MISALIGNMENT (dr, -1);
737 misalign = DR_INIT (dr);
738 aligned_to = DR_ALIGNED_TO (dr);
739 base_addr = DR_BASE_ADDRESS (dr);
740 vectype = STMT_VINFO_VECTYPE (stmt_info);
742 /* In case the dataref is in an inner-loop of the loop that is being
743 vectorized (LOOP), we use the base and misalignment information
744 relative to the outer-loop (LOOP). This is ok only if the misalignment
745 stays the same throughout the execution of the inner-loop, which is why
746 we have to check that the stride of the dataref in the inner-loop is a
747 multiple of the vector size. */
748 if (loop && nested_in_vect_loop_p (loop, stmt))
750 tree step = DR_STEP (dr);
751 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
753 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
755 if (vect_print_dump_info (REPORT_ALIGNMENT))
756 fprintf (vect_dump, "inner step divides the vector-size.");
757 misalign = STMT_VINFO_DR_INIT (stmt_info);
758 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
759 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
761 else
763 if (vect_print_dump_info (REPORT_ALIGNMENT))
764 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
765 misalign = NULL_TREE;
769 base = build_fold_indirect_ref (base_addr);
770 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
772 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
773 || !misalign)
775 if (vect_print_dump_info (REPORT_ALIGNMENT))
777 fprintf (vect_dump, "Unknown alignment for access: ");
778 print_generic_expr (vect_dump, base, TDF_SLIM);
780 return true;
783 if ((DECL_P (base)
784 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
785 alignment) >= 0)
786 || (TREE_CODE (base_addr) == SSA_NAME
787 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
788 TREE_TYPE (base_addr)))),
789 alignment) >= 0))
790 base_aligned = true;
791 else
792 base_aligned = false;
794 if (!base_aligned)
796 /* Do not change the alignment of global variables if
797 flag_section_anchors is enabled. */
798 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
799 || (TREE_STATIC (base) && flag_section_anchors))
801 if (vect_print_dump_info (REPORT_DETAILS))
803 fprintf (vect_dump, "can't force alignment of ref: ");
804 print_generic_expr (vect_dump, ref, TDF_SLIM);
806 return true;
809 /* Force the alignment of the decl.
810 NOTE: This is the only change to the code we make during
811 the analysis phase, before deciding to vectorize the loop. */
812 if (vect_print_dump_info (REPORT_DETAILS))
814 fprintf (vect_dump, "force alignment of ");
815 print_generic_expr (vect_dump, ref, TDF_SLIM);
818 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
819 DECL_USER_ALIGN (base) = 1;
822 /* At this point we assume that the base is aligned. */
823 gcc_assert (base_aligned
824 || (TREE_CODE (base) == VAR_DECL
825 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
827 /* Modulo alignment. */
828 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
830 if (!host_integerp (misalign, 1))
832 /* Negative or overflowed misalignment value. */
833 if (vect_print_dump_info (REPORT_DETAILS))
834 fprintf (vect_dump, "unexpected misalign value");
835 return false;
838 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
840 if (vect_print_dump_info (REPORT_DETAILS))
842 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
843 print_generic_expr (vect_dump, ref, TDF_SLIM);
846 return true;
850 /* Function vect_compute_data_refs_alignment
852 Compute the misalignment of data references in the loop.
853 Return FALSE if a data reference is found that cannot be vectorized. */
855 static bool
856 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
857 bb_vec_info bb_vinfo)
859 VEC (data_reference_p, heap) *datarefs;
860 struct data_reference *dr;
861 unsigned int i;
863 if (loop_vinfo)
864 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
865 else
866 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
868 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
869 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
870 && !vect_compute_data_ref_alignment (dr))
872 if (bb_vinfo)
874 /* Mark unsupported statement as unvectorizable. */
875 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
876 continue;
878 else
879 return false;
882 return true;
886 /* Function vect_update_misalignment_for_peel
888 DR - the data reference whose misalignment is to be adjusted.
889 DR_PEEL - the data reference whose misalignment is being made
890 zero in the vector loop by the peel.
891 NPEEL - the number of iterations in the peel loop if the misalignment
892 of DR_PEEL is known at compile time. */
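/* Worked example (illustrative, assuming a 16-byte vector type and 4-byte
   elements): if DR has a known misalignment of 8 bytes and NPEEL is 2, the
   updated misalignment is (8 + 2 * 4) % 16 = 0, i.e. DR also becomes
   aligned by the peel.  */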
894 static void
895 vect_update_misalignment_for_peel (struct data_reference *dr,
896 struct data_reference *dr_peel, int npeel)
898 unsigned int i;
899 VEC(dr_p,heap) *same_align_drs;
900 struct data_reference *current_dr;
901 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
902 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
903 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
904 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
906 /* For interleaved data accesses the step in the loop must be multiplied by
907 the size of the interleaving group. */
908 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
909 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
910 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
911 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
913 /* It can be assumed that the data refs with the same alignment as dr_peel
914 are aligned in the vector loop. */
915 same_align_drs
916 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
917 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
919 if (current_dr != dr)
920 continue;
921 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
922 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
923 SET_DR_MISALIGNMENT (dr, 0);
924 return;
927 if (known_alignment_for_access_p (dr)
928 && known_alignment_for_access_p (dr_peel))
930 int misal = DR_MISALIGNMENT (dr);
931 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
932 misal += npeel * dr_size;
933 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
934 SET_DR_MISALIGNMENT (dr, misal);
935 return;
938 if (vect_print_dump_info (REPORT_DETAILS))
939 fprintf (vect_dump, "Setting misalignment to -1.");
940 SET_DR_MISALIGNMENT (dr, -1);
944 /* Function vect_verify_datarefs_alignment
946 Return TRUE if all data references in the loop can be
947 handled with respect to alignment. */
949 bool
950 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
952 VEC (data_reference_p, heap) *datarefs;
953 struct data_reference *dr;
954 enum dr_alignment_support supportable_dr_alignment;
955 unsigned int i;
957 if (loop_vinfo)
958 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
959 else
960 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
962 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
964 gimple stmt = DR_STMT (dr);
965 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
967 /* For interleaving, only the alignment of the first access matters.
968 Skip statements marked as not vectorizable. */
969 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
970 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
971 || !STMT_VINFO_VECTORIZABLE (stmt_info))
972 continue;
974 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
975 if (!supportable_dr_alignment)
977 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
979 if (DR_IS_READ (dr))
980 fprintf (vect_dump,
981 "not vectorized: unsupported unaligned load.");
982 else
983 fprintf (vect_dump,
984 "not vectorized: unsupported unaligned store.");
986 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
988 return false;
990 if (supportable_dr_alignment != dr_aligned
991 && vect_print_dump_info (REPORT_ALIGNMENT))
992 fprintf (vect_dump, "Vectorizing an unaligned access.");
994 return true;
998 /* Function vector_alignment_reachable_p
1000 Return true if vector alignment for DR is reachable by peeling
1001 a few loop iterations. Return false otherwise. */
1003 static bool
1004 vector_alignment_reachable_p (struct data_reference *dr)
1006 gimple stmt = DR_STMT (dr);
1007 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1008 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1010 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1012 /* For interleaved access we peel only if the number of iterations in
1013 the prolog loop (VF - misalignment) is a multiple of the
1014 number of interleaved accesses (the group size). */
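/* For instance (illustrative): with V4SI vectors (4 elements) and a group
   of size 2, a misalignment of 1 element leaves 4 - 1 = 3 prolog
   iterations, which is not a multiple of 2, so alignment is not considered
   reachable by peeling.  */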
1015 int elem_size, mis_in_elements;
1016 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1018 /* FORNOW: handle only known alignment. */
1019 if (!known_alignment_for_access_p (dr))
1020 return false;
1022 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1023 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1025 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1026 return false;
1029 /* If the misalignment is known at compile time then allow peeling
1030 only if natural alignment is reachable through peeling. */
1031 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1033 HOST_WIDE_INT elmsize =
1034 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1035 if (vect_print_dump_info (REPORT_DETAILS))
1037 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1038 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1040 if (DR_MISALIGNMENT (dr) % elmsize)
1042 if (vect_print_dump_info (REPORT_DETAILS))
1043 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1044 return false;
1048 if (!known_alignment_for_access_p (dr))
1050 tree type = (TREE_TYPE (DR_REF (dr)));
1051 tree ba = DR_BASE_OBJECT (dr);
1052 bool is_packed = false;
1054 if (ba)
1055 is_packed = contains_packed_reference (ba);
1057 if (vect_print_dump_info (REPORT_DETAILS))
1058 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1059 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1060 return true;
1061 else
1062 return false;
1065 return true;
1069 /* Calculate the cost of the memory access represented by DR. */
1071 static void
1072 vect_get_data_access_cost (struct data_reference *dr,
1073 unsigned int *inside_cost,
1074 unsigned int *outside_cost)
1076 gimple stmt = DR_STMT (dr);
1077 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1078 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1079 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1080 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1081 int ncopies = vf / nunits;
1082 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1084 if (!supportable_dr_alignment)
1085 *inside_cost = VECT_MAX_COST;
1086 else
1088 if (DR_IS_READ (dr))
1089 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1090 else
1091 vect_get_store_cost (dr, ncopies, inside_cost);
1094 if (vect_print_dump_info (REPORT_COST))
1095 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1096 "outside_cost = %d.", *inside_cost, *outside_cost);
1100 static hashval_t
1101 vect_peeling_hash (const void *elem)
1103 const struct _vect_peel_info *peel_info;
1105 peel_info = (const struct _vect_peel_info *) elem;
1106 return (hashval_t) peel_info->npeel;
1110 static int
1111 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1113 const struct _vect_peel_info *a, *b;
1115 a = (const struct _vect_peel_info *) elem1;
1116 b = (const struct _vect_peel_info *) elem2;
1117 return (a->npeel == b->npeel);
1121 /* Insert DR into peeling hash table with NPEEL as key. */
1123 static void
1124 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1125 int npeel)
1127 struct _vect_peel_info elem, *slot;
1128 void **new_slot;
1129 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1131 elem.npeel = npeel;
1132 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1133 &elem);
1134 if (slot)
1135 slot->count++;
1136 else
1138 slot = XNEW (struct _vect_peel_info);
1139 slot->npeel = npeel;
1140 slot->dr = dr;
1141 slot->count = 1;
1142 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1143 INSERT);
1144 *new_slot = slot;
1147 if (!supportable_dr_alignment && !flag_vect_cost_model)
1148 slot->count += VECT_MAX_COST;
1152 /* Traverse the peeling hash table to find the peeling option that aligns the
1153 maximum number of data accesses. */
1155 static int
1156 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1158 vect_peel_info elem = (vect_peel_info) *slot;
1159 vect_peel_extended_info max = (vect_peel_extended_info) data;
1161 if (elem->count > max->peel_info.count)
1163 max->peel_info.npeel = elem->npeel;
1164 max->peel_info.count = elem->count;
1165 max->peel_info.dr = elem->dr;
1168 return 1;
1172 /* Traverse the peeling hash table and calculate the cost of each peeling
1173 option. Find the one with the lowest cost. */
1175 static int
1176 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1178 vect_peel_info elem = (vect_peel_info) *slot;
1179 vect_peel_extended_info min = (vect_peel_extended_info) data;
1180 int save_misalignment, dummy;
1181 unsigned int inside_cost = 0, outside_cost = 0, i;
1182 gimple stmt = DR_STMT (elem->dr);
1183 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1184 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1185 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1186 struct data_reference *dr;
1188 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1190 stmt = DR_STMT (dr);
1191 stmt_info = vinfo_for_stmt (stmt);
1192 /* For interleaving, only the alignment of the first access
1193 matters. */
1194 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1195 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1196 continue;
1198 save_misalignment = DR_MISALIGNMENT (dr);
1199 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1200 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1201 SET_DR_MISALIGNMENT (dr, save_misalignment);
1204 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1205 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1207 if (inside_cost < min->inside_cost
1208 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1210 min->inside_cost = inside_cost;
1211 min->outside_cost = outside_cost;
1212 min->peel_info.dr = elem->dr;
1213 min->peel_info.npeel = elem->npeel;
1216 return 1;
1220 /* Choose best peeling option by traversing peeling hash table and either
1221 choosing an option with the lowest cost (if cost model is enabled) or the
1222 option that aligns as many accesses as possible. */
1224 static struct data_reference *
1225 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1226 unsigned int *npeel)
1228 struct _vect_peel_extended_info res;
1230 res.peel_info.dr = NULL;
1232 if (flag_vect_cost_model)
1234 res.inside_cost = INT_MAX;
1235 res.outside_cost = INT_MAX;
1236 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1237 vect_peeling_hash_get_lowest_cost, &res);
1239 else
1241 res.peel_info.count = 0;
1242 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1243 vect_peeling_hash_get_most_frequent, &res);
1246 *npeel = res.peel_info.npeel;
1247 return res.peel_info.dr;
1251 /* Function vect_enhance_data_refs_alignment
1253 This pass will use loop versioning and loop peeling in order to enhance
1254 the alignment of data references in the loop.
1256 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1257 original loop is to be vectorized; Any other loops that are created by
1258 the transformations performed in this pass - are not supposed to be
1259 vectorized. This restriction will be relaxed.
1261 This pass will require a cost model to guide it whether to apply peeling
1262 or versioning or a combination of the two. For example, the scheme that
1263 Intel uses when given a loop with several memory accesses is as follows:
1264 choose one memory access ('p') whose alignment you want to force by doing
1265 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1266 other accesses are not necessarily aligned, or (2) use loop versioning to
1267 generate one loop in which all accesses are aligned, and another loop in
1268 which only 'p' is necessarily aligned.
1270 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1271 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1272 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1274 Devising a cost model is the most critical aspect of this work. It will
1275 guide us on which access to peel for, whether to use loop versioning, how
1276 many versions to create, etc. The cost model will probably consist of
1277 generic considerations as well as target specific considerations (on
1278 powerpc for example, misaligned stores are more painful than misaligned
1279 loads).
1281 Here are the general steps involved in alignment enhancements:
1283 -- original loop, before alignment analysis:
1284 for (i=0; i<N; i++){
1285 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1286 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1289 -- After vect_compute_data_refs_alignment:
1290 for (i=0; i<N; i++){
1291 x = q[i]; # DR_MISALIGNMENT(q) = 3
1292 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1295 -- Possibility 1: we do loop versioning:
1296 if (p is aligned) {
1297 for (i=0; i<N; i++){ # loop 1A
1298 x = q[i]; # DR_MISALIGNMENT(q) = 3
1299 p[i] = y; # DR_MISALIGNMENT(p) = 0
1302 else {
1303 for (i=0; i<N; i++){ # loop 1B
1304 x = q[i]; # DR_MISALIGNMENT(q) = 3
1305 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1309 -- Possibility 2: we do loop peeling:
1310 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1311 x = q[i];
1312 p[i] = y;
1314 for (i = 3; i < N; i++){ # loop 2A
1315 x = q[i]; # DR_MISALIGNMENT(q) = 0
1316 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1319 -- Possibility 3: combination of loop peeling and versioning:
1320 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1321 x = q[i];
1322 p[i] = y;
1324 if (p is aligned) {
1325 for (i = 3; i<N; i++){ # loop 3A
1326 x = q[i]; # DR_MISALIGNMENT(q) = 0
1327 p[i] = y; # DR_MISALIGNMENT(p) = 0
1330 else {
1331 for (i = 3; i<N; i++){ # loop 3B
1332 x = q[i]; # DR_MISALIGNMENT(q) = 0
1333 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1337 These loops are later passed to loop_transform to be vectorized. The
1338 vectorizer will use the alignment information to guide the transformation
1339 (whether to generate regular loads/stores, or with special handling for
1340 misalignment). */
1342 bool
1343 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1345 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1346 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1347 enum dr_alignment_support supportable_dr_alignment;
1348 struct data_reference *dr0 = NULL, *first_store = NULL;
1349 struct data_reference *dr;
1350 unsigned int i, j;
1351 bool do_peeling = false;
1352 bool do_versioning = false;
1353 bool stat;
1354 gimple stmt;
1355 stmt_vec_info stmt_info;
1356 int vect_versioning_for_alias_required;
1357 unsigned int npeel = 0;
1358 bool all_misalignments_unknown = true;
1359 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1360 unsigned possible_npeel_number = 1;
1361 tree vectype;
1362 unsigned int nelements, mis, same_align_drs_max = 0;
1364 if (vect_print_dump_info (REPORT_DETAILS))
1365 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1367 /* While cost model enhancements are expected in the future, the high level
1368 view of the code at this time is as follows:
1370 A) If there is a misaligned access then see if peeling to align
1371 this access can make all data references satisfy
1372 vect_supportable_dr_alignment. If so, update data structures
1373 as needed and return true.
1375 B) If peeling wasn't possible and there is a data reference with an
1376 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1377 then see if loop versioning checks can be used to make all data
1378 references satisfy vect_supportable_dr_alignment. If so, update
1379 data structures as needed and return true.
1381 C) If neither peeling nor versioning were successful then return false if
1382 any data reference does not satisfy vect_supportable_dr_alignment.
1384 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1386 Note, Possibility 3 above (which is peeling and versioning together) is not
1387 being done at this time. */
1389 /* (1) Peeling to force alignment. */
1391 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1392 Considerations:
1393 + How many accesses will become aligned due to the peeling
1394 - How many accesses will become unaligned due to the peeling,
1395 and the cost of misaligned accesses.
1396 - The cost of peeling (the extra runtime checks, the increase
1397 in code size). */
1399 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1401 stmt = DR_STMT (dr);
1402 stmt_info = vinfo_for_stmt (stmt);
1404 /* For interleaving, only the alignment of the first access
1405 matters. */
1406 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1407 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1408 continue;
1410 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1411 do_peeling = vector_alignment_reachable_p (dr);
1412 if (do_peeling)
1414 if (known_alignment_for_access_p (dr))
1416 unsigned int npeel_tmp;
1418 /* Save info about DR in the hash table. */
1419 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1420 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1421 htab_create (1, vect_peeling_hash,
1422 vect_peeling_hash_eq, free);
1424 vectype = STMT_VINFO_VECTYPE (stmt_info);
1425 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1426 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1427 TREE_TYPE (DR_REF (dr))));
1428 npeel_tmp = (nelements - mis) % vf;
1430 /* For multiple types, it is possible that the bigger type access
1431 will have more than one peeling option. E.g., a loop with two
1432 types: one of size (vector size / 4), and the other one of
1433 size (vector size / 8). The vectorization factor will be 8. If both
1434 accesses are misaligned by 3, the first one needs one scalar
1435 iteration to be aligned, and the second one needs 5. But the
1436 first one will also be aligned by peeling 5 scalar
1437 iterations, and in that case both accesses will be aligned.
1438 Hence, except for the immediate peeling amount, we also want
1439 to try to add the full number of vector elements, while we don't exceed
1440 the vectorization factor.
1441 We do this automatically for the cost model, since we calculate the cost
1442 for every peeling option. */
1443 if (!flag_vect_cost_model)
1444 possible_npeel_number = vf / nelements;
1446 /* Handle the aligned case. We may decide to align some other
1447 access, making DR unaligned. */
1448 if (DR_MISALIGNMENT (dr) == 0)
1450 npeel_tmp = 0;
1451 if (!flag_vect_cost_model)
1452 possible_npeel_number++;
1455 for (j = 0; j < possible_npeel_number; j++)
1457 gcc_assert (npeel_tmp <= vf);
1458 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1459 npeel_tmp += nelements;
1462 all_misalignments_unknown = false;
1463 /* Data-ref that was chosen for the case that all the
1464 misalignments are unknown is not relevant anymore, since we
1465 have a data-ref with known alignment. */
1466 dr0 = NULL;
1468 else
1470 /* If we don't know all the misalignment values, we prefer
1471 peeling for the data-ref that has the maximum number of data-refs
1472 with the same alignment, unless the target prefers to align
1473 stores over loads. */
1474 if (all_misalignments_unknown)
1476 if (same_align_drs_max < VEC_length (dr_p,
1477 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1478 || !dr0)
1480 same_align_drs_max = VEC_length (dr_p,
1481 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1482 dr0 = dr;
1485 if (!first_store && !DR_IS_READ (dr))
1486 first_store = dr;
1489 /* If there are both known and unknown misaligned accesses in the
1490 loop, we choose peeling amount according to the known
1491 accesses. */
1494 if (!supportable_dr_alignment)
1496 dr0 = dr;
1497 if (!first_store && !DR_IS_READ (dr))
1498 first_store = dr;
1502 else
1504 if (!aligned_access_p (dr))
1506 if (vect_print_dump_info (REPORT_DETAILS))
1507 fprintf (vect_dump, "vector alignment may not be reachable");
1509 break;
1514 vect_versioning_for_alias_required
1515 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1517 /* Temporarily, if versioning for alias is required, we disable peeling
1518 until we support peeling and versioning. Often peeling for alignment
1519 will require peeling for loop-bound, which in turn requires that we
1520 know how to adjust the loop ivs after the loop. */
1521 if (vect_versioning_for_alias_required
1522 || !vect_can_advance_ivs_p (loop_vinfo)
1523 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1524 do_peeling = false;
1526 if (do_peeling && all_misalignments_unknown
1527 && vect_supportable_dr_alignment (dr0, false))
1530 /* Check if the target requires to prefer stores over loads, i.e., if
1531 misaligned stores are more expensive than misaligned loads (taking
1532 drs with same alignment into account). */
1533 if (first_store && DR_IS_READ (dr0))
1535 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1536 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1537 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1538 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1540 vect_get_data_access_cost (dr0, &load_inside_cost,
1541 &load_outside_cost);
1542 vect_get_data_access_cost (first_store, &store_inside_cost,
1543 &store_outside_cost);
1545 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1546 aligning the load DR0). */
1547 load_inside_penalty = store_inside_cost;
1548 load_outside_penalty = store_outside_cost;
1549 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1550 (vinfo_for_stmt (DR_STMT (first_store))),
1551 i, dr);
1552 i++)
1553 if (DR_IS_READ (dr))
1555 load_inside_penalty += load_inside_cost;
1556 load_outside_penalty += load_outside_cost;
1558 else
1560 load_inside_penalty += store_inside_cost;
1561 load_outside_penalty += store_outside_cost;
1564 /* Calculate the penalty for leaving DR0 unaligned (by
1565 aligning the FIRST_STORE). */
1566 store_inside_penalty = load_inside_cost;
1567 store_outside_penalty = load_outside_cost;
1568 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1569 (vinfo_for_stmt (DR_STMT (dr0))),
1570 i, dr);
1571 i++)
1572 if (DR_IS_READ (dr))
1574 store_inside_penalty += load_inside_cost;
1575 store_outside_penalty += load_outside_cost;
1577 else
1579 store_inside_penalty += store_inside_cost;
1580 store_outside_penalty += store_outside_cost;
1583 if (load_inside_penalty > store_inside_penalty
1584 || (load_inside_penalty == store_inside_penalty
1585 && load_outside_penalty > store_outside_penalty))
1586 dr0 = first_store;
1589 /* In case there are only loads with different unknown misalignments, use
1590 peeling only if it may help to align other accesses in the loop. */
1591 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1592 (vinfo_for_stmt (DR_STMT (dr0))))
1593 && vect_supportable_dr_alignment (dr0, false)
1594 != dr_unaligned_supported)
1595 do_peeling = false;
1598 if (do_peeling && !dr0)
1600 /* Peeling is possible, but no data access strictly requires it (each one is
1601 supported even when unaligned), so we try to choose the best possible peeling. */
1603 /* We should get here only if there are drs with known misalignment. */
1604 gcc_assert (!all_misalignments_unknown);
1606 /* Choose the best peeling from the hash table. */
1607 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1608 if (!dr0 || !npeel)
1609 do_peeling = false;
1612 if (do_peeling)
1614 stmt = DR_STMT (dr0);
1615 stmt_info = vinfo_for_stmt (stmt);
1616 vectype = STMT_VINFO_VECTYPE (stmt_info);
1617 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1619 if (known_alignment_for_access_p (dr0))
1621 if (!npeel)
1623 /* Since it's known at compile time, compute the number of
1624 iterations in the peeled loop (the peeling factor) for use in
1625 updating DR_MISALIGNMENT values. The peeling factor is the
1626 vectorization factor minus the misalignment as an element
1627 count. */
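/* For instance (illustrative): with V4SI vectors and 4-byte elements
   misaligned by 8 bytes, mis becomes 8 / 4 = 2 elements and
   npeel = 4 - 2 = 2 scalar iterations.  */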
1628 mis = DR_MISALIGNMENT (dr0);
1629 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1630 npeel = nelements - mis;
1633 /* For interleaved data access every iteration accesses all the
1634 members of the group, therefore we divide the number of iterations
1635 by the group size. */
1636 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1637 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1638 npeel /= DR_GROUP_SIZE (stmt_info);
1640 if (vect_print_dump_info (REPORT_DETAILS))
1641 fprintf (vect_dump, "Try peeling by %d", npeel);
1644 /* Ensure that all data refs can be vectorized after the peel. */
1645 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1647 int save_misalignment;
1649 if (dr == dr0)
1650 continue;
1652 stmt = DR_STMT (dr);
1653 stmt_info = vinfo_for_stmt (stmt);
1654 /* For interleaving, only the alignment of the first access
1655 matters. */
1656 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1657 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1658 continue;
1660 save_misalignment = DR_MISALIGNMENT (dr);
1661 vect_update_misalignment_for_peel (dr, dr0, npeel);
1662 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1663 SET_DR_MISALIGNMENT (dr, save_misalignment);
1665 if (!supportable_dr_alignment)
1667 do_peeling = false;
1668 break;
1672 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1674 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1675 if (!stat)
1676 do_peeling = false;
1677 else
1678 return stat;
1681 if (do_peeling)
1683 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1684 If the misalignment of DR_i is identical to that of dr0 then set
1685 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1686 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1687 by the peeling factor times the element size of DR_i (MOD the
1688 vectorization factor times the size). Otherwise, the
1689 misalignment of DR_i must be set to unknown. */
1690 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1691 if (dr != dr0)
1692 vect_update_misalignment_for_peel (dr, dr0, npeel);
1694 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1695 if (npeel)
1696 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1697 else
1698 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1699 SET_DR_MISALIGNMENT (dr0, 0);
1700 if (vect_print_dump_info (REPORT_ALIGNMENT))
1701 fprintf (vect_dump, "Alignment of access forced using peeling.");
1703 if (vect_print_dump_info (REPORT_DETAILS))
1704 fprintf (vect_dump, "Peeling for alignment will be applied.");
1706 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1707 gcc_assert (stat);
1708 return stat;
1713 /* (2) Versioning to force alignment. */
1715 /* Try versioning if:
1716 1) flag_tree_vect_loop_version is TRUE
1717 2) optimize loop for speed
1718 3) there is at least one unsupported misaligned data ref with an unknown
1719 misalignment, and
1720 4) all misaligned data refs with a known misalignment are supported, and
1721 5) the number of runtime alignment checks is within reason. */
1723 do_versioning =
1724 flag_tree_vect_loop_version
1725 && optimize_loop_nest_for_speed_p (loop)
1726 && (!loop->inner); /* FORNOW */
1728 if (do_versioning)
1730 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1732 stmt = DR_STMT (dr);
1733 stmt_info = vinfo_for_stmt (stmt);
1735 /* For interleaving, only the alignment of the first access
1736 matters. */
1737 if (aligned_access_p (dr)
1738 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1739 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1740 continue;
1742 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1744 if (!supportable_dr_alignment)
1746 gimple stmt;
1747 int mask;
1748 tree vectype;
1750 if (known_alignment_for_access_p (dr)
1751 || VEC_length (gimple,
1752 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1753 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1755 do_versioning = false;
1756 break;
1759 stmt = DR_STMT (dr);
1760 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1761 gcc_assert (vectype);
1763 /* The rightmost bits of an aligned address must be zeros.
1764 Construct the mask needed for this test. For example,
1765 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1766 mask must be 15 = 0xf. */
1767 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1769 /* FORNOW: use the same mask to test all potentially unaligned
1770 references in the loop. The vectorizer currently supports
1771 a single vector size, see the reference to
1772 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1773 vectorization factor is computed. */
1774 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1775 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1776 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1777 VEC_safe_push (gimple, heap,
1778 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1779 DR_STMT (dr));
1783 /* Versioning requires at least one misaligned data reference. */
1784 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1785 do_versioning = false;
1786 else if (!do_versioning)
1787 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1790 if (do_versioning)
1792 VEC(gimple,heap) *may_misalign_stmts
1793 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1794 gimple stmt;
1796 /* It can now be assumed that the data references in the statements
1797 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1798 of the loop being vectorized. */
1799 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1801 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1802 dr = STMT_VINFO_DATA_REF (stmt_info);
1803 SET_DR_MISALIGNMENT (dr, 0);
1804 if (vect_print_dump_info (REPORT_ALIGNMENT))
1805 fprintf (vect_dump, "Alignment of access forced using versioning.");
1808 if (vect_print_dump_info (REPORT_DETAILS))
1809 fprintf (vect_dump, "Versioning for alignment will be applied.");
1811 /* Peeling and versioning can't be done together at this time. */
1812 gcc_assert (! (do_peeling && do_versioning));
1814 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1815 gcc_assert (stat);
1816 return stat;
1819 /* This point is reached if neither peeling nor versioning is being done. */
1820 gcc_assert (! (do_peeling || do_versioning));
1822 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1823 return stat;
1827 /* Function vect_find_same_alignment_drs.
1829 Update group and alignment relations according to the chosen
1830 vectorization factor. */
1832 static void
1833 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1834 loop_vec_info loop_vinfo)
1836 unsigned int i;
1837 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1838 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1839 struct data_reference *dra = DDR_A (ddr);
1840 struct data_reference *drb = DDR_B (ddr);
1841 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1842 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1843 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1844 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1845 lambda_vector dist_v;
1846 unsigned int loop_depth;
1848 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1849 return;
1851 if (dra == drb)
1852 return;
1854 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1855 return;
1857 /* Loop-based vectorization and known data dependence. */
1858 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1859 return;
1861 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1862 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1864 int dist = dist_v[loop_depth];
1866 if (vect_print_dump_info (REPORT_DR_DETAILS))
1867 fprintf (vect_dump, "dependence distance = %d.", dist);
1869 /* Same loop iteration. */
1870 if (dist == 0
1871 || (dist % vectorization_factor == 0 && dra_size == drb_size))
 1873 /* References with distance 0, or a multiple of VF with equal element sizes, have the same alignment. */
1874 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1875 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1876 if (vect_print_dump_info (REPORT_ALIGNMENT))
1877 fprintf (vect_dump, "accesses have the same alignment.");
1878 if (vect_print_dump_info (REPORT_DR_DETAILS))
1880 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1881 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1882 fprintf (vect_dump, " and ");
1883 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
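   /* Example (illustrative, assuming VF = 4 and 4-byte ints): in

        for (i = 0; i < n; i++)
          a[i + 4] = a[i] + 1;

      the dependence distance between the read a[i] and the write a[i+4]
      is 4, a multiple of VF with equal element sizes, so the two
      references are recorded as having the same alignment.  */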
1890 /* Function vect_analyze_data_refs_alignment
1892 Analyze the alignment of the data-references in the loop.
1893 Return FALSE if a data reference is found that cannot be vectorized. */
1895 bool
1896 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1897 bb_vec_info bb_vinfo)
1899 if (vect_print_dump_info (REPORT_DETAILS))
1900 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1902 /* Mark groups of data references with same alignment using
1903 data dependence information. */
1904 if (loop_vinfo)
1906 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1907 struct data_dependence_relation *ddr;
1908 unsigned int i;
1910 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1911 vect_find_same_alignment_drs (ddr, loop_vinfo);
1914 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1916 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1917 fprintf (vect_dump,
1918 "not vectorized: can't calculate alignment for data ref.");
1919 return false;
1922 return true;
1926 /* Analyze groups of strided accesses: check that DR belongs to a group of
1927 strided accesses of legal size, step, etc. Detect gaps, single element
1928 interleaving, and other special cases. Set strided access info.
1929 Collect groups of strided stores for further use in SLP analysis. */
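   /* A typical interleaved store group, shown for illustration only:

        for (i = 0; i < n; i++)
          {
            out[2*i]     = a[i];
            out[2*i + 1] = b[i];
          }

      Here DR_STEP of each store is 2 * sizeof (out[0]), so the group
      STRIDE (its size, counting gaps) is 2 and there are no gaps.  */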
1931 static bool
1932 vect_analyze_group_access (struct data_reference *dr)
1934 tree step = DR_STEP (dr);
1935 tree scalar_type = TREE_TYPE (DR_REF (dr));
1936 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1937 gimple stmt = DR_STMT (dr);
1938 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1939 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1940 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1941 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1942 HOST_WIDE_INT stride;
1943 bool slp_impossible = false;
1945 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1946 interleaving group (including gaps). */
1947 stride = dr_step / type_size;
 1949 /* A non-consecutive access is possible only if it is part of an interleaving group. */
1950 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
 1952 /* Check if this DR is a part of interleaving, and is a single
 1953 element of the group that is accessed in the loop. */
1955 /* Gaps are supported only for loads. STEP must be a multiple of the type
1956 size. The size of the group must be a power of 2. */
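   /* Illustration (not from the original sources): a single-element
      interleaving load such as

        for (i = 0; i < n; i++)
          sum += in[4*i];

      has STEP equal to 4 * sizeof (in[0]) and STRIDE 4, a power of 2;
      the three elements skipped in each group are gaps, which are
      acceptable for loads only.  */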
1957 if (DR_IS_READ (dr)
1958 && (dr_step % type_size) == 0
1959 && stride > 0
1960 && exact_log2 (stride) != -1)
1962 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1963 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1964 if (vect_print_dump_info (REPORT_DR_DETAILS))
1966 fprintf (vect_dump, "Detected single element interleaving ");
1967 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1968 fprintf (vect_dump, " step ");
1969 print_generic_expr (vect_dump, step, TDF_SLIM);
1971 return true;
1974 if (vect_print_dump_info (REPORT_DETAILS))
1976 fprintf (vect_dump, "not consecutive access ");
1977 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1980 if (bb_vinfo)
1982 /* Mark the statement as unvectorizable. */
1983 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1984 return true;
1987 return false;
1990 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1992 /* First stmt in the interleaving chain. Check the chain. */
1993 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1994 struct data_reference *data_ref = dr;
1995 unsigned int count = 1;
1996 tree next_step;
1997 tree prev_init = DR_INIT (data_ref);
1998 gimple prev = stmt;
1999 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2001 while (next)
 2003 /* Skip same data-refs. When two or more stmts share a data-ref
2004 (supported only for loads), we vectorize only the first stmt, and
2005 the rest get their vectorized loads from the first one. */
2006 if (!tree_int_cst_compare (DR_INIT (data_ref),
2007 DR_INIT (STMT_VINFO_DATA_REF (
2008 vinfo_for_stmt (next)))))
2010 if (!DR_IS_READ (data_ref))
2012 if (vect_print_dump_info (REPORT_DETAILS))
2013 fprintf (vect_dump, "Two store stmts share the same dr.");
2014 return false;
 2017 /* Check that there are no load-store dependencies for these loads,
 2018 to prevent a load-store-load to the same location. */
2019 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2020 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2022 if (vect_print_dump_info (REPORT_DETAILS))
2023 fprintf (vect_dump,
2024 "READ_WRITE dependence in interleaving.");
2025 return false;
2028 /* For load use the same data-ref load. */
2029 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2031 prev = next;
2032 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2033 continue;
2035 prev = next;
2037 /* Check that all the accesses have the same STEP. */
2038 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2039 if (tree_int_cst_compare (step, next_step))
2041 if (vect_print_dump_info (REPORT_DETAILS))
2042 fprintf (vect_dump, "not consecutive access in interleaving");
2043 return false;
2046 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2047 /* Check that the distance between two accesses is equal to the type
2048 size. Otherwise, we have gaps. */
2049 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2050 - TREE_INT_CST_LOW (prev_init)) / type_size;
2051 if (diff != 1)
2053 /* FORNOW: SLP of accesses with gaps is not supported. */
2054 slp_impossible = true;
2055 if (!DR_IS_READ (data_ref))
2057 if (vect_print_dump_info (REPORT_DETAILS))
2058 fprintf (vect_dump, "interleaved store with gaps");
2059 return false;
2062 gaps += diff - 1;
2065 /* Store the gap from the previous member of the group. If there is no
2066 gap in the access, DR_GROUP_GAP is always 1. */
2067 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2069 prev_init = DR_INIT (data_ref);
2070 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2071 /* Count the number of data-refs in the chain. */
2072 count++;
 2075 /* COUNT is the number of accesses found; we multiply it by the size of
2076 the type to get COUNT_IN_BYTES. */
2077 count_in_bytes = type_size * count;
2079 /* Check that the size of the interleaving (including gaps) is not
2080 greater than STEP. */
2081 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2083 if (vect_print_dump_info (REPORT_DETAILS))
2085 fprintf (vect_dump, "interleaving size is greater than step for ");
2086 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2088 return false;
2091 /* Check that the size of the interleaving is equal to STEP for stores,
2092 i.e., that there are no gaps. */
2093 if (dr_step && dr_step != count_in_bytes)
2095 if (DR_IS_READ (dr))
2097 slp_impossible = true;
2098 /* There is a gap after the last load in the group. This gap is a
2099 difference between the stride and the number of elements. When
2100 there is no gap, this difference should be 0. */
2101 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
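   /* Worked example (illustrative): a group of loads in[4*i], in[4*i+1]
      and in[4*i+2] with STEP 4 * sizeof (in[0]) has STRIDE 4 and
      COUNT 3, so DR_GROUP_GAP of the first stmt becomes 4 - 3 = 1,
      the single element left unread at the end of each group.  */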
2103 else
2105 if (vect_print_dump_info (REPORT_DETAILS))
2106 fprintf (vect_dump, "interleaved store with gaps");
2107 return false;
2111 /* Check that STEP is a multiple of type size. */
2112 if (dr_step && (dr_step % type_size) != 0)
2114 if (vect_print_dump_info (REPORT_DETAILS))
2116 fprintf (vect_dump, "step is not a multiple of type size: step ");
2117 print_generic_expr (vect_dump, step, TDF_SLIM);
2118 fprintf (vect_dump, " size ");
2119 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2120 TDF_SLIM);
2122 return false;
2125 /* FORNOW: we handle only interleaving that is a power of 2.
 2126 We don't fail here if it may still be possible to vectorize the
2127 group using SLP. If not, the size of the group will be checked in
2128 vect_analyze_operations, and the vectorization will fail. */
2129 if (exact_log2 (stride) == -1)
2131 if (vect_print_dump_info (REPORT_DETAILS))
2132 fprintf (vect_dump, "interleaving is not a power of 2");
2134 if (slp_impossible)
2135 return false;
2138 if (stride == 0)
2139 stride = count;
2141 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2142 if (vect_print_dump_info (REPORT_DETAILS))
2143 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2145 /* SLP: create an SLP data structure for every interleaving group of
2146 stores for further analysis in vect_analyse_slp. */
2147 if (!DR_IS_READ (dr) && !slp_impossible)
2149 if (loop_vinfo)
2150 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2151 stmt);
2152 if (bb_vinfo)
2153 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2154 stmt);
2158 return true;
2162 /* Analyze the access pattern of the data-reference DR.
2163 In case of non-consecutive accesses call vect_analyze_group_access() to
2164 analyze groups of strided accesses. */
2166 static bool
2167 vect_analyze_data_ref_access (struct data_reference *dr)
2169 tree step = DR_STEP (dr);
2170 tree scalar_type = TREE_TYPE (DR_REF (dr));
2171 gimple stmt = DR_STMT (dr);
2172 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2173 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2174 struct loop *loop = NULL;
2175 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2177 if (loop_vinfo)
2178 loop = LOOP_VINFO_LOOP (loop_vinfo);
2180 if (loop_vinfo && !step)
2182 if (vect_print_dump_info (REPORT_DETAILS))
2183 fprintf (vect_dump, "bad data-ref access in loop");
2184 return false;
2187 /* Don't allow invariant accesses in loops. */
2188 if (loop_vinfo && dr_step == 0)
2189 return false;
2191 if (loop && nested_in_vect_loop_p (loop, stmt))
2193 /* Interleaved accesses are not yet supported within outer-loop
2194 vectorization for references in the inner-loop. */
2195 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2197 /* For the rest of the analysis we use the outer-loop step. */
2198 step = STMT_VINFO_DR_STEP (stmt_info);
2199 dr_step = TREE_INT_CST_LOW (step);
2201 if (dr_step == 0)
2203 if (vect_print_dump_info (REPORT_ALIGNMENT))
2204 fprintf (vect_dump, "zero step in outer loop.");
2205 if (DR_IS_READ (dr))
2206 return true;
2207 else
2208 return false;
2212 /* Consecutive? */
2213 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2215 /* Mark that it is not interleaving. */
2216 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2217 return true;
2220 if (loop && nested_in_vect_loop_p (loop, stmt))
2222 if (vect_print_dump_info (REPORT_ALIGNMENT))
2223 fprintf (vect_dump, "strided access in outer loop.");
2224 return false;
 2227 /* Non-consecutive access - check whether it is part of an interleaving group. */
2228 return vect_analyze_group_access (dr);
2232 /* Function vect_analyze_data_ref_accesses.
2234 Analyze the access pattern of all the data references in the loop.
2236 FORNOW: the only access pattern that is considered vectorizable is a
2237 simple step 1 (consecutive) access.
2239 FORNOW: handle only arrays and pointer accesses. */
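   /* The canonical vectorizable pattern, shown for illustration: a
      consecutive (step 1) access such as

        for (i = 0; i < n; i++)
          a[i] = b[i] + c[i];

      where DR_STEP of every data-ref equals its element size, so
      vect_analyze_data_ref_access marks each of them as
      non-interleaving.  */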
2241 bool
2242 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2244 unsigned int i;
2245 VEC (data_reference_p, heap) *datarefs;
2246 struct data_reference *dr;
2248 if (vect_print_dump_info (REPORT_DETAILS))
2249 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2251 if (loop_vinfo)
2252 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2253 else
2254 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2256 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2257 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2258 && !vect_analyze_data_ref_access (dr))
2260 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2261 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2263 if (bb_vinfo)
2265 /* Mark the statement as not vectorizable. */
2266 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2267 continue;
2269 else
2270 return false;
2273 return true;
2276 /* Function vect_prune_runtime_alias_test_list.
2278 Prune a list of ddrs to be tested at run-time by versioning for alias.
 2279 Return FALSE if the resulting list of ddrs is longer than allowed by
2280 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
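   /* Sketch (illustrative only) of the runtime test that versioning for
      alias ultimately guards the loop with, assuming two data-refs that
      each touch N elements:

        if (a + N <= b || b + N <= a)
          ... vectorized loop ...
        else
          ... scalar loop ...

      Removing ddrs whose address ranges are equal keeps the number of
      such checks below PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS.  */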
2282 bool
2283 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2285 VEC (ddr_p, heap) * ddrs =
2286 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2287 unsigned i, j;
2289 if (vect_print_dump_info (REPORT_DETAILS))
2290 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2292 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2294 bool found;
2295 ddr_p ddr_i;
2297 ddr_i = VEC_index (ddr_p, ddrs, i);
2298 found = false;
2300 for (j = 0; j < i; j++)
2302 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2304 if (vect_vfa_range_equal (ddr_i, ddr_j))
2306 if (vect_print_dump_info (REPORT_DR_DETAILS))
2308 fprintf (vect_dump, "found equal ranges ");
2309 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2310 fprintf (vect_dump, ", ");
2311 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2312 fprintf (vect_dump, " and ");
2313 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2314 fprintf (vect_dump, ", ");
2315 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2317 found = true;
2318 break;
2322 if (found)
2324 VEC_ordered_remove (ddr_p, ddrs, i);
2325 continue;
2327 i++;
2330 if (VEC_length (ddr_p, ddrs) >
2331 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2333 if (vect_print_dump_info (REPORT_DR_DETAILS))
2335 fprintf (vect_dump,
2336 "disable versioning for alias - max number of generated "
2337 "checks exceeded.");
2340 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2342 return false;
2345 return true;
2349 /* Function vect_analyze_data_refs.
2351 Find all the data references in the loop or basic block.
2353 The general structure of the analysis of data refs in the vectorizer is as
2354 follows:
2355 1- vect_analyze_data_refs(loop/bb): call
2356 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2357 in the loop/bb and their dependences.
2358 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2359 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2360 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
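      For illustration (not part of the original comment): for an access
      a[i + 4] in a loop over i, the analysis records roughly

        DR_BASE_ADDRESS = &a
        DR_OFFSET       = 0
        DR_INIT         = 4 * sizeof (a[0])
        DR_STEP         = sizeof (a[0])

      which steps 2-4 above then check.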
2364 bool
2365 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2366 bb_vec_info bb_vinfo,
2367 int *min_vf)
2369 struct loop *loop = NULL;
2370 basic_block bb = NULL;
2371 unsigned int i;
2372 VEC (data_reference_p, heap) *datarefs;
2373 struct data_reference *dr;
2374 tree scalar_type;
2375 bool res;
2377 if (vect_print_dump_info (REPORT_DETAILS))
2378 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2380 if (loop_vinfo)
2382 loop = LOOP_VINFO_LOOP (loop_vinfo);
2383 res = compute_data_dependences_for_loop
2384 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2385 &LOOP_VINFO_DDRS (loop_vinfo));
2387 if (!res)
2389 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2390 fprintf (vect_dump, "not vectorized: loop contains function calls"
2391 " or data references that cannot be analyzed");
2392 return false;
2395 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2397 else
2399 bb = BB_VINFO_BB (bb_vinfo);
2400 res = compute_data_dependences_for_bb (bb, true,
2401 &BB_VINFO_DATAREFS (bb_vinfo),
2402 &BB_VINFO_DDRS (bb_vinfo));
2403 if (!res)
2405 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2406 fprintf (vect_dump, "not vectorized: basic block contains function"
2407 " calls or data references that cannot be analyzed");
2408 return false;
2411 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2414 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2415 from stmt_vec_info struct to DR and vectype. */
2417 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2419 gimple stmt;
2420 stmt_vec_info stmt_info;
2421 tree base, offset, init;
2422 int vf;
2424 if (!dr || !DR_REF (dr))
2426 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2427 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2428 return false;
2431 stmt = DR_STMT (dr);
2432 stmt_info = vinfo_for_stmt (stmt);
2434 /* Check that analysis of the data-ref succeeded. */
2435 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2436 || !DR_STEP (dr))
2438 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2440 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2441 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2444 if (bb_vinfo)
2446 /* Mark the statement as not vectorizable. */
2447 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2448 continue;
2450 else
2451 return false;
2454 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2456 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2457 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2458 "constant");
2459 if (bb_vinfo)
2461 /* Mark the statement as not vectorizable. */
2462 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2463 continue;
2465 else
2466 return false;
2469 base = unshare_expr (DR_BASE_ADDRESS (dr));
2470 offset = unshare_expr (DR_OFFSET (dr));
2471 init = unshare_expr (DR_INIT (dr));
2473 /* Update DR field in stmt_vec_info struct. */
 2475 /* If the dataref is in an inner-loop of the loop that is considered
2476 for vectorization, we also want to analyze the access relative to
2477 the outer-loop (DR contains information only relative to the
2478 inner-most enclosing loop). We do that by building a reference to the
2479 first location accessed by the inner-loop, and analyze it relative to
2480 the outer-loop. */
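   /* Example (illustrative): when vectorizing the outer i-loop of

        for (i = 0; i < n; i++)
          for (j = 0; j < m; j++)
            s += in[i + j];

      DR describes in[i + j] relative to the innermost j-loop only, so we
      build a reference to the first location the inner loop accesses,
      in[i], and analyze its evolution in the i-loop to obtain
      STMT_VINFO_DR_STEP and the related fields.  */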
2481 if (loop && nested_in_vect_loop_p (loop, stmt))
2483 tree outer_step, outer_base, outer_init;
2484 HOST_WIDE_INT pbitsize, pbitpos;
2485 tree poffset;
2486 enum machine_mode pmode;
2487 int punsignedp, pvolatilep;
2488 affine_iv base_iv, offset_iv;
2489 tree dinit;
2491 /* Build a reference to the first location accessed by the
2492 inner-loop: *(BASE+INIT). (The first location is actually
2493 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2494 tree inner_base = build_fold_indirect_ref
2495 (fold_build2 (POINTER_PLUS_EXPR,
2496 TREE_TYPE (base), base,
2497 fold_convert (sizetype, init)));
2499 if (vect_print_dump_info (REPORT_DETAILS))
2501 fprintf (vect_dump, "analyze in outer-loop: ");
2502 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2505 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2506 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2507 gcc_assert (outer_base != NULL_TREE);
2509 if (pbitpos % BITS_PER_UNIT != 0)
2511 if (vect_print_dump_info (REPORT_DETAILS))
2512 fprintf (vect_dump, "failed: bit offset alignment.\n");
2513 return false;
2516 outer_base = build_fold_addr_expr (outer_base);
2517 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2518 &base_iv, false))
2520 if (vect_print_dump_info (REPORT_DETAILS))
2521 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2522 return false;
2525 if (offset)
2527 if (poffset)
2528 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2529 poffset);
2530 else
2531 poffset = offset;
2534 if (!poffset)
2536 offset_iv.base = ssize_int (0);
2537 offset_iv.step = ssize_int (0);
2539 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2540 &offset_iv, false))
2542 if (vect_print_dump_info (REPORT_DETAILS))
2543 fprintf (vect_dump, "evolution of offset is not affine.\n");
2544 return false;
2547 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2548 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2549 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2550 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2551 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2553 outer_step = size_binop (PLUS_EXPR,
2554 fold_convert (ssizetype, base_iv.step),
2555 fold_convert (ssizetype, offset_iv.step));
2557 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2558 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2559 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2560 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2561 STMT_VINFO_DR_OFFSET (stmt_info) =
2562 fold_convert (ssizetype, offset_iv.base);
2563 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2564 size_int (highest_pow2_factor (offset_iv.base));
2566 if (vect_print_dump_info (REPORT_DETAILS))
2568 fprintf (vect_dump, "\touter base_address: ");
2569 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2570 fprintf (vect_dump, "\n\touter offset from base address: ");
2571 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2572 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2573 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2574 fprintf (vect_dump, "\n\touter step: ");
2575 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2576 fprintf (vect_dump, "\n\touter aligned to: ");
2577 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2581 if (STMT_VINFO_DATA_REF (stmt_info))
2583 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2585 fprintf (vect_dump,
2586 "not vectorized: more than one data ref in stmt: ");
2587 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2589 return false;
2592 STMT_VINFO_DATA_REF (stmt_info) = dr;
2594 /* Set vectype for STMT. */
2595 scalar_type = TREE_TYPE (DR_REF (dr));
2596 STMT_VINFO_VECTYPE (stmt_info) =
2597 get_vectype_for_scalar_type (scalar_type);
2598 if (!STMT_VINFO_VECTYPE (stmt_info))
2600 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2602 fprintf (vect_dump,
2603 "not vectorized: no vectype for stmt: ");
2604 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2605 fprintf (vect_dump, " scalar_type: ");
2606 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2609 if (bb_vinfo)
2611 /* Mark the statement as not vectorizable. */
2612 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2613 continue;
2615 else
2616 return false;
2619 /* Adjust the minimal vectorization factor according to the
2620 vector type. */
2621 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2622 if (vf > *min_vf)
2623 *min_vf = vf;
2626 return true;
2630 /* Function vect_get_new_vect_var.
 2632 Returns a name for a new variable. The current naming scheme chooses
 2633 the prefix "vect_" or "vect_p" (depending on the value of VAR_KIND)
 2634 for vectorizer-generated variables and prepends it to NAME, if NAME is
 2635 provided.
2637 tree
2638 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2640 const char *prefix;
2641 tree new_vect_var;
2643 switch (var_kind)
2645 case vect_simple_var:
2646 prefix = "vect_";
2647 break;
2648 case vect_scalar_var:
2649 prefix = "stmp_";
2650 break;
2651 case vect_pointer_var:
2652 prefix = "vect_p";
2653 break;
2654 default:
2655 gcc_unreachable ();
2658 if (name)
2660 char* tmp = concat (prefix, name, NULL);
2661 new_vect_var = create_tmp_var (type, tmp);
2662 free (tmp);
2664 else
2665 new_vect_var = create_tmp_var (type, prefix);
2667 /* Mark vector typed variable as a gimple register variable. */
2668 if (TREE_CODE (type) == VECTOR_TYPE)
2669 DECL_GIMPLE_REG_P (new_vect_var) = true;
2671 return new_vect_var;
2675 /* Function vect_create_addr_base_for_vector_ref.
2677 Create an expression that computes the address of the first memory location
2678 that will be accessed for a data reference.
2680 Input:
2681 STMT: The statement containing the data reference.
2682 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
 2683 OFFSET: Optional. If supplied, it is added to the initial address.
 2684 LOOP: Specify the loop-nest relative to which the address should be computed.
2685 For example, when the dataref is in an inner-loop nested in an
2686 outer-loop that is now being vectorized, LOOP can be either the
2687 outer-loop, or the inner-loop. The first memory location accessed
2688 by the following dataref ('in' points to short):
2690 for (i=0; i<N; i++)
2691 for (j=0; j<M; j++)
2692 s += in[i+j]
2694 is as follows:
2695 if LOOP=i_loop: &in (relative to i_loop)
2696 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2698 Output:
2699 1. Return an SSA_NAME whose value is the address of the memory location of
2700 the first vector of the data reference.
2701 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2702 these statement(s) which define the returned SSA_NAME.
2704 FORNOW: We are only handling array accesses with step 1. */
2706 tree
2707 vect_create_addr_base_for_vector_ref (gimple stmt,
2708 gimple_seq *new_stmt_list,
2709 tree offset,
2710 struct loop *loop)
2712 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2713 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2714 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2715 tree base_name;
2716 tree data_ref_base_var;
2717 tree vec_stmt;
2718 tree addr_base, addr_expr;
2719 tree dest;
2720 gimple_seq seq = NULL;
2721 tree base_offset = unshare_expr (DR_OFFSET (dr));
2722 tree init = unshare_expr (DR_INIT (dr));
2723 tree vect_ptr_type;
2724 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2725 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2726 tree base;
2728 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2730 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2732 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2734 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2735 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2736 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2739 if (loop_vinfo)
2740 base_name = build_fold_indirect_ref (data_ref_base);
2741 else
2743 base_offset = ssize_int (0);
2744 init = ssize_int (0);
2745 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2748 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2749 add_referenced_var (data_ref_base_var);
2750 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2751 data_ref_base_var);
2752 gimple_seq_add_seq (new_stmt_list, seq);
2754 /* Create base_offset */
2755 base_offset = size_binop (PLUS_EXPR,
2756 fold_convert (sizetype, base_offset),
2757 fold_convert (sizetype, init));
2758 dest = create_tmp_var (sizetype, "base_off");
2759 add_referenced_var (dest);
2760 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2761 gimple_seq_add_seq (new_stmt_list, seq);
2763 if (offset)
2765 tree tmp = create_tmp_var (sizetype, "offset");
2767 add_referenced_var (tmp);
2768 offset = fold_build2 (MULT_EXPR, sizetype,
2769 fold_convert (sizetype, offset), step);
2770 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2771 base_offset, offset);
2772 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2773 gimple_seq_add_seq (new_stmt_list, seq);
2776 /* base + base_offset */
2777 if (loop_vinfo)
2778 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2779 data_ref_base, base_offset);
2780 else
2782 addr_base = build1 (ADDR_EXPR,
2783 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2784 unshare_expr (DR_REF (dr)));
2787 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2788 base = get_base_address (DR_REF (dr));
2789 if (base
2790 && TREE_CODE (base) == MEM_REF)
2791 vect_ptr_type
2792 = build_qualified_type (vect_ptr_type,
2793 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2795 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2796 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2797 get_name (base_name));
2798 add_referenced_var (addr_expr);
2799 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2800 gimple_seq_add_seq (new_stmt_list, seq);
2802 if (DR_PTR_INFO (dr)
2803 && TREE_CODE (vec_stmt) == SSA_NAME)
2804 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2806 if (vect_print_dump_info (REPORT_DETAILS))
2808 fprintf (vect_dump, "created ");
2809 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2812 return vec_stmt;
2816 /* Function vect_create_data_ref_ptr.
2818 Create a new pointer to vector type (vp), that points to the first location
2819 accessed in the loop by STMT, along with the def-use update chain to
2820 appropriately advance the pointer through the loop iterations. Also set
2821 aliasing information for the pointer. This vector pointer is used by the
2822 callers to this function to create a memory reference expression for vector
2823 load/store access.
2825 Input:
2826 1. STMT: a stmt that references memory. Expected to be of the form
2827 GIMPLE_ASSIGN <name, data-ref> or
2828 GIMPLE_ASSIGN <data-ref, name>.
2829 2. AT_LOOP: the loop where the vector memref is to be created.
2830 3. OFFSET (optional): an offset to be added to the initial address accessed
2831 by the data-ref in STMT.
2832 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2833 pointing to the initial address.
2834 5. TYPE: if not NULL indicates the required type of the data-ref.
2836 Output:
2837 1. Declare a new ptr to vector_type, and have it point to the base of the
 2838 data reference (the initial address accessed by the data reference).
2839 For example, for vector of type V8HI, the following code is generated:
2841 v8hi *vp;
2842 vp = (v8hi *)initial_address;
2844 if OFFSET is not supplied:
2845 initial_address = &a[init];
2846 if OFFSET is supplied:
2847 initial_address = &a[init + OFFSET];
2849 Return the initial_address in INITIAL_ADDRESS.
2851 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2852 update the pointer in each iteration of the loop.
2854 Return the increment stmt that updates the pointer in PTR_INCR.
2856 3. Set INV_P to true if the access pattern of the data reference in the
2857 vectorized loop is invariant. Set it to false otherwise.
2859 4. Return the pointer. */
2861 tree
2862 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2863 tree offset, tree *initial_address, gimple *ptr_incr,
2864 bool only_init, bool *inv_p)
2866 tree base_name;
2867 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2868 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2869 struct loop *loop = NULL;
2870 bool nested_in_vect_loop = false;
2871 struct loop *containing_loop = NULL;
2872 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2873 tree vect_ptr_type;
2874 tree vect_ptr;
2875 tree new_temp;
2876 gimple vec_stmt;
2877 gimple_seq new_stmt_list = NULL;
2878 edge pe = NULL;
2879 basic_block new_bb;
2880 tree vect_ptr_init;
2881 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2882 tree vptr;
2883 gimple_stmt_iterator incr_gsi;
2884 bool insert_after;
2885 tree indx_before_incr, indx_after_incr;
2886 gimple incr;
2887 tree step;
2888 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2889 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2890 tree base;
2892 if (loop_vinfo)
2894 loop = LOOP_VINFO_LOOP (loop_vinfo);
2895 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2896 containing_loop = (gimple_bb (stmt))->loop_father;
2897 pe = loop_preheader_edge (loop);
2899 else
2901 gcc_assert (bb_vinfo);
2902 only_init = true;
2903 *ptr_incr = NULL;
2906 /* Check the step (evolution) of the load in LOOP, and record
2907 whether it's invariant. */
2908 if (nested_in_vect_loop)
2909 step = STMT_VINFO_DR_STEP (stmt_info);
2910 else
2911 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2913 if (tree_int_cst_compare (step, size_zero_node) == 0)
2914 *inv_p = true;
2915 else
2916 *inv_p = false;
2918 /* Create an expression for the first address accessed by this load
2919 in LOOP. */
2920 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2922 if (vect_print_dump_info (REPORT_DETAILS))
2924 tree data_ref_base = base_name;
2925 fprintf (vect_dump, "create vector-pointer variable to type: ");
2926 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2927 if (TREE_CODE (data_ref_base) == VAR_DECL
2928 || TREE_CODE (data_ref_base) == ARRAY_REF)
2929 fprintf (vect_dump, " vectorizing an array ref: ");
2930 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2931 fprintf (vect_dump, " vectorizing a record based array ref: ");
2932 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2933 fprintf (vect_dump, " vectorizing a pointer ref: ");
2934 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2937 /** (1) Create the new vector-pointer variable: **/
2938 vect_ptr_type = build_pointer_type (vectype);
2939 base = get_base_address (DR_REF (dr));
2940 if (base
2941 && TREE_CODE (base) == MEM_REF)
2942 vect_ptr_type
2943 = build_qualified_type (vect_ptr_type,
2944 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2945 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2946 get_name (base_name));
 2948 /* Vector types inherit the alias set of their component type by default, so
 2949 if the alias set of the original data reference does not conflict with that
 2950 of the new vector pointer (e.g. because it is not addressable), we need to use a ref-all pointer instead. */
2951 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2952 get_alias_set (DR_REF (dr))))
2954 vect_ptr_type
2955 = build_pointer_type_for_mode (vectype,
2956 TYPE_MODE (vect_ptr_type), true);
2957 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2958 get_name (base_name));
2961 /* Likewise for any of the data references in the stmt group. */
2962 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2964 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2967 tree lhs = gimple_assign_lhs (orig_stmt);
2968 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2969 get_alias_set (lhs)))
2971 vect_ptr_type
2972 = build_pointer_type_for_mode (vectype,
2973 TYPE_MODE (vect_ptr_type), true);
2974 vect_ptr
2975 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2976 get_name (base_name));
2977 break;
2980 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2982 while (orig_stmt);
2985 add_referenced_var (vect_ptr);
2987 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2988 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2989 def-use update cycles for the pointer: One relative to the outer-loop
2990 (LOOP), which is what steps (3) and (4) below do. The other is relative
2991 to the inner-loop (which is the inner-most loop containing the dataref),
 2992 and this is done by step (5) below.
2994 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2995 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2996 redundant. Steps (3),(4) create the following:
2998 vp0 = &base_addr;
2999 LOOP: vp1 = phi(vp0,vp2)
3002 vp2 = vp1 + step
3003 goto LOOP
3005 If there is an inner-loop nested in loop, then step (5) will also be
3006 applied, and an additional update in the inner-loop will be created:
3008 vp0 = &base_addr;
3009 LOOP: vp1 = phi(vp0,vp2)
3011 inner: vp3 = phi(vp1,vp4)
3012 vp4 = vp3 + inner_step
3013 if () goto inner
3015 vp2 = vp1 + step
3016 if () goto LOOP */
 3018 /** (3) Calculate the initial address of the vector-pointer, and set
3019 the vector-pointer to point to it before the loop: **/
 3021 /* Create &(base[init_val+offset]) in the loop preheader. */
3023 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3024 offset, loop);
3025 if (new_stmt_list)
3027 if (pe)
3029 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3030 gcc_assert (!new_bb);
3032 else
3033 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3036 *initial_address = new_temp;
3038 /* Create: p = (vectype *) initial_base */
3039 if (TREE_CODE (new_temp) != SSA_NAME
3040 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3042 vec_stmt = gimple_build_assign (vect_ptr,
3043 fold_convert (vect_ptr_type, new_temp));
3044 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3045 /* Copy the points-to information if it exists. */
3046 if (DR_PTR_INFO (dr))
3047 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3048 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3049 if (pe)
3051 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3052 gcc_assert (!new_bb);
3054 else
3055 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3057 else
3058 vect_ptr_init = new_temp;
3060 /** (4) Handle the updating of the vector-pointer inside the loop.
3061 This is needed when ONLY_INIT is false, and also when AT_LOOP
3062 is the inner-loop nested in LOOP (during outer-loop vectorization).
3065 /* No update in loop is required. */
3066 if (only_init && (!loop_vinfo || at_loop == loop))
3067 vptr = vect_ptr_init;
3068 else
3070 /* The step of the vector pointer is the Vector Size. */
3071 tree step = TYPE_SIZE_UNIT (vectype);
3072 /* One exception to the above is when the scalar step of the load in
3073 LOOP is zero. In this case the step here is also zero. */
3074 if (*inv_p)
3075 step = size_zero_node;
3077 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3079 create_iv (vect_ptr_init,
3080 fold_convert (vect_ptr_type, step),
3081 vect_ptr, loop, &incr_gsi, insert_after,
3082 &indx_before_incr, &indx_after_incr);
3083 incr = gsi_stmt (incr_gsi);
3084 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3086 /* Copy the points-to information if it exists. */
3087 if (DR_PTR_INFO (dr))
3089 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3090 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3092 if (ptr_incr)
3093 *ptr_incr = incr;
3095 vptr = indx_before_incr;
3098 if (!nested_in_vect_loop || only_init)
3099 return vptr;
3102 /** (5) Handle the updating of the vector-pointer inside the inner-loop
 3103 nested in LOOP, if it exists: **/
3105 gcc_assert (nested_in_vect_loop);
3106 if (!only_init)
3108 standard_iv_increment_position (containing_loop, &incr_gsi,
3109 &insert_after);
3110 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3111 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3112 &indx_after_incr);
3113 incr = gsi_stmt (incr_gsi);
3114 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3116 /* Copy the points-to information if it exists. */
3117 if (DR_PTR_INFO (dr))
3119 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3120 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3122 if (ptr_incr)
3123 *ptr_incr = incr;
3125 return indx_before_incr;
3127 else
3128 gcc_unreachable ();
3132 /* Function bump_vector_ptr
3134 Increment a pointer (to a vector type) by vector-size. If requested,
3135 i.e. if PTR-INCR is given, then also connect the new increment stmt
3136 to the existing def-use update-chain of the pointer, by modifying
3137 the PTR_INCR as illustrated below:
3139 The pointer def-use update-chain before this function:
3140 DATAREF_PTR = phi (p_0, p_2)
3141 ....
3142 PTR_INCR: p_2 = DATAREF_PTR + step
3144 The pointer def-use update-chain after this function:
3145 DATAREF_PTR = phi (p_0, p_2)
3146 ....
3147 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3148 ....
3149 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3151 Input:
3152 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3153 in the loop.
3154 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3155 the loop. The increment amount across iterations is expected
3156 to be vector_size.
3157 BSI - location where the new update stmt is to be placed.
3158 STMT - the original scalar memory-access stmt that is being vectorized.
3159 BUMP - optional. The offset by which to bump the pointer. If not given,
3160 the offset is assumed to be vector_size.
3162 Output: Return NEW_DATAREF_PTR as illustrated above.
3166 tree
3167 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3168 gimple stmt, tree bump)
3170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3171 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3172 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3173 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3174 tree update = TYPE_SIZE_UNIT (vectype);
3175 gimple incr_stmt;
3176 ssa_op_iter iter;
3177 use_operand_p use_p;
3178 tree new_dataref_ptr;
3180 if (bump)
3181 update = bump;
3183 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3184 dataref_ptr, update);
3185 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3186 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3187 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3189 /* Copy the points-to information if it exists. */
3190 if (DR_PTR_INFO (dr))
3191 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3193 if (!ptr_incr)
3194 return new_dataref_ptr;
3196 /* Update the vector-pointer's cross-iteration increment. */
3197 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3199 tree use = USE_FROM_PTR (use_p);
3201 if (use == dataref_ptr)
3202 SET_USE (use_p, new_dataref_ptr);
3203 else
3204 gcc_assert (tree_int_cst_compare (use, update) == 0);
3207 return new_dataref_ptr;
3211 /* Function vect_create_destination_var.
3213 Create a new temporary of type VECTYPE. */
3215 tree
3216 vect_create_destination_var (tree scalar_dest, tree vectype)
3218 tree vec_dest;
3219 const char *new_name;
3220 tree type;
3221 enum vect_var_kind kind;
3223 kind = vectype ? vect_simple_var : vect_scalar_var;
3224 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3226 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3228 new_name = get_name (scalar_dest);
3229 if (!new_name)
3230 new_name = "var_";
3231 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3232 add_referenced_var (vec_dest);
3234 return vec_dest;
3237 /* Function vect_strided_store_supported.
 3239 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3240 and FALSE otherwise. */
3242 bool
3243 vect_strided_store_supported (tree vectype)
3245 optab interleave_high_optab, interleave_low_optab;
3246 enum machine_mode mode;
3248 mode = TYPE_MODE (vectype);
3250 /* Check that the operation is supported. */
3251 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3252 vectype, optab_default);
3253 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3254 vectype, optab_default);
3255 if (!interleave_high_optab || !interleave_low_optab)
3257 if (vect_print_dump_info (REPORT_DETAILS))
3258 fprintf (vect_dump, "no optab for interleave.");
3259 return false;
3262 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3263 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3265 if (vect_print_dump_info (REPORT_DETAILS))
3266 fprintf (vect_dump, "interleave op not supported by target.");
3267 return false;
3270 return true;
3274 /* Function vect_permute_store_chain.
3276 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3277 a power of 2, generate interleave_high/low stmts to reorder the data
3278 correctly for the stores. Return the final references for stores in
3279 RESULT_CHAIN.
3281 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3282 The input is 4 vectors each containing 8 elements. We assign a number to each
 3283 element; the input sequence is:
3285 1st vec: 0 1 2 3 4 5 6 7
3286 2nd vec: 8 9 10 11 12 13 14 15
3287 3rd vec: 16 17 18 19 20 21 22 23
3288 4th vec: 24 25 26 27 28 29 30 31
3290 The output sequence should be:
3292 1st vec: 0 8 16 24 1 9 17 25
3293 2nd vec: 2 10 18 26 3 11 19 27
 3294 3rd vec: 4 12 20 28 5 13 21 29
3295 4th vec: 6 14 22 30 7 15 23 31
3297 i.e., we interleave the contents of the four vectors in their order.
3299 We use interleave_high/low instructions to create such output. The input of
3300 each interleave_high/low operation is two vectors:
3301 1st vec 2nd vec
3302 0 1 2 3 4 5 6 7
3303 the even elements of the result vector are obtained left-to-right from the
3304 high/low elements of the first vector. The odd elements of the result are
3305 obtained left-to-right from the high/low elements of the second vector.
3306 The output of interleave_high will be: 0 4 1 5
3307 and of interleave_low: 2 6 3 7
3310 The permutation is done in log LENGTH stages. In each stage interleave_high
3311 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3312 where the first argument is taken from the first half of DR_CHAIN and the
 3313 second argument from its second half.
3314 In our example,
3316 I1: interleave_high (1st vec, 3rd vec)
3317 I2: interleave_low (1st vec, 3rd vec)
3318 I3: interleave_high (2nd vec, 4th vec)
3319 I4: interleave_low (2nd vec, 4th vec)
3321 The output for the first stage is:
3323 I1: 0 16 1 17 2 18 3 19
3324 I2: 4 20 5 21 6 22 7 23
3325 I3: 8 24 9 25 10 26 11 27
3326 I4: 12 28 13 29 14 30 15 31
3328 The output of the second stage, i.e. the final result is:
3330 I1: 0 8 16 24 1 9 17 25
3331 I2: 2 10 18 26 3 11 19 27
 3332 I3: 4 12 20 28 5 13 21 29
3333 I4: 6 14 22 30 7 15 23 31. */
3335 bool
3336 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3337 unsigned int length,
3338 gimple stmt,
3339 gimple_stmt_iterator *gsi,
3340 VEC(tree,heap) **result_chain)
3342 tree perm_dest, vect1, vect2, high, low;
3343 gimple perm_stmt;
3344 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3345 int i;
3346 unsigned int j;
3347 enum tree_code high_code, low_code;
3349 /* Check that the operation is supported. */
3350 if (!vect_strided_store_supported (vectype))
3351 return false;
3353 *result_chain = VEC_copy (tree, heap, dr_chain);
3355 for (i = 0; i < exact_log2 (length); i++)
3357 for (j = 0; j < length/2; j++)
3359 vect1 = VEC_index (tree, dr_chain, j);
3360 vect2 = VEC_index (tree, dr_chain, j+length/2);
3362 /* Create interleaving stmt:
3363 in the case of big endian:
3364 high = interleave_high (vect1, vect2)
3365 and in the case of little endian:
3366 high = interleave_low (vect1, vect2). */
3367 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3368 DECL_GIMPLE_REG_P (perm_dest) = 1;
3369 add_referenced_var (perm_dest);
3370 if (BYTES_BIG_ENDIAN)
3372 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3373 low_code = VEC_INTERLEAVE_LOW_EXPR;
3375 else
3377 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3378 high_code = VEC_INTERLEAVE_LOW_EXPR;
3380 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3381 vect1, vect2);
3382 high = make_ssa_name (perm_dest, perm_stmt);
3383 gimple_assign_set_lhs (perm_stmt, high);
3384 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3385 VEC_replace (tree, *result_chain, 2*j, high);
3387 /* Create interleaving stmt:
3388 in the case of big endian:
3389 low = interleave_low (vect1, vect2)
3390 and in the case of little endian:
3391 low = interleave_high (vect1, vect2). */
3392 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3393 DECL_GIMPLE_REG_P (perm_dest) = 1;
3394 add_referenced_var (perm_dest);
3395 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3396 vect1, vect2);
3397 low = make_ssa_name (perm_dest, perm_stmt);
3398 gimple_assign_set_lhs (perm_stmt, low);
3399 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3400 VEC_replace (tree, *result_chain, 2*j+1, low);
3402 dr_chain = VEC_copy (tree, heap, *result_chain);
3404 return true;
3407 /* Function vect_setup_realignment
3409 This function is called when vectorizing an unaligned load using
3410 the dr_explicit_realign[_optimized] scheme.
3411 This function generates the following code at the loop prolog:
3413 p = initial_addr;
3414 x msq_init = *(floor(p)); # prolog load
3415 realignment_token = call target_builtin;
3416 loop:
3417 x msq = phi (msq_init, ---)
3419 The stmts marked with x are generated only for the case of
3420 dr_explicit_realign_optimized.
3422 The code above sets up a new (vector) pointer, pointing to the first
3423 location accessed by STMT, and a "floor-aligned" load using that pointer.
3424 It also generates code to compute the "realignment-token" (if the relevant
3425 target hook was defined), and creates a phi-node at the loop-header bb
3426 whose arguments are the result of the prolog-load (created by this
3427 function) and the result of a load that takes place in the loop (to be
3428 created by the caller to this function).
3430 For the case of dr_explicit_realign_optimized:
3431 The caller to this function uses the phi-result (msq) to create the
3432 realignment code inside the loop, and sets up the missing phi argument,
3433 as follows:
3434 loop:
3435 msq = phi (msq_init, lsq)
3436 lsq = *(floor(p')); # load in loop
3437 result = realign_load (msq, lsq, realignment_token);
3439 For the case of dr_explicit_realign:
3440 loop:
3441 msq = *(floor(p)); # load in loop
3442 p' = p + (VS-1);
3443 lsq = *(floor(p')); # load in loop
3444 result = realign_load (msq, lsq, realignment_token);
3446 Input:
3447 STMT - (scalar) load stmt to be vectorized. This load accesses
3448 a memory location that may be unaligned.
3449 BSI - place where new code is to be inserted.
3450 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3451 is used.
3453 Output:
3454 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3455 target hook, if defined.
3456 Return value - the result of the loop-header phi node. */
3458 tree
3459 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3460 tree *realignment_token,
3461 enum dr_alignment_support alignment_support_scheme,
3462 tree init_addr,
3463 struct loop **at_loop)
3465 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3466 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3467 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3468 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3469 edge pe;
3470 tree scalar_dest = gimple_assign_lhs (stmt);
3471 tree vec_dest;
3472 gimple inc;
3473 tree ptr;
3474 tree data_ref;
3475 gimple new_stmt;
3476 basic_block new_bb;
3477 tree msq_init = NULL_TREE;
3478 tree new_temp;
3479 gimple phi_stmt;
3480 tree msq = NULL_TREE;
3481 gimple_seq stmts = NULL;
3482 bool inv_p;
3483 bool compute_in_loop = false;
3484 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3485 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3486 struct loop *loop_for_initial_load;
3488 gcc_assert (alignment_support_scheme == dr_explicit_realign
3489 || alignment_support_scheme == dr_explicit_realign_optimized);
3491 /* We need to generate three things:
3492 1. the misalignment computation
3493 2. the extra vector load (for the optimized realignment scheme).
3494 3. the phi node for the two vectors from which the realignment is
3495 done (for the optimized realignment scheme).
3498 /* 1. Determine where to generate the misalignment computation.
3500 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3501 calculation will be generated by this function, outside the loop (in the
 3502 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3503 caller, inside the loop.
3505 Background: If the misalignment remains fixed throughout the iterations of
3506 the loop, then both realignment schemes are applicable, and also the
3507 misalignment computation can be done outside LOOP. This is because we are
3508 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3509 are a multiple of VS (the Vector Size), and therefore the misalignment in
3510 different vectorized LOOP iterations is always the same.
3511 The problem arises only if the memory access is in an inner-loop nested
3512 inside LOOP, which is now being vectorized using outer-loop vectorization.
3513 This is the only case when the misalignment of the memory access may not
3514 remain fixed throughout the iterations of the inner-loop (as explained in
3515 detail in vect_supportable_dr_alignment). In this case, not only is the
3516 optimized realignment scheme not applicable, but also the misalignment
3517 computation (and generation of the realignment token that is passed to
 3518 REALIGN_LOAD) has to be done inside the loop.
3520 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3521 or not, which in turn determines if the misalignment is computed inside
3522 the inner-loop, or outside LOOP. */
3524 if (init_addr != NULL_TREE)
3526 compute_in_loop = true;
3527 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3531 /* 2. Determine where to generate the extra vector load.
3533 For the optimized realignment scheme, instead of generating two vector
3534 loads in each iteration, we generate a single extra vector load in the
3535 preheader of the loop, and in each iteration reuse the result of the
3536 vector load from the previous iteration. In case the memory access is in
3537 an inner-loop nested inside LOOP, which is now being vectorized using
3538 outer-loop vectorization, we need to determine whether this initial vector
3539 load should be generated at the preheader of the inner-loop, or can be
3540 generated at the preheader of LOOP. If the memory access has no evolution
3541 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3542 to be generated inside LOOP (in the preheader of the inner-loop). */
3544 if (nested_in_vect_loop)
3546 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3547 bool invariant_in_outerloop =
3548 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3549 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3551 else
3552 loop_for_initial_load = loop;
3553 if (at_loop)
3554 *at_loop = loop_for_initial_load;
3556 /* 3. For the case of the optimized realignment, create the first vector
3557 load at the loop preheader. */
3559 if (alignment_support_scheme == dr_explicit_realign_optimized)
3561 /* Create msq_init = *(floor(p1)) in the loop preheader */
3563 gcc_assert (!compute_in_loop);
3564 pe = loop_preheader_edge (loop_for_initial_load);
3565 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3566 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3567 &init_addr, &inc, true, &inv_p);
3568 new_stmt = gimple_build_assign_with_ops
3569 (BIT_AND_EXPR, NULL_TREE, ptr,
3570 build_int_cst (TREE_TYPE (ptr),
3571 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3572 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3573 gimple_assign_set_lhs (new_stmt, new_temp);
3574 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3575 gcc_assert (!new_bb);
3576 data_ref = build_simple_mem_ref (new_temp);
3577 new_stmt = gimple_build_assign (vec_dest, data_ref);
3578 new_temp = make_ssa_name (vec_dest, new_stmt);
3579 gimple_assign_set_lhs (new_stmt, new_temp);
3580 mark_symbols_for_renaming (new_stmt);
3581 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3582 gcc_assert (!new_bb);
3583 msq_init = gimple_assign_lhs (new_stmt);
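/* Illustrative sketch (not part of this pass): the BIT_AND_EXPR built above
   implements the "floor (p1)" of the comment, i.e. it rounds the pointer
   down to the nearest vector-aligned address.  For a hypothetical byte
   address and a power-of-two vector alignment:  */
static unsigned
sketch_vector_floor (unsigned addr, unsigned vector_align)
{
  /* ADDR & -ALIGN clears the low log2 (ALIGN) bits, which is what the
     generated BIT_AND_EXPR with -TYPE_ALIGN_UNIT (vectype) does.  */
  return addr & -vector_align;
}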
3586 /* 4. Create realignment token using a target builtin, if available.
3587 It is done either inside the containing loop, or before LOOP (as
3588 determined above). */
3590 if (targetm.vectorize.builtin_mask_for_load)
3592 tree builtin_decl;
3594 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3595 if (compute_in_loop)
3596 gcc_assert (init_addr); /* already computed by the caller. */
3597 else
3599 /* Generate the INIT_ADDR computation outside LOOP. */
3600 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3601 NULL_TREE, loop);
3602 pe = loop_preheader_edge (loop);
3603 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3604 gcc_assert (!new_bb);
3607 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3608 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3609 vec_dest =
3610 vect_create_destination_var (scalar_dest,
3611 gimple_call_return_type (new_stmt));
3612 new_temp = make_ssa_name (vec_dest, new_stmt);
3613 gimple_call_set_lhs (new_stmt, new_temp);
3615 if (compute_in_loop)
3616 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3617 else
3619 /* Generate the misalignment computation outside LOOP. */
3620 pe = loop_preheader_edge (loop);
3621 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3622 gcc_assert (!new_bb);
3625 *realignment_token = gimple_call_lhs (new_stmt);
3627 /* The result of the CALL_EXPR to this builtin is determined from
3628 the value of the parameter, and no global variables are touched,
3629 which makes the builtin a "const" function. Requiring the
3630 builtin to have the "const" attribute makes it unnecessary
3631 to call mark_call_clobbered. */
3632 gcc_assert (TREE_READONLY (builtin_decl));
3635 if (alignment_support_scheme == dr_explicit_realign)
3636 return msq;
3638 gcc_assert (!compute_in_loop);
3639 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3642 /* 5. Create msq = phi <msq_init, lsq> in loop */
3644 pe = loop_preheader_edge (containing_loop);
3645 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3646 msq = make_ssa_name (vec_dest, NULL);
3647 phi_stmt = create_phi_node (msq, containing_loop->header);
3648 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3649 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3651 return msq;
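/* Illustrative sketch (not part of this pass): a scalar, byte-wise model of
   the code that the optimized realignment scheme set up above ends up
   producing together with its caller.  MSQ is loaded once in the preheader,
   each iteration loads only the next aligned chunk (LSQ), and REALIGN_LOAD
   is modelled by selecting bytes from MSQ/LSQ according to the realignment
   token OFF.  The pointer-to-integer cast is purely for illustration, N is
   assumed to be a multiple of VS, and like the real scheme this reads up to
   VS-1 bytes past the last element.  */
static void
sketch_optimized_realign_load (const unsigned char *p, unsigned vs,
                               unsigned char *out, unsigned n)
{
  unsigned off = (unsigned long) p % vs;    /* realignment token (preheader) */
  const unsigned char *ap = p - off;        /* floor (p)                     */
  const unsigned char *msq = ap;            /* msq_init, loaded in preheader */
  unsigned i, j;

  for (i = 0; i < n; i += vs, ap += vs)
    {
      const unsigned char *lsq = ap + vs;   /* single extra load per iter    */
      for (j = 0; j < vs; j++)              /* va = REALIGN_LOAD <msq,lsq,rt> */
        out[i + j] = (off + j < vs ? msq[off + j] : lsq[off + j - vs]);
      msq = lsq;                            /* reused via the loop-header phi */
    }
}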
3655 /* Function vect_strided_load_supported.
3657 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3658 and FALSE otherwise. */
3660 bool
3661 vect_strided_load_supported (tree vectype)
3663 optab perm_even_optab, perm_odd_optab;
3664 enum machine_mode mode;
3666 mode = TYPE_MODE (vectype);
3668 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3669 optab_default);
3670 if (!perm_even_optab)
3672 if (vect_print_dump_info (REPORT_DETAILS))
3673 fprintf (vect_dump, "no optab for perm_even.");
3674 return false;
3677 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3679 if (vect_print_dump_info (REPORT_DETAILS))
3680 fprintf (vect_dump, "perm_even op not supported by target.");
3681 return false;
3684 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3685 optab_default);
3686 if (!perm_odd_optab)
3688 if (vect_print_dump_info (REPORT_DETAILS))
3689 fprintf (vect_dump, "no optab for perm_odd.");
3690 return false;
3693 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3695 if (vect_print_dump_info (REPORT_DETAILS))
3696 fprintf (vect_dump, "perm_odd op not supported by target.");
3697 return false;
3699 return true;
3703 /* Function vect_permute_load_chain.
3705 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3706 a power of 2, generate extract_even/odd stmts to reorder the input data
3707 correctly. Return the final references for loads in RESULT_CHAIN.
3709 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3710 The input is 4 vectors, each containing 8 elements. We assign a number to each
3711 element; the input sequence is:
3713 1st vec: 0 1 2 3 4 5 6 7
3714 2nd vec: 8 9 10 11 12 13 14 15
3715 3rd vec: 16 17 18 19 20 21 22 23
3716 4th vec: 24 25 26 27 28 29 30 31
3718 The output sequence should be:
3720 1st vec: 0 4 8 12 16 20 24 28
3721 2nd vec: 1 5 9 13 17 21 25 29
3722 3rd vec: 2 6 10 14 18 22 26 30
3723 4th vec: 3 7 11 15 19 23 27 31
3725 i.e., the first output vector should contain the first elements of each
3726 interleaving group, etc.
3728 We use extract_even/odd instructions to create such output. The input to each
3729 extract_even/odd operation is a pair of vectors:
3730 1st vec 2nd vec
3731 0 1 2 3 4 5 6 7
3733 and the output is the vector of extracted even/odd elements. The output of
3734 extract_even will be: 0 2 4 6
3735 and of extract_odd: 1 3 5 7
3738 The permutation is done in log2 (LENGTH) stages (a plain-C sketch follows the function below). In each stage extract_even and
3739 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3740 order. In our example,
3742 E1: extract_even (1st vec, 2nd vec)
3743 E2: extract_odd (1st vec, 2nd vec)
3744 E3: extract_even (3rd vec, 4th vec)
3745 E4: extract_odd (3rd vec, 4th vec)
3747 The output for the first stage will be:
3749 E1: 0 2 4 6 8 10 12 14
3750 E2: 1 3 5 7 9 11 13 15
3751 E3: 16 18 20 22 24 26 28 30
3752 E4: 17 19 21 23 25 27 29 31
3754 In order to proceed and create the correct sequence for the next stage (or
3755 for the correct output, if the second stage is the last one, as in our
3756 example), we first put the output of extract_even operation and then the
3757 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3758 The input for the second stage is:
3760 1st vec (E1): 0 2 4 6 8 10 12 14
3761 2nd vec (E3): 16 18 20 22 24 26 28 30
3762 3rd vec (E2): 1 3 5 7 9 11 13 15
3763 4th vec (E4): 17 19 21 23 25 27 29 31
3765 The output of the second stage:
3767 E1: 0 4 8 12 16 20 24 28
3768 E2: 2 6 10 14 18 22 26 30
3769 E3: 1 5 9 13 17 21 25 29
3770 E4: 3 7 11 15 19 23 27 31
3772 And RESULT_CHAIN after reordering:
3774 1st vec (E1): 0 4 8 12 16 20 24 28
3775 2nd vec (E3): 1 5 9 13 17 21 25 29
3776 3rd vec (E2): 2 6 10 14 18 22 26 30
3777 4th vec (E4): 3 7 11 15 19 23 27 31. */
3779 bool
3780 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3781 unsigned int length,
3782 gimple stmt,
3783 gimple_stmt_iterator *gsi,
3784 VEC(tree,heap) **result_chain)
3786 tree perm_dest, data_ref, first_vect, second_vect;
3787 gimple perm_stmt;
3788 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3789 int i;
3790 unsigned int j;
3792 /* Check that the operation is supported. */
3793 if (!vect_strided_load_supported (vectype))
3794 return false;
3796 *result_chain = VEC_copy (tree, heap, dr_chain);
3797 for (i = 0; i < exact_log2 (length); i++)
3799 for (j = 0; j < length; j +=2)
3801 first_vect = VEC_index (tree, dr_chain, j);
3802 second_vect = VEC_index (tree, dr_chain, j+1);
3804 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3805 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3806 DECL_GIMPLE_REG_P (perm_dest) = 1;
3807 add_referenced_var (perm_dest);
3809 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3810 perm_dest, first_vect,
3811 second_vect);
3813 data_ref = make_ssa_name (perm_dest, perm_stmt);
3814 gimple_assign_set_lhs (perm_stmt, data_ref);
3815 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3816 mark_symbols_for_renaming (perm_stmt);
3818 VEC_replace (tree, *result_chain, j/2, data_ref);
3820 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3821 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3822 DECL_GIMPLE_REG_P (perm_dest) = 1;
3823 add_referenced_var (perm_dest);
3825 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3826 perm_dest, first_vect,
3827 second_vect);
3828 data_ref = make_ssa_name (perm_dest, perm_stmt);
3829 gimple_assign_set_lhs (perm_stmt, data_ref);
3830 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3831 mark_symbols_for_renaming (perm_stmt);
3833 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3835 dr_chain = VEC_copy (tree, heap, *result_chain);
3837 return true;
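/* Illustrative sketch (not part of this pass): a plain-C simulation of the
   extract_even/extract_odd permutation performed above, with int arrays in
   place of vectors.  CHAIN and RESULT each hold LENGTH arrays of NELT
   elements; running it with LENGTH == 4 and NELT == 8 on the numbering used
   in the comment before vect_permute_load_chain reproduces the RESULT_CHAIN
   shown there.  Unlike the real function, it overwrites CHAIN between
   stages instead of copying the VECs.  */
static void
sketch_permute_load_chain (int **chain, unsigned length, unsigned nelt,
                           int **result)
{
  unsigned stage, j, k;

  for (stage = 0; (1u << stage) < length; stage++)
    {
      for (j = 0; j < length; j += 2)
        {
          /* result[j/2] = extract_even (chain[j], chain[j+1])  */
          for (k = 0; k < nelt; k++)
            result[j / 2][k]
              = 2 * k < nelt ? chain[j][2 * k] : chain[j + 1][2 * k - nelt];
          /* result[j/2 + length/2] = extract_odd (chain[j], chain[j+1])  */
          for (k = 0; k < nelt; k++)
            result[j / 2 + length / 2][k]
              = (2 * k + 1 < nelt
                 ? chain[j][2 * k + 1] : chain[j + 1][2 * k + 1 - nelt]);
        }
      for (j = 0; j < length; j++)          /* this stage's output feeds the */
        for (k = 0; k < nelt; k++)          /* next stage, as DR_CHAIN does  */
          chain[j][k] = result[j][k];
    }
}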
3841 /* Function vect_transform_strided_load.
3843 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3844 to perform their permutation and attach the resulting vectorized statements to
3845 the scalar statements.
3848 bool
3849 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3850 gimple_stmt_iterator *gsi)
3852 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3853 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3854 gimple next_stmt, new_stmt;
3855 VEC(tree,heap) *result_chain = NULL;
3856 unsigned int i, gap_count;
3857 tree tmp_data_ref;
3859 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3860 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
3861 vectors that are ready for vector computation. */
3862 result_chain = VEC_alloc (tree, heap, size);
3863 /* Permute. */
3864 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3865 return false;
3867 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3868 Since we scan the chain starting from its first node, their order
3869 corresponds to the order of data-refs in RESULT_CHAIN. */
3870 next_stmt = first_stmt;
3871 gap_count = 1;
3872 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3874 if (!next_stmt)
3875 break;
3877 /* Skip the gaps. Loads created for the gaps will be removed by dead
3878 code elimination pass later. No need to check for the first stmt in
3879 the group, since it always exists.
3880 DR_GROUP_GAP is the number of steps in elements from the previous
3881 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3882 correspond to the gaps; a small sketch of this walk follows the function.
3884 if (next_stmt != first_stmt
3885 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3887 gap_count++;
3888 continue;
3891 while (next_stmt)
3893 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3894 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3895 copies, and we put the new vector statement in the first available
3896 RELATED_STMT. */
3897 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3898 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3899 else
3901 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3903 gimple prev_stmt =
3904 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3905 gimple rel_stmt =
3906 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3907 while (rel_stmt)
3909 prev_stmt = rel_stmt;
3910 rel_stmt =
3911 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3914 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3915 new_stmt;
3919 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3920 gap_count = 1;
3921 /* If NEXT_STMT accesses the same DR as the previous statement,
3922 put the same TMP_DATA_REF as its vectorized statement; otherwise
3923 get the next data-ref from RESULT_CHAIN. */
3924 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3925 break;
3929 VEC_free (tree, heap, result_chain);
3930 return true;
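/* Illustrative sketch (not part of this pass): how the gap skipping above
   walks RESULT_CHAIN.  GROUP_GAP[s] stands for DR_GROUP_GAP of the s-th
   statement in the group (1 when there is no gap); permuted vectors whose
   positions fall in a gap are simply left unconsumed and later die as dead
   loads.  CHAIN_IDX_FOR_STMT records which chain element each group member
   ends up using (the SAME_DR_STMT case is ignored for simplicity).  */
static void
sketch_skip_gaps (const unsigned *group_gap, unsigned group_size,
                  unsigned chain_len, int *chain_idx_for_stmt)
{
  unsigned i, s = 0, gap_count = 1;

  for (i = 0; i < chain_len && s < group_size; i++)
    {
      /* Mirrors the `continue' above: this vector falls into a gap.  */
      if (s != 0 && gap_count < group_gap[s])
        {
          gap_count++;
          continue;
        }
      chain_idx_for_stmt[s++] = (int) i;   /* this stmt uses chain[i]  */
      gap_count = 1;
    }
}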
3933 /* Function vect_can_force_dr_alignment_p.
3935 Returns whether the alignment of a DECL can be forced to be aligned
3936 on an ALIGNMENT-bit boundary. */
3938 bool
3939 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3941 if (TREE_CODE (decl) != VAR_DECL)
3942 return false;
3944 if (DECL_EXTERNAL (decl))
3945 return false;
3947 if (TREE_ASM_WRITTEN (decl))
3948 return false;
3950 if (TREE_STATIC (decl))
3951 return (alignment <= MAX_OFILE_ALIGNMENT);
3952 else
3953 return (alignment <= MAX_STACK_ALIGNMENT);
3957 /* Return whether the data reference DR is supported with respect to its
3958 alignment.
3959 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
3960 if it is aligned, i.e., check if it is possible to vectorize it with different
3961 alignment. */
3963 enum dr_alignment_support
3964 vect_supportable_dr_alignment (struct data_reference *dr,
3965 bool check_aligned_accesses)
3967 gimple stmt = DR_STMT (dr);
3968 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3969 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3970 enum machine_mode mode = TYPE_MODE (vectype);
3971 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3972 struct loop *vect_loop = NULL;
3973 bool nested_in_vect_loop = false;
3975 if (aligned_access_p (dr) && !check_aligned_accesses)
3976 return dr_aligned;
3978 if (!loop_vinfo)
3979 /* FORNOW: Misaligned accesses are supported only in loops. */
3980 return dr_unaligned_unsupported;
3982 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3983 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3985 /* Possibly unaligned access. */
3987 /* We can choose between using the implicit realignment scheme (generating
3988 a misaligned_move stmt) and the explicit realignment scheme (generating
3989 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3990 realignment scheme: optimized, and unoptimized.
3991 We can optimize the realignment only if the step between consecutive
3992 vector loads is equal to the vector size. Since the vector memory
3993 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3994 is guaranteed that the misalignment amount remains the same throughout the
3995 execution of the vectorized loop. Therefore, we can create the
3996 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3997 at the loop preheader.
3999 However, in the case of outer-loop vectorization, when vectorizing a
4000 memory access in the inner-loop nested within the LOOP that is now being
4001 vectorized, while it is guaranteed that the misalignment of the
4002 vectorized memory access will remain the same in different outer-loop
4003 iterations, it is *not* guaranteed that it will remain the same throughout
4004 the execution of the inner-loop. This is because the inner-loop advances
4005 with the original scalar step (and not in steps of VS). If the inner-loop
4006 step happens to be a multiple of VS, then the misalignment remains fixed
4007 and we can use the optimized realignment scheme. For example:
4009 for (i=0; i<N; i++)
4010 for (j=0; j<M; j++)
4011 s += a[i+j];
4013 When vectorizing the i-loop in the above example, the step between
4014 consecutive vector loads is 1, and so the misalignment does not remain
4015 fixed across the execution of the inner-loop, and the realignment cannot
4016 be optimized (as illustrated in the following pseudo vectorized loop):
4018 for (i=0; i<N; i+=4)
4019 for (j=0; j<M; j++){
4020 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4021 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4022 // (assuming that we start from an aligned address).
4025 We therefore have to use the unoptimized realignment scheme:
4027 for (i=0; i<N; i+=4)
4028 for (j=k; j<M; j+=4)
4029 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4030 // that the misalignment of the initial address is
4031 // 0).
4033 The loop can then be vectorized as follows:
4035 for (k=0; k<4; k++){
4036 rt = get_realignment_token (&vp[k]);
4037 for (i=0; i<N; i+=4){
4038 v1 = vp[i+k];
4039 for (j=k; j<M; j+=4){
4040 v2 = vp[i+j+VS-1];
4041 va = REALIGN_LOAD <v1,v2,rt>;
4042 vs += va;
4043 v1 = v2;
4046 } */
4048 if (DR_IS_READ (dr))
4050 bool is_packed = false;
4051 tree type = (TREE_TYPE (DR_REF (dr)));
4053 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4054 && (!targetm.vectorize.builtin_mask_for_load
4055 || targetm.vectorize.builtin_mask_for_load ()))
4057 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4058 if (nested_in_vect_loop
4059 && (TREE_INT_CST_LOW (DR_STEP (dr))
4060 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4061 return dr_explicit_realign;
4062 else
4063 return dr_explicit_realign_optimized;
4065 if (!known_alignment_for_access_p (dr))
4067 tree ba = DR_BASE_OBJECT (dr);
4069 if (ba)
4070 is_packed = contains_packed_reference (ba);
4073 if (targetm.vectorize.
4074 support_vector_misalignment (mode, type,
4075 DR_MISALIGNMENT (dr), is_packed))
4076 /* Can't software pipeline the loads, but can at least do them. */
4077 return dr_unaligned_supported;
4079 else
4081 bool is_packed = false;
4082 tree type = (TREE_TYPE (DR_REF (dr)));
4084 if (!known_alignment_for_access_p (dr))
4086 tree ba = DR_BASE_OBJECT (dr);
4088 if (ba)
4089 is_packed = contains_packed_reference (ba);
4092 if (targetm.vectorize.
4093 support_vector_misalignment (mode, type,
4094 DR_MISALIGNMENT (dr), is_packed))
4095 return dr_unaligned_supported;
4098 /* Unsupported. */
4099 return dr_unaligned_unsupported;